Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 1e7f583a authored by Theodore Ts'o's avatar Theodore Ts'o
Browse files

random: make /dev/urandom scalable for silly userspace programs



On a system with a 4 socket (NUMA) system where a large number of
application threads were all trying to read from /dev/urandom, this
can result in the system spending 80% of its time contending on the
global urandom spinlock.  The application should have used its own
PRNG, but let's try to help it from running, lemming-like, straight
over the locking cliff.

Reported-by: default avatarAndi Kleen <ak@linux.intel.com>
Signed-off-by: default avatarTheodore Ts'o <tytso@mit.edu>
parent e192be9d
Loading
Loading
Loading
Loading
+58 −4
Original line number Diff line number Diff line
@@ -436,6 +436,8 @@ static int crng_init = 0;
#define crng_ready() (likely(crng_init > 0))
static int crng_init_cnt = 0;
#define CRNG_INIT_CNT_THRESH (2*CHACHA20_KEY_SIZE)
static void _extract_crng(struct crng_state *crng,
			  __u8 out[CHACHA20_BLOCK_SIZE]);
static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE]);
static void process_random_ready_list(void);

@@ -756,6 +758,16 @@ static void credit_entropy_bits_safe(struct entropy_store *r, int nbits)

static DECLARE_WAIT_QUEUE_HEAD(crng_init_wait);

#ifdef CONFIG_NUMA
/*
 * Hack to deal with crazy userspace progams when they are all trying
 * to access /dev/urandom in parallel.  The programs are almost
 * certainly doing something terribly wrong, but we'll work around
 * their brain damage.
 */
static struct crng_state **crng_node_pool __read_mostly;
#endif

static void crng_initialize(struct crng_state *crng)
{
	int		i;
@@ -815,7 +827,7 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
		if (num == 0)
			return;
	} else
		extract_crng(buf.block);
		_extract_crng(&primary_crng, buf.block);
	spin_lock_irqsave(&primary_crng.lock, flags);
	for (i = 0; i < 8; i++) {
		unsigned long	rv;
@@ -835,19 +847,26 @@ static void crng_reseed(struct crng_state *crng, struct entropy_store *r)
	spin_unlock_irqrestore(&primary_crng.lock, flags);
}

static inline void maybe_reseed_primary_crng(void)
{
	if (crng_init > 2 &&
	    time_after(jiffies, primary_crng.init_time + CRNG_RESEED_INTERVAL))
		crng_reseed(&primary_crng, &input_pool);
}

static inline void crng_wait_ready(void)
{
	wait_event_interruptible(crng_init_wait, crng_ready());
}

static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
static void _extract_crng(struct crng_state *crng,
			  __u8 out[CHACHA20_BLOCK_SIZE])
{
	unsigned long v, flags;
	struct crng_state *crng = &primary_crng;

	if (crng_init > 1 &&
	    time_after(jiffies, crng->init_time + CRNG_RESEED_INTERVAL))
		crng_reseed(crng, &input_pool);
		crng_reseed(crng, crng == &primary_crng ? &input_pool : NULL);
	spin_lock_irqsave(&crng->lock, flags);
	if (arch_get_random_long(&v))
		crng->state[14] ^= v;
@@ -857,6 +876,19 @@ static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
	spin_unlock_irqrestore(&crng->lock, flags);
}

static void extract_crng(__u8 out[CHACHA20_BLOCK_SIZE])
{
	struct crng_state *crng = NULL;

#ifdef CONFIG_NUMA
	if (crng_node_pool)
		crng = crng_node_pool[numa_node_id()];
	if (crng == NULL)
#endif
		crng = &primary_crng;
	_extract_crng(crng, out);
}

static ssize_t extract_crng_user(void __user *buf, size_t nbytes)
{
	ssize_t ret = 0, i;
@@ -1575,9 +1607,31 @@ static void init_std_data(struct entropy_store *r)
 */
static int rand_initialize(void)
{
#ifdef CONFIG_NUMA
	int i;
	int num_nodes = num_possible_nodes();
	struct crng_state *crng;
	struct crng_state **pool;
#endif

	init_std_data(&input_pool);
	init_std_data(&blocking_pool);
	crng_initialize(&primary_crng);

#ifdef CONFIG_NUMA
	pool = kmalloc(num_nodes * sizeof(void *),
		       GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO);
	for (i=0; i < num_nodes; i++) {
		crng = kmalloc_node(sizeof(struct crng_state),
				    GFP_KERNEL | __GFP_NOFAIL, i);
		spin_lock_init(&crng->lock);
		crng_initialize(crng);
		pool[i] = crng;

	}
	mb();
	crng_node_pool = pool;
#endif
	return 0;
}
early_initcall(rand_initialize);