Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 81d7c56d authored by George Spelvin, committed by Greg Kroah-Hartman
Browse files

random32: make prandom_u32() output unpredictable



commit c51f8f88d705e06bd696d7510aff22b33eb8e638 upstream.

Non-cryptographic PRNGs may have great statistical properties, but
are usually trivially predictable to someone who knows the algorithm,
given a small sample of their output.  An LFSR like prandom_u32() is
particularly simple, even if the sample is widely scattered bits.

It turns out the network stack uses prandom_u32() for some things like
random port numbers which it would prefer are *not* trivially predictable.
Predictability led to a practical DNS spoofing attack.  Oops.

This patch replaces the LFSR with a homebrew cryptographic PRNG based
on the SipHash round function, which is in turn seeded with 128 bits
of strong random key.  (The authors of SipHash have *not* been consulted
about this abuse of their algorithm.)  Speed is prioritized over security;
attacks are rare, while performance is always wanted.

Replacing all callers of prandom_u32() is the quick fix.
Whether to reinstate a weaker PRNG for uses which can tolerate it
is an open question.

Commit f227e3ec3b5c ("random32: update the net random state on interrupt
and activity") was an earlier attempt at a solution.  This patch replaces
it.

Reported-by: Amit Klein <aksecurity@gmail.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Eric Dumazet <edumazet@google.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: tytso@mit.edu
Cc: Florian Westphal <fw@strlen.de>
Cc: Marc Plumb <lkml.mplumb@gmail.com>
Fixes: f227e3ec3b5c ("random32: update the net random state on interrupt and activity")
Signed-off-by: George Spelvin <lkml@sdf.org>
Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/


[ willy: partial reversal of f227e3ec3b5c; moved SIPROUND definitions
  to prandom.h for later use; merged George's prandom_seed() proposal;
  inlined siprand_u32(); replaced the net_rand_state[] array with 4
  members to fix a build issue; cosmetic cleanups to make checkpatch
  happy; fixed RANDOM32_SELFTEST build ]
[wt: backported to 4.19 -- various context adjustments]
Signed-off-by: Willy Tarreau <w@1wt.eu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
parent bc571404
Loading
Loading
Loading
Loading
+0 −1
Original line number Diff line number Diff line
@@ -1257,7 +1257,6 @@ void add_interrupt_randomness(int irq, int irq_flags)

	fast_mix(fast_pool);
	add_interrupt_bench(cycles);
	this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]);

	if (unlikely(crng_init == 0)) {
		if ((fast_pool->count >= 64) &&
+34 −2
Original line number Diff line number Diff line
@@ -16,12 +16,44 @@ void prandom_bytes(void *buf, size_t nbytes);
/* Mix the 32-bit @seed into the generator state behind prandom_u32(). */
void prandom_seed(u32 seed);
/*
 * Request a late reseed of the prandom_u32() generator.
 * NOTE(review): confirm a definition still exists for this prototype.
 */
void prandom_reseed_late(void);

/*
 * Word-size-specific building blocks for the SipHash-based prandom_u32():
 * PRND_SIPROUND() is the round function, and PRND_K0/PRND_K1 are the
 * constants the seeding code XORs into v0/v1 to derive v2/v3.
 * Any BITS_PER_LONG other than 64 or 32 is rejected at compile time.
 */
#if BITS_PER_LONG == 64
/*
 * The core SipHash round function.  Each line can be executed in
 * parallel given enough CPU resources.
 *
 * The macro is one comma expression that updates all four arguments in
 * place, so each argument must be a modifiable lvalue and is evaluated
 * more than once.
 */
#define PRND_SIPROUND(v0, v1, v2, v3) ( \
	v0 += v1, v1 = rol64(v1, 13),  v2 += v3, v3 = rol64(v3, 16), \
	v1 ^= v0, v0 = rol64(v0, 32),  v3 ^= v2,                     \
	v0 += v3, v3 = rol64(v3, 21),  v2 += v1, v1 = rol64(v1, 17), \
	v3 ^= v0,                      v1 ^= v2, v2 = rol64(v2, 32)  \
)

/*
 * The four 64-bit literals spell, in ASCII, "somepseu", "lygenera",
 * "dorandom" and "tedbytes".
 */
#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261)
#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573)

#elif BITS_PER_LONG == 32
/*
 * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash.
 * This is weaker, but 32-bit machines are not used for high-traffic
 * applications, so there is less output for an attacker to analyze.
 *
 * Same shape as the 64-bit round, using rol32() and narrower rotation
 * counts; the same lvalue / multiple-evaluation caveats apply.
 */
#define PRND_SIPROUND(v0, v1, v2, v3) ( \
	v0 += v1, v1 = rol32(v1,  5),  v2 += v3, v3 = rol32(v3,  8), \
	v1 ^= v0, v0 = rol32(v0, 16),  v3 ^= v2,                     \
	v0 += v3, v3 = rol32(v3,  7),  v2 += v1, v1 = rol32(v1, 13), \
	v3 ^= v0,                      v1 ^= v2, v2 = rol32(v2, 16)  \
)
/* ASCII "lyge" and "tedb" respectively. */
#define PRND_K0 0x6c796765
#define PRND_K1 0x74656462

#else
#error Unsupported BITS_PER_LONG
#endif

/*
 * State of the prandom_u32_state() generator: four 32-bit words, each
 * seeded separately by the seeding helpers.
 */
struct rnd_state {
	__u32 s1;
	__u32 s2;
	__u32 s3;
	__u32 s4;
};

/* Per-CPU generator state, declared here for users outside random32.c. */
DECLARE_PER_CPU(struct rnd_state, net_rand_state);

/* Return the next 32-bit pseudo-random value from the seeded @state. */
u32 prandom_u32_state(struct rnd_state *state);
/* Store @nbytes pseudo-random bytes drawn from @state into @buf. */
void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
/* Seed the per-CPU state @pcpu_state (presumably every CPU's copy -- confirm). */
void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
+0 −7
Original line number Diff line number Diff line
@@ -1655,13 +1655,6 @@ void update_process_times(int user_tick)
	scheduler_tick();
	if (IS_ENABLED(CONFIG_POSIX_TIMERS))
		run_posix_cpu_timers(p);

	/* The current CPU might make use of net randoms without receiving IRQs
	 * to renew them often enough. Let's update the net_rand_state from a
	 * non-constant value that's not affine to the number of calls to make
	 * sure it's updated when there's some activity (we don't care in idle).
	 */
	this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick);
}

/**
+283 −179
Original line number Diff line number Diff line
@@ -40,16 +40,6 @@
#include <linux/sched.h>
#include <asm/unaligned.h>

/*
 * prandom_state_selftest() only has a real body when
 * CONFIG_RANDOM32_SELFTEST is set; otherwise callers get an empty
 * inline stub.
 */
#ifndef CONFIG_RANDOM32_SELFTEST
static inline void prandom_state_selftest(void) { }
#else
static void __init prandom_state_selftest(void);
#endif

/* Per-CPU rnd_state instance read by prandom_u32() and prandom_bytes(). */
DEFINE_PER_CPU(struct rnd_state, net_rand_state)  __latent_entropy;

/**
 *	prandom_u32_state - seeded pseudo-random number generator.
 *	@state: pointer to state structure holding seeded state.
@@ -69,25 +59,6 @@ u32 prandom_u32_state(struct rnd_state *state)
}
EXPORT_SYMBOL(prandom_u32_state);

/**
 *	prandom_u32 - pseudo random number generator
 *
 *	Draws one 32-bit value from the current CPU's net_rand_state via
 *	prandom_u32_state().  The algorithm is fast and suitable for
 *	simulation; it is NOT considered safe for cryptographic use.
 */
u32 prandom_u32(void)
{
	struct rnd_state *cpu_state;
	u32 value;

	/* get_cpu_var()/put_cpu_var() bracket the access to this CPU's state. */
	cpu_state = &get_cpu_var(net_rand_state);
	value = prandom_u32_state(cpu_state);
	put_cpu_var(net_rand_state);

	return value;
}
EXPORT_SYMBOL(prandom_u32);

/**
 *	prandom_bytes_state - get the requested number of pseudo-random bytes
 *
@@ -119,20 +90,6 @@ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes)
}
EXPORT_SYMBOL(prandom_bytes_state);

/**
 *	prandom_bytes - get the requested number of pseudo-random bytes
 *	@buf: destination buffer for the pseudo-random bytes
 *	@bytes: number of bytes to produce
 *
 *	Thin wrapper that feeds the current CPU's net_rand_state to
 *	prandom_bytes_state().
 */
void prandom_bytes(void *buf, size_t bytes)
{
	struct rnd_state *cpu_state;

	cpu_state = &get_cpu_var(net_rand_state);
	prandom_bytes_state(cpu_state, buf, bytes);
	put_cpu_var(net_rand_state);
}
EXPORT_SYMBOL(prandom_bytes);

static void prandom_warmup(struct rnd_state *state)
{
	/* Calling RNG ten times to satisfy recurrence condition */
@@ -148,96 +105,6 @@ static void prandom_warmup(struct rnd_state *state)
	prandom_u32_state(state);
}

/*
 * Fetch a 32-bit value from the architecture's random source.
 * arch_get_random_seed_int() is tried first and arch_get_random_int()
 * only if that fails; when both fail the result is 0.
 */
static u32 __extract_hwseed(void)
{
	unsigned int hw = 0;

	if (!arch_get_random_seed_int(&hw))
		(void)arch_get_random_int(&hw);

	return hw;
}

/*
 * Fill all four words of @state starting from the 32-bit @seed.
 *
 * The words are chained: s1 is derived from @seed, s2 from s1, s3 from s2
 * and s4 from s3, each time multiplied by 69069 through LCG().  When
 * @mix_with_hwseed is true a fresh __extract_hwseed() value is XORed into
 * every word; otherwise 0 is XORed in.
 *
 * NOTE(review): __seed() is not visible here; presumably it enforces the
 * per-word minimum passed as its second argument (2, 8, 16, 128) -- confirm.
 * LCG() and HWSEED() are not #undef'd inside this function.
 */
static void prandom_seed_early(struct rnd_state *state, u32 seed,
			       bool mix_with_hwseed)
{
#define LCG(x)	 ((x) * 69069U)	/* super-duper LCG */
#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
	state->s1 = __seed(HWSEED() ^ LCG(seed),        2U);
	state->s2 = __seed(HWSEED() ^ LCG(state->s1),   8U);
	state->s3 = __seed(HWSEED() ^ LCG(state->s2),  16U);
	state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
}

/**
 *	prandom_seed - add entropy to pseudo random number generator
 *	@entropy: value XORed into the first state word of every CPU
 *
 *	Add some additional seeding to the prandom pool, then warm each
 *	per-CPU generator up again with prandom_warmup().
 */
void prandom_seed(u32 entropy)
{
	int cpu;

	/*
	 * The per-CPU states are touched without any locking; a racing
	 * update only yields somewhat random results, which is expected.
	 */
	for_each_possible_cpu(cpu) {
		struct rnd_state *st = &per_cpu(net_rand_state, cpu);
		u32 mixed = st->s1 ^ entropy;

		st->s1 = __seed(mixed, 2U);
		prandom_warmup(st);
	}
}
EXPORT_SYMBOL(prandom_seed);

/*
 *	Core initcall: run the self test, then give every possible CPU an
 *	initial (weak) seed so that prandom_u32() can be used early.
 *	The weak seed combines the CPU number, jiffies and
 *	random_get_entropy().  Always returns 0.
 */
static int __init prandom_init(void)
{
	int cpu;

	prandom_state_selftest();

	for_each_possible_cpu(cpu) {
		u32 weak_seed = (cpu + jiffies) ^ random_get_entropy();
		struct rnd_state *st = &per_cpu(net_rand_state, cpu);

		prandom_seed_early(st, weak_seed, true);
		prandom_warmup(st);
	}

	return 0;
}
core_initcall(prandom_init);

/* Forward declaration: DEFINE_TIMER() below needs the callback's name. */
static void __prandom_timer(struct timer_list *unused);

static DEFINE_TIMER(seed_timer, __prandom_timer);

/*
 * Periodic reseed callback: pull 32 fresh bits from get_random_bytes(),
 * mix them in with prandom_seed(), then re-arm seed_timer.
 * @unused is ignored.
 */
static void __prandom_timer(struct timer_list *unused)
{
	unsigned long delay_secs;
	u32 fresh;

	get_random_bytes(&fresh, sizeof(fresh));
	prandom_seed(fresh);

	/* reseed every ~60 seconds, in [40 .. 80) interval with slack */
	delay_secs = 40 + prandom_u32_max(40);
	seed_timer.expires = jiffies +
			     msecs_to_jiffies(delay_secs * MSEC_PER_SEC);

	add_timer(&seed_timer);
}

/* Arm seed_timer for its first expiry, 40 seconds from now. */
static void __init __prandom_start_seed_timer(void)
{
	unsigned long first_delay = msecs_to_jiffies(40 * MSEC_PER_SEC);

	seed_timer.expires = jiffies + first_delay;
	add_timer(&seed_timer);
}

void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state)
{
	int i;
@@ -257,51 +124,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state)
}
EXPORT_SYMBOL(prandom_seed_full_state);

/*
 *	Generate better values after random number generator
 *	is fully initialized.
 *
 *	@late: false for the one-time initial seeding, true to force a
 *	       reseed even after the initial one has happened.
 */
static void __prandom_reseed(bool late)
{
	static DEFINE_SPINLOCK(reseed_lock);
	static bool seeded_once = false;
	unsigned long flags;

	/*
	 * Asking for random bytes might move bytes into the nonblocking
	 * pool and thereby mark it as initialized, which would bring us
	 * straight back here for a late reseed.  Using trylock makes that
	 * nested attempt give up immediately instead of reseeding again
	 * while we are still waiting for bytes.
	 */
	if (!spin_trylock_irqsave(&reseed_lock, flags))
		return;

	/* The initial seeding (late == false) is only allowed once. */
	if (late || !seeded_once) {
		seeded_once = true;
		prandom_seed_full_state(&net_rand_state);
	}

	spin_unlock_irqrestore(&reseed_lock, flags);
}

/*
 * Force a full reseed of net_rand_state; passing late == true bypasses
 * the "initial seeding only once" check in __prandom_reseed().
 */
void prandom_reseed_late(void)
{
	__prandom_reseed(true);
}

/*
 * Late initcall: perform the one-time initial full seeding
 * (late == false) and start the periodic reseed timer.  Always returns 0.
 */
static int __init prandom_reseed(void)
{
	__prandom_reseed(false);
	__prandom_start_seed_timer();
	return 0;
}
late_initcall(prandom_reseed);

#ifdef CONFIG_RANDOM32_SELFTEST
static struct prandom_test1 {
	u32 seed;
@@ -421,7 +243,28 @@ static struct prandom_test2 {
	{  407983964U, 921U,  728767059U },
};

static void __init prandom_state_selftest(void)
/*
 * Fetch a 32-bit value from the architecture's random source.
 * arch_get_random_seed_int() is preferred; arch_get_random_int() is the
 * fallback.  Returns 0 when neither call succeeds.
 */
static u32 __extract_hwseed(void)
{
	unsigned int hw = 0;

	if (!arch_get_random_seed_int(&hw))
		(void)arch_get_random_int(&hw);

	return hw;
}

/*
 * Fill all four words of @state starting from the 32-bit @seed.
 *
 * The words are chained: s1 comes from @seed, s2 from s1, s3 from s2 and
 * s4 from s3, each multiplied by 69069 through LCG().  When
 * @mix_with_hwseed is true a fresh __extract_hwseed() value is XORed into
 * every word; otherwise 0 is XORed in.
 *
 * NOTE(review): __seed() is not visible here; presumably it enforces the
 * per-word minimum passed as its second argument (2, 8, 16, 128) -- confirm.
 * LCG() and HWSEED() are not #undef'd inside this function.
 */
static void prandom_seed_early(struct rnd_state *state, u32 seed,
			       bool mix_with_hwseed)
{
#define LCG(x)	 ((x) * 69069U)	/* super-duper LCG */
#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
	state->s1 = __seed(HWSEED() ^ LCG(seed),        2U);
	state->s2 = __seed(HWSEED() ^ LCG(state->s1),   8U);
	state->s3 = __seed(HWSEED() ^ LCG(state->s2),  16U);
	state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
}

static int __init prandom_state_selftest(void)
{
	int i, j, errors = 0, runs = 0;
	bool error = false;
@@ -461,5 +304,266 @@ static void __init prandom_state_selftest(void)
		pr_warn("prandom: %d/%d self tests failed\n", errors, runs);
	else
		pr_info("prandom: %d self tests passed\n", runs);
	return 0;
}
core_initcall(prandom_state_selftest);
#endif

/*
 * The prandom_u32() implementation is now completely separate from the
 * prandom_state() functions, which are retained (for now) for compatibility.
 *
 * Because of (ab)use in the networking code for choosing random TCP/UDP port
 * numbers, which open DoS possibilities if guessable, we want something
 * stronger than a standard PRNG.  But the performance requirements of
 * the network code do not allow robust crypto for this application.
 *
 * So this is a homebrew Junior Spaceman implementation, based on the
 * lowest-latency trustworthy crypto primitive available, SipHash.
 * (The authors of SipHash have not been consulted about this abuse of
 * their work.)
 *
 * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to
 * one word of output.  This abbreviated version uses 2 rounds per word
 * of output.
 */

/*
 * Per-generator SipHash state: four machine words (64-bit words on
 * 64-bit builds, 32-bit words on 32-bit builds).
 */
struct siprand_state {
	unsigned long v0, v1, v2, v3;
};

/* File-local per-CPU SipHash state behind prandom_u32()/prandom_bytes(). */
static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;

/*
 * This is the core CPRNG function.  As "pseudorandom", this is not used
 * for truly valuable things, just intended to be a PITA to guess.
 * For maximum speed, we do just two SipHash rounds per word.  This is
 * the same rate as 4 rounds per 64 bits that SipHash normally uses,
 * so hopefully it's reasonably secure.
 *
 * There are two changes from the official SipHash finalization:
 * - We omit some constants XORed with v2 in the SipHash spec as irrelevant;
 *   they are there only to make the output rounds distinct from the input
 *   rounds, and this application has no input rounds.
 * - Rather than returning v0^v1^v2^v3, return v1+v3.
 *   If you look at the SipHash round, the last operation on v3 is
 *   "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time.
 *   Likewise "v1 ^= v2".  (The rotate of v2 makes a difference, but
 *   it still cancels out half of the bits in v2 for no benefit.)
 *   Second, since the last combining operation was xor, continue the
 *   pattern of alternating xor/add for a tiny bit of extra non-linearity.
 */
static inline u32 siprand_u32(struct siprand_state *s)
{
	unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3;

	PRND_SIPROUND(v0, v1, v2, v3);
	PRND_SIPROUND(v0, v1, v2, v3);
	s->v0 = v0;  s->v1 = v1;  s->v2 = v2;  s->v3 = v3;
	return v1 + v3;
}


/**
 *	prandom_u32 - pseudo random number generator
 *
 *	Returns one 32-bit value produced by siprand_u32() on the current
 *	CPU's net_rand_state.  The generator is built for speed; it is NOT
 *	considered safe for cryptographic use.
 */
u32 prandom_u32(void)
{
	struct siprand_state *cpu_state;
	u32 value;

	/* get_cpu_ptr()/put_cpu_ptr() bracket the access to this CPU's state. */
	cpu_state = get_cpu_ptr(&net_rand_state);
	value = siprand_u32(cpu_state);
	put_cpu_ptr(&net_rand_state);

	return value;
}
EXPORT_SYMBOL(prandom_u32);

/**
 *	prandom_bytes - get the requested number of pseudo-random bytes
 *	@buf: destination buffer; no alignment is required
 *	@bytes: number of bytes to produce
 *
 *	The buffer is filled 32 bits at a time from the current CPU's
 *	generator.  A trailing 1..3 bytes are taken from one extra 32-bit
 *	output, least significant byte first.
 */
void prandom_bytes(void *buf, size_t bytes)
{
	struct siprand_state *cpu_state = get_cpu_ptr(&net_rand_state);
	u8 *out = buf;

	/* Whole 32-bit words first. */
	for (; bytes >= sizeof(u32); bytes -= sizeof(u32), out += sizeof(u32))
		put_unaligned(siprand_u32(cpu_state), (u32 *)out);

	/* Then the tail; no extra output is drawn when nothing is left. */
	if (bytes > 0) {
		u32 tail = siprand_u32(cpu_state);

		for (; bytes > 0; bytes--) {
			*out++ = (u8)tail;
			tail >>= BITS_PER_BYTE;
		}
	}

	put_cpu_ptr(&net_rand_state);
}
EXPORT_SYMBOL(prandom_bytes);

/**
 *	prandom_seed - add entropy to pseudo random number generator
 *	@entropy: entropy value
 *
 *	Add some additional seed material to the prandom pool.
 *	The "entropy" is actually our IP address (the only caller is
 *	the network code); its purpose is not unpredictability but to make
 *	sure different machines end up initialized differently.
 *	The value is also passed to add_device_randomness().
 */
void prandom_seed(u32 entropy)
{
	int cpu;

	add_device_randomness(&entropy, sizeof(entropy));

	for_each_possible_cpu(cpu) {
		struct siprand_state *st = per_cpu_ptr(&net_rand_state, cpu);
		unsigned long v0 = st->v0;
		unsigned long v1 = st->v1;
		unsigned long v2 = st->v2;
		unsigned long v3 = st->v3;

		/*
		 * Absorb @entropy with two rounds, XORing it into v3 before
		 * and into v0 after.  Repeat while any state word is zero.
		 */
		do {
			v3 ^= entropy;
			PRND_SIPROUND(v0, v1, v2, v3);
			PRND_SIPROUND(v0, v1, v2, v3);
			v0 ^= entropy;
		} while (unlikely(!v0 || !v1 || !v2 || !v3));

		WRITE_ONCE(st->v0, v0);
		WRITE_ONCE(st->v1, v1);
		WRITE_ONCE(st->v2, v2);
		WRITE_ONCE(st->v3, v3);
	}
}
EXPORT_SYMBOL(prandom_seed);

/*
 *	Core initcall: generate some initially weak seeding values so the
 *	prandom_u32() engine can be started.
 *
 *	v0/v1 come from arch_get_random_long() when it succeeds, otherwise
 *	from jiffies and random_get_entropy().  The running state is carried
 *	from one CPU to the next, with the CPU number mixed in each time.
 *	Always returns 0.
 */
static int __init prandom_init_early(void)
{
	unsigned long v0, v1, v2, v3;
	int cpu;

	if (!arch_get_random_long(&v0))
		v0 = jiffies;
	if (!arch_get_random_long(&v1))
		v1 = random_get_entropy();
	v2 = v0 ^ PRND_K0;
	v3 = v1 ^ PRND_K1;

	for_each_possible_cpu(cpu) {
		struct siprand_state *st = per_cpu_ptr(&net_rand_state, cpu);

		/* Absorb the CPU number as one message word. */
		v3 ^= cpu;
		PRND_SIPROUND(v0, v1, v2, v3);
		PRND_SIPROUND(v0, v1, v2, v3);
		v0 ^= cpu;

		st->v0 = v0;
		st->v1 = v1;
		st->v2 = v2;
		st->v3 = v3;
	}

	return 0;
}
core_initcall(prandom_init_early);


/* Stronger reseeding when available, and periodically thereafter. */
static void prandom_reseed(struct timer_list *unused);

static DEFINE_TIMER(seed_timer, prandom_reseed);

/*
 * Timer callback: give every possible CPU a freshly keyed state taken
 * from get_random_long(), then re-arm seed_timer.  @unused is ignored.
 */
static void prandom_reseed(struct timer_list *unused)
{
	unsigned long next;
	int cpu;

	/*
	 * Reinitialize each CPU's PRNG with 128 bits of key.
	 * The CPUs are not locked; somewhat random results are,
	 * well, expected.
	 */
	for_each_possible_cpu(cpu) {
		struct siprand_state *st = per_cpu_ptr(&net_rand_state, cpu);
		unsigned long v0 = get_random_long();
		unsigned long v1 = get_random_long();
		unsigned long v2 = v0 ^ PRND_K0;
		unsigned long v3 = v1 ^ PRND_K1;
#if BITS_PER_LONG == 32
		int word;

		/*
		 * On 32-bit machines, hash in two extra words to
		 * approximate 128-bit key length.  Not that the hash
		 * has that much security, but this prevents a trivial
		 * 64-bit brute force.
		 */
		for (word = 0; word < 2; word++) {
			unsigned long m = get_random_long();

			v3 ^= m;
			PRND_SIPROUND(v0, v1, v2, v3);
			PRND_SIPROUND(v0, v1, v2, v3);
			v0 ^= m;
		}
#endif
		/*
		 * Probably impossible in practice, but a race between this
		 * reseeding and the target CPU writing its state back could
		 * in theory create the all-zero SipHash fixed point.
		 *
		 * Rule that out by never writing a zero word: any zero is
		 * replaced with all-ones first.
		 */
		if (!v0)
			v0 = -1ul;
		if (!v1)
			v1 = -1ul;
		if (!v2)
			v2 = -1ul;
		if (!v3)
			v3 = -1ul;

		WRITE_ONCE(st->v0, v0);
		WRITE_ONCE(st->v1, v1);
		WRITE_ONCE(st->v2, v2);
		WRITE_ONCE(st->v3, v3);
	}

	/* reseed every ~60 seconds, in [40 .. 80) interval with slack */
	next = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ));
	mod_timer(&seed_timer, next);
}

/*
 * The random ready callback can be called from almost any interrupt.
 * To avoid worrying about whether it's safe to delay that interrupt
 * long enough to seed all CPUs, just schedule an immediate timer event.
 *
 * Concretely, seed_timer is (re)armed with an expiry of "now" (jiffies).
 * @unused is ignored; the parameter only exists so this function can be
 * stored in a struct random_ready_callback.
 */
static void prandom_timer_start(struct random_ready_callback *unused)
{
	mod_timer(&seed_timer, jiffies);
}

/*
 * Late initcall: start periodic full reseeding as soon as strong
 * random numbers are available.
 *
 * A ready callback is registered with add_random_ready_callback().  If
 * that reports -EALREADY, the timer is started directly and 0 is
 * returned; any other result is passed back to the caller unchanged.
 */
static int __init prandom_init_late(void)
{
	static struct random_ready_callback ready_cb = {
		.func = prandom_timer_start
	};
	int err;

	err = add_random_ready_callback(&ready_cb);
	if (err != -EALREADY)
		return err;

	prandom_timer_start(&ready_cb);
	return 0;
}
late_initcall(prandom_init_late);