From c21fe85d19d17f68127d46e343e990fc2c3b5bef Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 17 Nov 2020 11:40:49 +0100 Subject: [PATCH] 4.19-stable patches added patches: random32-make-prandom_u32-output-unpredictable.patch x86-speculation-allow-ibpb-to-be-conditionally-enabled-on-cpus-with-always-on-stibp.patch --- ...ake-prandom_u32-output-unpredictable.patch | 649 ++++++++++++++++++ queue-4.19/series | 2 + ...enabled-on-cpus-with-always-on-stibp.patch | 148 ++++ 3 files changed, 799 insertions(+) create mode 100644 queue-4.19/random32-make-prandom_u32-output-unpredictable.patch create mode 100644 queue-4.19/x86-speculation-allow-ibpb-to-be-conditionally-enabled-on-cpus-with-always-on-stibp.patch diff --git a/queue-4.19/random32-make-prandom_u32-output-unpredictable.patch b/queue-4.19/random32-make-prandom_u32-output-unpredictable.patch new file mode 100644 index 00000000000..29155fe34bc --- /dev/null +++ b/queue-4.19/random32-make-prandom_u32-output-unpredictable.patch @@ -0,0 +1,649 @@ +From c51f8f88d705e06bd696d7510aff22b33eb8e638 Mon Sep 17 00:00:00 2001 +From: George Spelvin +Date: Sun, 9 Aug 2020 06:57:44 +0000 +Subject: random32: make prandom_u32() output unpredictable + +From: George Spelvin + +commit c51f8f88d705e06bd696d7510aff22b33eb8e638 upstream. + +Non-cryptographic PRNGs may have great statistical properties, but +are usually trivially predictable to someone who knows the algorithm, +given a small sample of their output. An LFSR like prandom_u32() is +particularly simple, even if the sample is widely scattered bits. + +It turns out the network stack uses prandom_u32() for some things like +random port numbers which it would prefer are *not* trivially predictable. +Predictability led to a practical DNS spoofing attack. Oops. + +This patch replaces the LFSR with a homebrew cryptographic PRNG based +on the SipHash round function, which is in turn seeded with 128 bits +of strong random key. (The authors of SipHash have *not* been consulted +about this abuse of their algorithm.) Speed is prioritized over security; +attacks are rare, while performance is always wanted. + +Replacing all callers of prandom_u32() is the quick fix. +Whether to reinstate a weaker PRNG for uses which can tolerate it +is an open question. + +Commit f227e3ec3b5c ("random32: update the net random state on interrupt +and activity") was an earlier attempt at a solution. This patch replaces +it. + +Reported-by: Amit Klein +Cc: Willy Tarreau +Cc: Eric Dumazet +Cc: "Jason A. Donenfeld" +Cc: Andy Lutomirski +Cc: Kees Cook +Cc: Thomas Gleixner +Cc: Peter Zijlstra +Cc: Linus Torvalds +Cc: tytso@mit.edu +Cc: Florian Westphal +Cc: Marc Plumb +Fixes: f227e3ec3b5c ("random32: update the net random state on interrupt and activity") +Signed-off-by: George Spelvin +Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/ +[ willy: partial reversal of f227e3ec3b5c; moved SIPROUND definitions + to prandom.h for later use; merged George's prandom_seed() proposal; + inlined siprand_u32(); replaced the net_rand_state[] array with 4 + members to fix a build issue; cosmetic cleanups to make checkpatch + happy; fixed RANDOM32_SELFTEST build ] +[wt: backported to 4.19 -- various context adjustments] +Signed-off-by: Willy Tarreau +Signed-off-by: Greg Kroah-Hartman +--- + drivers/char/random.c | 1 + include/linux/prandom.h | 36 +++ + kernel/time/timer.c | 7 + lib/random32.c | 462 +++++++++++++++++++++++++++++------------------- + 4 files changed, 317 insertions(+), 189 deletions(-) + +--- a/drivers/char/random.c ++++ b/drivers/char/random.c +@@ -1257,7 +1257,6 @@ void add_interrupt_randomness(int irq, i + + fast_mix(fast_pool); + add_interrupt_bench(cycles); +- this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]); + + if (unlikely(crng_init == 0)) { + if ((fast_pool->count >= 64) && +--- a/include/linux/prandom.h ++++ b/include/linux/prandom.h +@@ -16,12 +16,44 @@ void prandom_bytes(void *buf, size_t nby + void prandom_seed(u32 seed); + void prandom_reseed_late(void); + ++#if BITS_PER_LONG == 64 ++/* ++ * The core SipHash round function. Each line can be executed in ++ * parallel given enough CPU resources. ++ */ ++#define PRND_SIPROUND(v0, v1, v2, v3) ( \ ++ v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ ++ v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ ++ v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ ++ v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ ++) ++ ++#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) ++#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) ++ ++#elif BITS_PER_LONG == 32 ++/* ++ * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. ++ * This is weaker, but 32-bit machines are not used for high-traffic ++ * applications, so there is less output for an attacker to analyze. ++ */ ++#define PRND_SIPROUND(v0, v1, v2, v3) ( \ ++ v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ ++ v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ ++ v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ ++ v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ ++) ++#define PRND_K0 0x6c796765 ++#define PRND_K1 0x74656462 ++ ++#else ++#error Unsupported BITS_PER_LONG ++#endif ++ + struct rnd_state { + __u32 s1, s2, s3, s4; + }; + +-DECLARE_PER_CPU(struct rnd_state, net_rand_state); +- + u32 prandom_u32_state(struct rnd_state *state); + void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); + void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); +--- a/kernel/time/timer.c ++++ b/kernel/time/timer.c +@@ -1655,13 +1655,6 @@ void update_process_times(int user_tick) + scheduler_tick(); + if (IS_ENABLED(CONFIG_POSIX_TIMERS)) + run_posix_cpu_timers(p); +- +- /* The current CPU might make use of net randoms without receiving IRQs +- * to renew them often enough. Let's update the net_rand_state from a +- * non-constant value that's not affine to the number of calls to make +- * sure it's updated when there's some activity (we don't care in idle). +- */ +- this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick); + } + + /** +--- a/lib/random32.c ++++ b/lib/random32.c +@@ -40,16 +40,6 @@ + #include + #include + +-#ifdef CONFIG_RANDOM32_SELFTEST +-static void __init prandom_state_selftest(void); +-#else +-static inline void prandom_state_selftest(void) +-{ +-} +-#endif +- +-DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy; +- + /** + * prandom_u32_state - seeded pseudo-random number generator. + * @state: pointer to state structure holding seeded state. +@@ -70,25 +60,6 @@ u32 prandom_u32_state(struct rnd_state * + EXPORT_SYMBOL(prandom_u32_state); + + /** +- * prandom_u32 - pseudo random number generator +- * +- * A 32 bit pseudo-random number is generated using a fast +- * algorithm suitable for simulation. This algorithm is NOT +- * considered safe for cryptographic use. +- */ +-u32 prandom_u32(void) +-{ +- struct rnd_state *state = &get_cpu_var(net_rand_state); +- u32 res; +- +- res = prandom_u32_state(state); +- put_cpu_var(net_rand_state); +- +- return res; +-} +-EXPORT_SYMBOL(prandom_u32); +- +-/** + * prandom_bytes_state - get the requested number of pseudo-random bytes + * + * @state: pointer to state structure holding seeded state. +@@ -119,20 +90,6 @@ void prandom_bytes_state(struct rnd_stat + } + EXPORT_SYMBOL(prandom_bytes_state); + +-/** +- * prandom_bytes - get the requested number of pseudo-random bytes +- * @buf: where to copy the pseudo-random bytes to +- * @bytes: the requested number of bytes +- */ +-void prandom_bytes(void *buf, size_t bytes) +-{ +- struct rnd_state *state = &get_cpu_var(net_rand_state); +- +- prandom_bytes_state(state, buf, bytes); +- put_cpu_var(net_rand_state); +-} +-EXPORT_SYMBOL(prandom_bytes); +- + static void prandom_warmup(struct rnd_state *state) + { + /* Calling RNG ten times to satisfy recurrence condition */ +@@ -148,96 +105,6 @@ static void prandom_warmup(struct rnd_st + prandom_u32_state(state); + } + +-static u32 __extract_hwseed(void) +-{ +- unsigned int val = 0; +- +- (void)(arch_get_random_seed_int(&val) || +- arch_get_random_int(&val)); +- +- return val; +-} +- +-static void prandom_seed_early(struct rnd_state *state, u32 seed, +- bool mix_with_hwseed) +-{ +-#define LCG(x) ((x) * 69069U) /* super-duper LCG */ +-#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) +- state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); +- state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); +- state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); +- state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); +-} +- +-/** +- * prandom_seed - add entropy to pseudo random number generator +- * @seed: seed value +- * +- * Add some additional seeding to the prandom pool. +- */ +-void prandom_seed(u32 entropy) +-{ +- int i; +- /* +- * No locking on the CPUs, but then somewhat random results are, well, +- * expected. +- */ +- for_each_possible_cpu(i) { +- struct rnd_state *state = &per_cpu(net_rand_state, i); +- +- state->s1 = __seed(state->s1 ^ entropy, 2U); +- prandom_warmup(state); +- } +-} +-EXPORT_SYMBOL(prandom_seed); +- +-/* +- * Generate some initially weak seeding values to allow +- * to start the prandom_u32() engine. +- */ +-static int __init prandom_init(void) +-{ +- int i; +- +- prandom_state_selftest(); +- +- for_each_possible_cpu(i) { +- struct rnd_state *state = &per_cpu(net_rand_state, i); +- u32 weak_seed = (i + jiffies) ^ random_get_entropy(); +- +- prandom_seed_early(state, weak_seed, true); +- prandom_warmup(state); +- } +- +- return 0; +-} +-core_initcall(prandom_init); +- +-static void __prandom_timer(struct timer_list *unused); +- +-static DEFINE_TIMER(seed_timer, __prandom_timer); +- +-static void __prandom_timer(struct timer_list *unused) +-{ +- u32 entropy; +- unsigned long expires; +- +- get_random_bytes(&entropy, sizeof(entropy)); +- prandom_seed(entropy); +- +- /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ +- expires = 40 + prandom_u32_max(40); +- seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); +- +- add_timer(&seed_timer); +-} +- +-static void __init __prandom_start_seed_timer(void) +-{ +- seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC); +- add_timer(&seed_timer); +-} +- + void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) + { + int i; +@@ -257,51 +124,6 @@ void prandom_seed_full_state(struct rnd_ + } + EXPORT_SYMBOL(prandom_seed_full_state); + +-/* +- * Generate better values after random number generator +- * is fully initialized. +- */ +-static void __prandom_reseed(bool late) +-{ +- unsigned long flags; +- static bool latch = false; +- static DEFINE_SPINLOCK(lock); +- +- /* Asking for random bytes might result in bytes getting +- * moved into the nonblocking pool and thus marking it +- * as initialized. In this case we would double back into +- * this function and attempt to do a late reseed. +- * Ignore the pointless attempt to reseed again if we're +- * already waiting for bytes when the nonblocking pool +- * got initialized. +- */ +- +- /* only allow initial seeding (late == false) once */ +- if (!spin_trylock_irqsave(&lock, flags)) +- return; +- +- if (latch && !late) +- goto out; +- +- latch = true; +- prandom_seed_full_state(&net_rand_state); +-out: +- spin_unlock_irqrestore(&lock, flags); +-} +- +-void prandom_reseed_late(void) +-{ +- __prandom_reseed(true); +-} +- +-static int __init prandom_reseed(void) +-{ +- __prandom_reseed(false); +- __prandom_start_seed_timer(); +- return 0; +-} +-late_initcall(prandom_reseed); +- + #ifdef CONFIG_RANDOM32_SELFTEST + static struct prandom_test1 { + u32 seed; +@@ -421,7 +243,28 @@ static struct prandom_test2 { + { 407983964U, 921U, 728767059U }, + }; + +-static void __init prandom_state_selftest(void) ++static u32 __extract_hwseed(void) ++{ ++ unsigned int val = 0; ++ ++ (void)(arch_get_random_seed_int(&val) || ++ arch_get_random_int(&val)); ++ ++ return val; ++} ++ ++static void prandom_seed_early(struct rnd_state *state, u32 seed, ++ bool mix_with_hwseed) ++{ ++#define LCG(x) ((x) * 69069U) /* super-duper LCG */ ++#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) ++ state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); ++ state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); ++ state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); ++ state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); ++} ++ ++static int __init prandom_state_selftest(void) + { + int i, j, errors = 0, runs = 0; + bool error = false; +@@ -461,5 +304,266 @@ static void __init prandom_state_selftes + pr_warn("prandom: %d/%d self tests failed\n", errors, runs); + else + pr_info("prandom: %d self tests passed\n", runs); ++ return 0; ++} ++core_initcall(prandom_state_selftest); ++#endif ++ ++/* ++ * The prandom_u32() implementation is now completely separate from the ++ * prandom_state() functions, which are retained (for now) for compatibility. ++ * ++ * Because of (ab)use in the networking code for choosing random TCP/UDP port ++ * numbers, which open DoS possibilities if guessable, we want something ++ * stronger than a standard PRNG. But the performance requirements of ++ * the network code do not allow robust crypto for this application. ++ * ++ * So this is a homebrew Junior Spaceman implementation, based on the ++ * lowest-latency trustworthy crypto primitive available, SipHash. ++ * (The authors of SipHash have not been consulted about this abuse of ++ * their work.) ++ * ++ * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to ++ * one word of output. This abbreviated version uses 2 rounds per word ++ * of output. ++ */ ++ ++struct siprand_state { ++ unsigned long v0; ++ unsigned long v1; ++ unsigned long v2; ++ unsigned long v3; ++}; ++ ++static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy; ++ ++/* ++ * This is the core CPRNG function. As "pseudorandom", this is not used ++ * for truly valuable things, just intended to be a PITA to guess. ++ * For maximum speed, we do just two SipHash rounds per word. This is ++ * the same rate as 4 rounds per 64 bits that SipHash normally uses, ++ * so hopefully it's reasonably secure. ++ * ++ * There are two changes from the official SipHash finalization: ++ * - We omit some constants XORed with v2 in the SipHash spec as irrelevant; ++ * they are there only to make the output rounds distinct from the input ++ * rounds, and this application has no input rounds. ++ * - Rather than returning v0^v1^v2^v3, return v1+v3. ++ * If you look at the SipHash round, the last operation on v3 is ++ * "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time. ++ * Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but ++ * it still cancels out half of the bits in v2 for no benefit.) ++ * Second, since the last combining operation was xor, continue the ++ * pattern of alternating xor/add for a tiny bit of extra non-linearity. ++ */ ++static inline u32 siprand_u32(struct siprand_state *s) ++{ ++ unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3; ++ ++ PRND_SIPROUND(v0, v1, v2, v3); ++ PRND_SIPROUND(v0, v1, v2, v3); ++ s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3; ++ return v1 + v3; ++} ++ ++ ++/** ++ * prandom_u32 - pseudo random number generator ++ * ++ * A 32 bit pseudo-random number is generated using a fast ++ * algorithm suitable for simulation. This algorithm is NOT ++ * considered safe for cryptographic use. ++ */ ++u32 prandom_u32(void) ++{ ++ struct siprand_state *state = get_cpu_ptr(&net_rand_state); ++ u32 res = siprand_u32(state); ++ ++ put_cpu_ptr(&net_rand_state); ++ return res; ++} ++EXPORT_SYMBOL(prandom_u32); ++ ++/** ++ * prandom_bytes - get the requested number of pseudo-random bytes ++ * @buf: where to copy the pseudo-random bytes to ++ * @bytes: the requested number of bytes ++ */ ++void prandom_bytes(void *buf, size_t bytes) ++{ ++ struct siprand_state *state = get_cpu_ptr(&net_rand_state); ++ u8 *ptr = buf; ++ ++ while (bytes >= sizeof(u32)) { ++ put_unaligned(siprand_u32(state), (u32 *)ptr); ++ ptr += sizeof(u32); ++ bytes -= sizeof(u32); ++ } ++ ++ if (bytes > 0) { ++ u32 rem = siprand_u32(state); ++ ++ do { ++ *ptr++ = (u8)rem; ++ rem >>= BITS_PER_BYTE; ++ } while (--bytes > 0); ++ } ++ put_cpu_ptr(&net_rand_state); + } ++EXPORT_SYMBOL(prandom_bytes); ++ ++/** ++ * prandom_seed - add entropy to pseudo random number generator ++ * @entropy: entropy value ++ * ++ * Add some additional seed material to the prandom pool. ++ * The "entropy" is actually our IP address (the only caller is ++ * the network code), not for unpredictability, but to ensure that ++ * different machines are initialized differently. ++ */ ++void prandom_seed(u32 entropy) ++{ ++ int i; ++ ++ add_device_randomness(&entropy, sizeof(entropy)); ++ ++ for_each_possible_cpu(i) { ++ struct siprand_state *state = per_cpu_ptr(&net_rand_state, i); ++ unsigned long v0 = state->v0, v1 = state->v1; ++ unsigned long v2 = state->v2, v3 = state->v3; ++ ++ do { ++ v3 ^= entropy; ++ PRND_SIPROUND(v0, v1, v2, v3); ++ PRND_SIPROUND(v0, v1, v2, v3); ++ v0 ^= entropy; ++ } while (unlikely(!v0 || !v1 || !v2 || !v3)); ++ ++ WRITE_ONCE(state->v0, v0); ++ WRITE_ONCE(state->v1, v1); ++ WRITE_ONCE(state->v2, v2); ++ WRITE_ONCE(state->v3, v3); ++ } ++} ++EXPORT_SYMBOL(prandom_seed); ++ ++/* ++ * Generate some initially weak seeding values to allow ++ * the prandom_u32() engine to be started. ++ */ ++static int __init prandom_init_early(void) ++{ ++ int i; ++ unsigned long v0, v1, v2, v3; ++ ++ if (!arch_get_random_long(&v0)) ++ v0 = jiffies; ++ if (!arch_get_random_long(&v1)) ++ v1 = random_get_entropy(); ++ v2 = v0 ^ PRND_K0; ++ v3 = v1 ^ PRND_K1; ++ ++ for_each_possible_cpu(i) { ++ struct siprand_state *state; ++ ++ v3 ^= i; ++ PRND_SIPROUND(v0, v1, v2, v3); ++ PRND_SIPROUND(v0, v1, v2, v3); ++ v0 ^= i; ++ ++ state = per_cpu_ptr(&net_rand_state, i); ++ state->v0 = v0; state->v1 = v1; ++ state->v2 = v2; state->v3 = v3; ++ } ++ ++ return 0; ++} ++core_initcall(prandom_init_early); ++ ++ ++/* Stronger reseeding when available, and periodically thereafter. */ ++static void prandom_reseed(struct timer_list *unused); ++ ++static DEFINE_TIMER(seed_timer, prandom_reseed); ++ ++static void prandom_reseed(struct timer_list *unused) ++{ ++ unsigned long expires; ++ int i; ++ ++ /* ++ * Reinitialize each CPU's PRNG with 128 bits of key. ++ * No locking on the CPUs, but then somewhat random results are, ++ * well, expected. ++ */ ++ for_each_possible_cpu(i) { ++ struct siprand_state *state; ++ unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0; ++ unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1; ++#if BITS_PER_LONG == 32 ++ int j; ++ ++ /* ++ * On 32-bit machines, hash in two extra words to ++ * approximate 128-bit key length. Not that the hash ++ * has that much security, but this prevents a trivial ++ * 64-bit brute force. ++ */ ++ for (j = 0; j < 2; j++) { ++ unsigned long m = get_random_long(); ++ ++ v3 ^= m; ++ PRND_SIPROUND(v0, v1, v2, v3); ++ PRND_SIPROUND(v0, v1, v2, v3); ++ v0 ^= m; ++ } + #endif ++ /* ++ * Probably impossible in practice, but there is a ++ * theoretical risk that a race between this reseeding ++ * and the target CPU writing its state back could ++ * create the all-zero SipHash fixed point. ++ * ++ * To ensure that never happens, ensure the state ++ * we write contains no zero words. ++ */ ++ state = per_cpu_ptr(&net_rand_state, i); ++ WRITE_ONCE(state->v0, v0 ? v0 : -1ul); ++ WRITE_ONCE(state->v1, v1 ? v1 : -1ul); ++ WRITE_ONCE(state->v2, v2 ? v2 : -1ul); ++ WRITE_ONCE(state->v3, v3 ? v3 : -1ul); ++ } ++ ++ /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ ++ expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ)); ++ mod_timer(&seed_timer, expires); ++} ++ ++/* ++ * The random ready callback can be called from almost any interrupt. ++ * To avoid worrying about whether it's safe to delay that interrupt ++ * long enough to seed all CPUs, just schedule an immediate timer event. ++ */ ++static void prandom_timer_start(struct random_ready_callback *unused) ++{ ++ mod_timer(&seed_timer, jiffies); ++} ++ ++/* ++ * Start periodic full reseeding as soon as strong ++ * random numbers are available. ++ */ ++static int __init prandom_init_late(void) ++{ ++ static struct random_ready_callback random_ready = { ++ .func = prandom_timer_start ++ }; ++ int ret = add_random_ready_callback(&random_ready); ++ ++ if (ret == -EALREADY) { ++ prandom_timer_start(&random_ready); ++ ret = 0; ++ } ++ return ret; ++} ++late_initcall(prandom_init_late); diff --git a/queue-4.19/series b/queue-4.19/series index 3ebc6358db4..5b6827526a4 100644 --- a/queue-4.19/series +++ b/queue-4.19/series @@ -91,3 +91,5 @@ net-x25-fix-null-ptr-deref-in-x25_connect.patch tipc-fix-memory-leak-in-tipc_topsrv_start.patch vrf-fix-fast-path-output-packet-handling-with-async-netfilter-rules.patch r8169-fix-potential-skb-double-free-in-an-error-path.patch +random32-make-prandom_u32-output-unpredictable.patch +x86-speculation-allow-ibpb-to-be-conditionally-enabled-on-cpus-with-always-on-stibp.patch diff --git a/queue-4.19/x86-speculation-allow-ibpb-to-be-conditionally-enabled-on-cpus-with-always-on-stibp.patch b/queue-4.19/x86-speculation-allow-ibpb-to-be-conditionally-enabled-on-cpus-with-always-on-stibp.patch new file mode 100644 index 00000000000..e615794b3d0 --- /dev/null +++ b/queue-4.19/x86-speculation-allow-ibpb-to-be-conditionally-enabled-on-cpus-with-always-on-stibp.patch @@ -0,0 +1,148 @@ +From 1978b3a53a74e3230cd46932b149c6e62e832e9a Mon Sep 17 00:00:00 2001 +From: Anand K Mistry +Date: Thu, 5 Nov 2020 16:33:04 +1100 +Subject: x86/speculation: Allow IBPB to be conditionally enabled on CPUs with always-on STIBP + +From: Anand K Mistry + +commit 1978b3a53a74e3230cd46932b149c6e62e832e9a upstream. + +On AMD CPUs which have the feature X86_FEATURE_AMD_STIBP_ALWAYS_ON, +STIBP is set to on and + + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED + +At the same time, IBPB can be set to conditional. + +However, this leads to the case where it's impossible to turn on IBPB +for a process because in the PR_SPEC_DISABLE case in ib_prctl_set() the + + spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED + +condition leads to a return before the task flag is set. Similarly, +ib_prctl_get() will return PR_SPEC_DISABLE even though IBPB is set to +conditional. + +More generally, the following cases are possible: + +1. STIBP = conditional && IBPB = on for spectre_v2_user=seccomp,ibpb +2. STIBP = on && IBPB = conditional for AMD CPUs with + X86_FEATURE_AMD_STIBP_ALWAYS_ON + +The first case functions correctly today, but only because +spectre_v2_user_ibpb isn't updated to reflect the IBPB mode. + +At a high level, this change does one thing. If either STIBP or IBPB +is set to conditional, allow the prctl to change the task flag. +Also, reflect that capability when querying the state. This isn't +perfect since it doesn't take into account if only STIBP or IBPB is +unconditionally on. But it allows the conditional feature to work as +expected, without affecting the unconditional one. + + [ bp: Massage commit message and comment; space out statements for + better readability. ] + +Fixes: 21998a351512 ("x86/speculation: Avoid force-disabling IBPB based on STIBP and enhanced IBRS.") +Signed-off-by: Anand K Mistry +Signed-off-by: Borislav Petkov +Acked-by: Thomas Gleixner +Acked-by: Tom Lendacky +Link: https://lkml.kernel.org/r/20201105163246.v2.1.Ifd7243cd3e2c2206a893ad0a5b9a4f19549e22c6@changeid +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/cpu/bugs.c | 52 ++++++++++++++++++++++++++++----------------- + 1 file changed, 33 insertions(+), 19 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1240,6 +1240,14 @@ static int ssb_prctl_set(struct task_str + return 0; + } + ++static bool is_spec_ib_user_controlled(void) ++{ ++ return spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || ++ spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP; ++} ++ + static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) + { + switch (ctrl) { +@@ -1247,17 +1255,26 @@ static int ib_prctl_set(struct task_stru + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) + return 0; +- /* +- * Indirect branch speculation is always disabled in strict +- * mode. It can neither be enabled if it was force-disabled +- * by a previous prctl call. + ++ /* ++ * With strict mode for both IBPB and STIBP, the instruction ++ * code paths avoid checking this task flag and instead, ++ * unconditionally run the instruction. However, STIBP and IBPB ++ * are independent and either can be set to conditionally ++ * enabled regardless of the mode of the other. ++ * ++ * If either is set to conditional, allow the task flag to be ++ * updated, unless it was force-disabled by a previous prctl ++ * call. Currently, this is possible on an AMD CPU which has the ++ * feature X86_FEATURE_AMD_STIBP_ALWAYS_ON. In this case, if the ++ * kernel is booted with 'spectre_v2_user=seccomp', then ++ * spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP and ++ * spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED. + */ +- if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || +- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || +- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED || ++ if (!is_spec_ib_user_controlled() || + task_spec_ib_force_disable(task)) + return -EPERM; ++ + task_clear_spec_ib_disable(task); + task_update_spec_tif(task); + break; +@@ -1270,10 +1287,10 @@ static int ib_prctl_set(struct task_stru + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) + return -EPERM; +- if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || +- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || +- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) ++ ++ if (!is_spec_ib_user_controlled()) + return 0; ++ + task_set_spec_ib_disable(task); + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); +@@ -1336,20 +1353,17 @@ static int ib_prctl_get(struct task_stru + if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE && + spectre_v2_user_stibp == SPECTRE_V2_USER_NONE) + return PR_SPEC_ENABLE; +- else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || +- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || +- spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) +- return PR_SPEC_DISABLE; +- else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL || +- spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP || +- spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL || +- spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) { ++ else if (is_spec_ib_user_controlled()) { + if (task_spec_ib_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ib_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; +- } else ++ } else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT || ++ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED) ++ return PR_SPEC_DISABLE; ++ else + return PR_SPEC_NOT_AFFECTED; + } + -- 2.47.3