+++ /dev/null
-From 748bc4dd9e663f23448d8ad7e58c011a67ea1eca Mon Sep 17 00:00:00 2001
-From: "Jason A. Donenfeld" <Jason@zx2c4.com>
-Date: Thu, 22 Sep 2022 18:46:04 +0200
-Subject: random: use expired timer rather than wq for mixing fast pool
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Jason A. Donenfeld <Jason@zx2c4.com>
-
-commit 748bc4dd9e663f23448d8ad7e58c011a67ea1eca upstream.
-
-Previously, the fast pool was dumped into the main pool periodically in
-the fast pool's hard IRQ handler. This worked fine and there weren't
-problems with it, until RT came around. Since RT converts spinlocks into
-sleeping locks, problems cropped up. Rather than switching to raw
-spinlocks, the RT developers preferred we make the transformation from
-originally doing:
-
- do_some_stuff()
- spin_lock()
- do_some_other_stuff()
- spin_unlock()
-
-to doing:
-
- do_some_stuff()
- queue_work_on(some_other_stuff_worker)
-
-This is an ordinary pattern done all over the kernel. However, Sherry
-noticed a 10% performance regression in qperf TCP over a 40gbps
-InfiniBand card. Quoting her message:
-
-> MT27500 Family [ConnectX-3] cards:
-> Infiniband device 'mlx4_0' port 1 status:
-> default gid: fe80:0000:0000:0000:0010:e000:0178:9eb1
-> base lid: 0x6
-> sm lid: 0x1
-> state: 4: ACTIVE
-> phys state: 5: LinkUp
-> rate: 40 Gb/sec (4X QDR)
-> link_layer: InfiniBand
->
-> Cards are configured with IP addresses on private subnet for IPoIB
-> performance testing.
-> Regression identified in this bug is in TCP latency in this stack as reported
-> by qperf tcp_lat metric:
->
-> We have one system listen as a qperf server:
-> [root@yourQperfServer ~]# qperf
->
-> Have the other system connect to qperf server as a client (in this
-> case, it’s X7 server with Mellanox card):
-> [root@yourQperfClient ~]# numactl -m0 -N0 qperf 20.20.20.101 -v -uu -ub --time 60 --wait_server 20 -oo msg_size:4K:1024K:*2 tcp_lat
-
-Rather than incur the scheduling latency from queue_work_on, we can
-instead switch to running on the next timer tick, on the same core. This
-also batches things a bit more -- once per jiffy -- which is okay now
-that mix_interrupt_randomness() can credit multiple bits at once.
-
-Reported-by: Sherry Yang <sherry.yang@oracle.com>
-Tested-by: Paul Webb <paul.x.webb@oracle.com>
-Cc: Sherry Yang <sherry.yang@oracle.com>
-Cc: Phillip Goerl <phillip.goerl@oracle.com>
-Cc: Jack Vogel <jack.vogel@oracle.com>
-Cc: Nicky Veitch <nicky.veitch@oracle.com>
-Cc: Colm Harrington <colm.harrington@oracle.com>
-Cc: Ramanan Govindarajan <ramanan.govindarajan@oracle.com>
-Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
-Cc: Dominik Brodowski <linux@dominikbrodowski.net>
-Cc: Tejun Heo <tj@kernel.org>
-Cc: Sultan Alsawaf <sultan@kerneltoast.com>
-Cc: stable@vger.kernel.org
-Fixes: 58340f8e952b ("random: defer fast pool mixing to worker")
-Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- drivers/char/random.c | 18 +++++++++++-------
- 1 file changed, 11 insertions(+), 7 deletions(-)
-
---- a/drivers/char/random.c
-+++ b/drivers/char/random.c
-@@ -893,17 +893,20 @@ struct fast_pool {
- unsigned long pool[4];
- unsigned long last;
- unsigned int count;
-- struct work_struct mix;
-+ struct timer_list mix;
- };
-
-+static void mix_interrupt_randomness(struct timer_list *work);
-+
- static DEFINE_PER_CPU(struct fast_pool, irq_randomness) = {
- #ifdef CONFIG_64BIT
- #define FASTMIX_PERM SIPHASH_PERMUTATION
-- .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 }
-+ .pool = { SIPHASH_CONST_0, SIPHASH_CONST_1, SIPHASH_CONST_2, SIPHASH_CONST_3 },
- #else
- #define FASTMIX_PERM HSIPHASH_PERMUTATION
-- .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 }
-+ .pool = { HSIPHASH_CONST_0, HSIPHASH_CONST_1, HSIPHASH_CONST_2, HSIPHASH_CONST_3 },
- #endif
-+ .mix = __TIMER_INITIALIZER(mix_interrupt_randomness, 0)
- };
-
- /*
-@@ -945,7 +948,7 @@ int __cold random_online_cpu(unsigned in
- }
- #endif
-
--static void mix_interrupt_randomness(struct work_struct *work)
-+static void mix_interrupt_randomness(struct timer_list *work)
- {
- struct fast_pool *fast_pool = container_of(work, struct fast_pool, mix);
- /*
-@@ -999,10 +1002,11 @@ void add_interrupt_randomness(int irq)
- if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ))
- return;
-
-- if (unlikely(!fast_pool->mix.func))
-- INIT_WORK(&fast_pool->mix, mix_interrupt_randomness);
- fast_pool->count |= MIX_INFLIGHT;
-- queue_work_on(raw_smp_processor_id(), system_highpri_wq, &fast_pool->mix);
-+ if (!timer_pending(&fast_pool->mix)) {
-+ fast_pool->mix.expires = jiffies;
-+ add_timer_on(&fast_pool->mix, raw_smp_processor_id());
-+ }
- }
- EXPORT_SYMBOL_GPL(add_interrupt_randomness);
-
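Editor's note: the transformation the commit message describes -- moving the spinlock-protected mixing out of the hard IRQ handler into a worker queued on the same CPU -- is the pattern introduced by commit 58340f8e952b and replaced by this patch. For reference, here is a minimal kernel-style sketch of that workqueue deferral. All names (demo_fast_pool, demo_mix_work, demo_irq_handler, demo_irq_pool) are invented for illustration; this is not the actual drivers/char/random.c code, just the shape of the pattern, and it only uses workqueue/per-CPU APIs that exist in current kernels.

#include <linux/interrupt.h>   /* irqreturn_t, IRQ_HANDLED */
#include <linux/kernel.h>      /* container_of, unlikely */
#include <linux/percpu.h>      /* DEFINE_PER_CPU, this_cpu_ptr */
#include <linux/smp.h>         /* raw_smp_processor_id */
#include <linux/workqueue.h>   /* work_struct, queue_work_on, system_highpri_wq */

struct demo_fast_pool {
	struct work_struct mix;
	/* ... data accumulated in hard IRQ context ... */
};

static DEFINE_PER_CPU(struct demo_fast_pool, demo_irq_pool);

static void demo_mix_work(struct work_struct *work)
{
	struct demo_fast_pool *pool = container_of(work, struct demo_fast_pool, mix);

	/*
	 * Process context: ordinary spinlocks (which become sleeping
	 * locks on RT) may be taken here to dump the per-CPU data
	 * into the shared pool.
	 */
	(void)pool;
}

static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
	struct demo_fast_pool *pool = this_cpu_ptr(&demo_irq_pool);

	/* Lazily initialize the work item the first time this CPU gets here. */
	if (unlikely(!pool->mix.func))
		INIT_WORK(&pool->mix, demo_mix_work);

	/* Defer the locking work; pinning it to this CPU avoids cross-CPU traffic. */
	queue_work_on(raw_smp_processor_id(), system_highpri_wq, &pool->mix);
	return IRQ_HANDLED;
}

The cost this patch targets is the scheduling latency of waking a worker from queue_work_on() on every deferral, which is what showed up in Sherry's qperf tcp_lat numbers.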
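The replacement the patch makes -- running the mix on the next timer tick on the same core via an already-expired timer -- looks roughly like the sketch below. Again, the names are invented for illustration (the same demo_fast_pool, now holding a timer); the real change is in the diff above, where fast_pool->mix becomes a timer_list initialized with __TIMER_INITIALIZER() and armed with add_timer_on(). The sketch assumes a kernel recent enough that timer callbacks take a struct timer_list * and from_timer() is available.

#include <linux/jiffies.h>     /* jiffies */
#include <linux/percpu.h>      /* DEFINE_PER_CPU, this_cpu_ptr */
#include <linux/smp.h>         /* raw_smp_processor_id */
#include <linux/timer.h>       /* timer_list, __TIMER_INITIALIZER, add_timer_on */

struct demo_fast_pool {
	struct timer_list mix;
	/* ... data accumulated in hard IRQ context ... */
};

static void demo_mix_timer(struct timer_list *t)
{
	struct demo_fast_pool *pool = from_timer(pool, t, mix);

	/*
	 * Softirq context on the CPU the timer was added on; still a
	 * safe place to take the locks the hard IRQ handler must avoid.
	 */
	(void)pool;
}

static DEFINE_PER_CPU(struct demo_fast_pool, demo_irq_pool) = {
	.mix = __TIMER_INITIALIZER(demo_mix_timer, 0),
};

static void demo_hardirq_path(void)
{
	struct demo_fast_pool *pool = this_cpu_ptr(&demo_irq_pool);

	/* Keep at most one deferral in flight per CPU. */
	if (!timer_pending(&pool->mix)) {
		/* Already expired: fires on the very next timer tick. */
		pool->mix.expires = jiffies;
		add_timer_on(&pool->mix, raw_smp_processor_id());
	}
}

Because the timer's expiry is set to the current jiffies value, it fires on the next tick without a trip through the scheduler, and the timer_pending() check keeps only one instance armed per CPU -- which is what batches the mixing to roughly once per jiffy, acceptable now that mix_interrupt_randomness() can credit multiple bits at once.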