From: Greg Kroah-Hartman Date: Wed, 28 Jul 2021 12:29:52 +0000 (+0200) Subject: drop queue-5.10/net-introduce-preferred-busy-polling.patch X-Git-Tag: v5.10.54~2 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=ee16fa208d86a9d4483158e4457fc95ed72b8278;p=thirdparty%2Fkernel%2Fstable-queue.git drop queue-5.10/net-introduce-preferred-busy-polling.patch --- diff --git a/queue-5.10/net-introduce-preferred-busy-polling.patch b/queue-5.10/net-introduce-preferred-busy-polling.patch deleted file mode 100644 index 85db83a47c7..00000000000 --- a/queue-5.10/net-introduce-preferred-busy-polling.patch +++ /dev/null @@ -1,442 +0,0 @@ -From 68a591042e8fd5c95c5882d812e76ed215988e5b Mon Sep 17 00:00:00 2001 -From: Sasha Levin -Date: Mon, 30 Nov 2020 19:51:56 +0100 -Subject: net: Introduce preferred busy-polling -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -From: Björn Töpel - -[ Upstream commit 7fd3253a7de6a317a0683f83739479fb880bffc8 ] - -The existing busy-polling mode, enabled by the SO_BUSY_POLL socket -option or system-wide using the /proc/sys/net/core/busy_read knob, is -opportunistic. That means that if the NAPI context is not -scheduled, it will poll it. If, after busy-polling, the budget is -exceeded, the busy-polling logic will schedule the NAPI onto the -regular softirq handling. - -One implication of the behavior above is that a busy/heavily loaded NAPI -context will never enter/allow for busy-polling. Some applications -prefer that most NAPI processing would be done by busy-polling. - -This series adds a new socket option, SO_PREFER_BUSY_POLL, that works -in concert with the napi_defer_hard_irqs and gro_flush_timeout -knobs. The napi_defer_hard_irqs and gro_flush_timeout knobs were -introduced in commit 6f8b12d661d0 ("net: napi: add hard irqs deferral -feature"), and allow a user to defer enabling interrupts and -instead schedule the NAPI context from a watchdog timer. 
When a user -enables the SO_PREFER_BUSY_POLL, again with the other knobs enabled, -and the NAPI context is being processed by a softirq, the softirq NAPI -processing will exit early to allow the busy-polling to be performed. - -If the application stops performing busy-polling via a system call, -the watchdog timer defined by gro_flush_timeout will time out, and -regular softirq handling will resume. - -In summary: heavy-traffic applications that prefer busy-polling over -softirq processing should use this option. - -Example usage: - - $ echo 2 | sudo tee /sys/class/net/ens785f1/napi_defer_hard_irqs - $ echo 200000 | sudo tee /sys/class/net/ens785f1/gro_flush_timeout - -Note that the timeout should be larger than the userspace processing -window, otherwise the watchdog will time out and fall back to regular -softirq processing. - -Enable the SO_BUSY_POLL/SO_PREFER_BUSY_POLL options on your socket. - -Signed-off-by: Björn Töpel -Signed-off-by: Daniel Borkmann -Reviewed-by: Jakub Kicinski -Link: https://lore.kernel.org/bpf/20201130185205.196029-2-bjorn.topel@gmail.com -Signed-off-by: Sasha Levin ---- - arch/alpha/include/uapi/asm/socket.h | 2 + - arch/mips/include/uapi/asm/socket.h | 2 + - arch/parisc/include/uapi/asm/socket.h | 2 + - arch/sparc/include/uapi/asm/socket.h | 2 + - fs/eventpoll.c | 2 +- - include/linux/netdevice.h | 35 +++++++----- - include/net/busy_poll.h | 5 +- - include/net/sock.h | 4 ++ - include/uapi/asm-generic/socket.h | 2 + - net/core/dev.c | 78 +++++++++++++++++++++------ - net/core/sock.c | 9 ++++ - 11 files changed, 111 insertions(+), 32 deletions(-) - -diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h -index de6c4df61082..538359642554 100644 ---- a/arch/alpha/include/uapi/asm/socket.h -+++ b/arch/alpha/include/uapi/asm/socket.h -@@ -124,6 +124,8 @@ - - #define SO_DETACH_REUSEPORT_BPF 68 - -+#define SO_PREFER_BUSY_POLL 69 -+ - #if !defined(__KERNEL__) - - #if __BITS_PER_LONG == 64 -diff --git 
a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h -index d0a9ed2ca2d6..e406e73b5e6e 100644 ---- a/arch/mips/include/uapi/asm/socket.h -+++ b/arch/mips/include/uapi/asm/socket.h -@@ -135,6 +135,8 @@ - - #define SO_DETACH_REUSEPORT_BPF 68 - -+#define SO_PREFER_BUSY_POLL 69 -+ - #if !defined(__KERNEL__) - - #if __BITS_PER_LONG == 64 -diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h -index 10173c32195e..1bc46200889d 100644 ---- a/arch/parisc/include/uapi/asm/socket.h -+++ b/arch/parisc/include/uapi/asm/socket.h -@@ -116,6 +116,8 @@ - - #define SO_DETACH_REUSEPORT_BPF 0x4042 - -+#define SO_PREFER_BUSY_POLL 0x4043 -+ - #if !defined(__KERNEL__) - - #if __BITS_PER_LONG == 64 -diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h -index 8029b681fc7c..99688cf673a4 100644 ---- a/arch/sparc/include/uapi/asm/socket.h -+++ b/arch/sparc/include/uapi/asm/socket.h -@@ -117,6 +117,8 @@ - - #define SO_DETACH_REUSEPORT_BPF 0x0047 - -+#define SO_PREFER_BUSY_POLL 0x0048 -+ - #if !defined(__KERNEL__) - - -diff --git a/fs/eventpoll.c b/fs/eventpoll.c -index 6094b2e9058b..9e5b05e818ad 100644 ---- a/fs/eventpoll.c -+++ b/fs/eventpoll.c -@@ -397,7 +397,7 @@ static void ep_busy_loop(struct eventpoll *ep, int nonblock) - unsigned int napi_id = READ_ONCE(ep->napi_id); - - if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) -- napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep); -+ napi_busy_loop(napi_id, nonblock ? 
NULL : ep_busy_loop_end, ep, false); - } - - static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep) -diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h -index e37480b5f4c0..2488638a8749 100644 ---- a/include/linux/netdevice.h -+++ b/include/linux/netdevice.h -@@ -350,23 +350,25 @@ struct napi_struct { - }; - - enum { -- NAPI_STATE_SCHED, /* Poll is scheduled */ -- NAPI_STATE_MISSED, /* reschedule a napi */ -- NAPI_STATE_DISABLE, /* Disable pending */ -- NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ -- NAPI_STATE_LISTED, /* NAPI added to system lists */ -- NAPI_STATE_NO_BUSY_POLL,/* Do not add in napi_hash, no busy polling */ -- NAPI_STATE_IN_BUSY_POLL,/* sk_busy_loop() owns this NAPI */ -+ NAPI_STATE_SCHED, /* Poll is scheduled */ -+ NAPI_STATE_MISSED, /* reschedule a napi */ -+ NAPI_STATE_DISABLE, /* Disable pending */ -+ NAPI_STATE_NPSVC, /* Netpoll - don't dequeue from poll_list */ -+ NAPI_STATE_LISTED, /* NAPI added to system lists */ -+ NAPI_STATE_NO_BUSY_POLL, /* Do not add in napi_hash, no busy polling */ -+ NAPI_STATE_IN_BUSY_POLL, /* sk_busy_loop() owns this NAPI */ -+ NAPI_STATE_PREFER_BUSY_POLL, /* prefer busy-polling over softirq processing*/ - }; - - enum { -- NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), -- NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), -- NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), -- NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), -- NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), -- NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), -- NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), -+ NAPIF_STATE_SCHED = BIT(NAPI_STATE_SCHED), -+ NAPIF_STATE_MISSED = BIT(NAPI_STATE_MISSED), -+ NAPIF_STATE_DISABLE = BIT(NAPI_STATE_DISABLE), -+ NAPIF_STATE_NPSVC = BIT(NAPI_STATE_NPSVC), -+ NAPIF_STATE_LISTED = BIT(NAPI_STATE_LISTED), -+ NAPIF_STATE_NO_BUSY_POLL = BIT(NAPI_STATE_NO_BUSY_POLL), -+ NAPIF_STATE_IN_BUSY_POLL = BIT(NAPI_STATE_IN_BUSY_POLL), -+ NAPIF_STATE_PREFER_BUSY_POLL = 
BIT(NAPI_STATE_PREFER_BUSY_POLL), - }; - - enum gro_result { -@@ -437,6 +439,11 @@ static inline bool napi_disable_pending(struct napi_struct *n) - return test_bit(NAPI_STATE_DISABLE, &n->state); - } - -+static inline bool napi_prefer_busy_poll(struct napi_struct *n) -+{ -+ return test_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); -+} -+ - bool napi_schedule_prep(struct napi_struct *n); - - /** -diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h -index b001fa91c14e..0292b8353d7e 100644 ---- a/include/net/busy_poll.h -+++ b/include/net/busy_poll.h -@@ -43,7 +43,7 @@ bool sk_busy_loop_end(void *p, unsigned long start_time); - - void napi_busy_loop(unsigned int napi_id, - bool (*loop_end)(void *, unsigned long), -- void *loop_end_arg); -+ void *loop_end_arg, bool prefer_busy_poll); - - #else /* CONFIG_NET_RX_BUSY_POLL */ - static inline unsigned long net_busy_loop_on(void) -@@ -105,7 +105,8 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock) - unsigned int napi_id = READ_ONCE(sk->sk_napi_id); - - if (napi_id >= MIN_NAPI_ID) -- napi_busy_loop(napi_id, nonblock ? NULL : sk_busy_loop_end, sk); -+ napi_busy_loop(napi_id, nonblock ? 
NULL : sk_busy_loop_end, sk, -+ READ_ONCE(sk->sk_prefer_busy_poll)); - #endif - } - -diff --git a/include/net/sock.h b/include/net/sock.h -index 3c7addf95150..95311369567f 100644 ---- a/include/net/sock.h -+++ b/include/net/sock.h -@@ -301,6 +301,7 @@ struct bpf_local_storage; - * @sk_ack_backlog: current listen backlog - * @sk_max_ack_backlog: listen backlog set in listen() - * @sk_uid: user id of owner -+ * @sk_prefer_busy_poll: prefer busypolling over softirq processing - * @sk_priority: %SO_PRIORITY setting - * @sk_type: socket type (%SOCK_STREAM, etc) - * @sk_protocol: which protocol this socket belongs in this network family -@@ -479,6 +480,9 @@ struct sock { - u32 sk_ack_backlog; - u32 sk_max_ack_backlog; - kuid_t sk_uid; -+#ifdef CONFIG_NET_RX_BUSY_POLL -+ u8 sk_prefer_busy_poll; -+#endif - struct pid *sk_peer_pid; - const struct cred *sk_peer_cred; - long sk_rcvtimeo; -diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h -index 77f7c1638eb1..7dd02408b7ce 100644 ---- a/include/uapi/asm-generic/socket.h -+++ b/include/uapi/asm-generic/socket.h -@@ -119,6 +119,8 @@ - - #define SO_DETACH_REUSEPORT_BPF 68 - -+#define SO_PREFER_BUSY_POLL 69 -+ - #if !defined(__KERNEL__) - - #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) -diff --git a/net/core/dev.c b/net/core/dev.c -index 2fdf30eefc59..6b08de52bf0e 100644 ---- a/net/core/dev.c -+++ b/net/core/dev.c -@@ -6496,7 +6496,8 @@ bool napi_complete_done(struct napi_struct *n, int work_done) - - WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED)); - -- new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED); -+ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED | -+ NAPIF_STATE_PREFER_BUSY_POLL); - - /* If STATE_MISSED was set, leave STATE_SCHED set, - * because we will call napi->poll() one more time. 
-@@ -6535,8 +6536,29 @@ static struct napi_struct *napi_by_id(unsigned int napi_id) - - #define BUSY_POLL_BUDGET 8 - --static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) -+static void __busy_poll_stop(struct napi_struct *napi, bool skip_schedule) - { -+ if (!skip_schedule) { -+ gro_normal_list(napi); -+ __napi_schedule(napi); -+ return; -+ } -+ -+ if (napi->gro_bitmask) { -+ /* flush too old packets -+ * If HZ < 1000, flush all packets. -+ */ -+ napi_gro_flush(napi, HZ >= 1000); -+ } -+ -+ gro_normal_list(napi); -+ clear_bit(NAPI_STATE_SCHED, &napi->state); -+} -+ -+static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock, bool prefer_busy_poll) -+{ -+ bool skip_schedule = false; -+ unsigned long timeout; - int rc; - - /* Busy polling means there is a high chance device driver hard irq -@@ -6553,6 +6575,15 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) - - local_bh_disable(); - -+ if (prefer_busy_poll) { -+ napi->defer_hard_irqs_count = READ_ONCE(napi->dev->napi_defer_hard_irqs); -+ timeout = READ_ONCE(napi->dev->gro_flush_timeout); -+ if (napi->defer_hard_irqs_count && timeout) { -+ hrtimer_start(&napi->timer, ns_to_ktime(timeout), HRTIMER_MODE_REL_PINNED); -+ skip_schedule = true; -+ } -+ } -+ - /* All we really want here is to re-enable device interrupts. - * Ideally, a new ndo_busy_poll_stop() could avoid another round. - */ -@@ -6563,19 +6594,14 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) - */ - trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); - netpoll_poll_unlock(have_poll_lock); -- if (rc == BUSY_POLL_BUDGET) { -- /* As the whole budget was spent, we still own the napi so can -- * safely handle the rx_list. 
-- */ -- gro_normal_list(napi); -- __napi_schedule(napi); -- } -+ if (rc == BUSY_POLL_BUDGET) -+ __busy_poll_stop(napi, skip_schedule); - local_bh_enable(); - } - - void napi_busy_loop(unsigned int napi_id, - bool (*loop_end)(void *, unsigned long), -- void *loop_end_arg) -+ void *loop_end_arg, bool prefer_busy_poll) - { - unsigned long start_time = loop_end ? busy_loop_current_time() : 0; - int (*napi_poll)(struct napi_struct *napi, int budget); -@@ -6603,12 +6629,18 @@ restart: - * we avoid dirtying napi->state as much as we can. - */ - if (val & (NAPIF_STATE_DISABLE | NAPIF_STATE_SCHED | -- NAPIF_STATE_IN_BUSY_POLL)) -+ NAPIF_STATE_IN_BUSY_POLL)) { -+ if (prefer_busy_poll) -+ set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state); - goto count; -+ } - if (cmpxchg(&napi->state, val, - val | NAPIF_STATE_IN_BUSY_POLL | -- NAPIF_STATE_SCHED) != val) -+ NAPIF_STATE_SCHED) != val) { -+ if (prefer_busy_poll) -+ set_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state); - goto count; -+ } - have_poll_lock = netpoll_poll_lock(napi); - napi_poll = napi->poll; - } -@@ -6626,7 +6658,7 @@ count: - - if (unlikely(need_resched())) { - if (napi_poll) -- busy_poll_stop(napi, have_poll_lock); -+ busy_poll_stop(napi, have_poll_lock, prefer_busy_poll); - preempt_enable(); - rcu_read_unlock(); - cond_resched(); -@@ -6637,7 +6669,7 @@ count: - cpu_relax(); - } - if (napi_poll) -- busy_poll_stop(napi, have_poll_lock); -+ busy_poll_stop(napi, have_poll_lock, prefer_busy_poll); - preempt_enable(); - out: - rcu_read_unlock(); -@@ -6688,8 +6720,10 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer) - * NAPI_STATE_MISSED, since we do not react to a device IRQ. 
- */ - if (!napi_disable_pending(napi) && -- !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) -+ !test_and_set_bit(NAPI_STATE_SCHED, &napi->state)) { -+ clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &napi->state); - __napi_schedule_irqoff(napi); -+ } - - return HRTIMER_NORESTART; - } -@@ -6747,6 +6781,7 @@ void napi_disable(struct napi_struct *n) - - hrtimer_cancel(&n->timer); - -+ clear_bit(NAPI_STATE_PREFER_BUSY_POLL, &n->state); - clear_bit(NAPI_STATE_DISABLE, &n->state); - } - EXPORT_SYMBOL(napi_disable); -@@ -6819,6 +6854,19 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll) - goto out_unlock; - } - -+ /* The NAPI context has more processing work, but busy-polling -+ * is preferred. Exit early. -+ */ -+ if (napi_prefer_busy_poll(n)) { -+ if (napi_complete_done(n, work)) { -+ /* If timeout is not set, we need to make sure -+ * that the NAPI is re-scheduled. -+ */ -+ napi_schedule(n); -+ } -+ goto out_unlock; -+ } -+ - if (n->gro_bitmask) { - /* flush too old packets - * If HZ < 1000, flush all packets. 
-diff --git a/net/core/sock.c b/net/core/sock.c -index 7de51ea15cdf..cf0e5fc3a8ba 100644 ---- a/net/core/sock.c -+++ b/net/core/sock.c -@@ -1167,6 +1167,12 @@ set_sndbuf: - sk->sk_ll_usec = val; - } - break; -+ case SO_PREFER_BUSY_POLL: -+ if (valbool && !capable(CAP_NET_ADMIN)) -+ ret = -EPERM; -+ else -+ WRITE_ONCE(sk->sk_prefer_busy_poll, valbool); -+ break; - #endif - - case SO_MAX_PACING_RATE: -@@ -1531,6 +1537,9 @@ int sock_getsockopt(struct socket *sock, int level, int optname, - case SO_BUSY_POLL: - v.val = sk->sk_ll_usec; - break; -+ case SO_PREFER_BUSY_POLL: -+ v.val = READ_ONCE(sk->sk_prefer_busy_poll); -+ break; - #endif - - case SO_MAX_PACING_RATE: --- -2.30.2 - diff --git a/queue-5.10/series b/queue-5.10/series index 3db827d0f78..7e26baf7b18 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -11,7 +11,6 @@ igb-check-if-num-of-q_vectors-is-smaller-than-max-be.patch igb-fix-position-of-assignment-to-ring.patch gve-fix-an-error-handling-path-in-gve_probe.patch net-add-kcov-handle-to-skb-extensions.patch -net-introduce-preferred-busy-polling.patch bonding-fix-suspicious-rcu-usage-in-bond_ipsec_add_s.patch bonding-fix-null-dereference-in-bond_ipsec_add_sa.patch ixgbevf-use-xso.real_dev-instead-of-xso.dev-in-callb.patch