--- /dev/null
+From 899ee2c3829c5ac14bfc7d3c4a5846c0b709b78f Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 13 Jun 2024 10:48:11 +0200
+Subject: block: initialize integrity buffer to zero before writing it to media
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 899ee2c3829c5ac14bfc7d3c4a5846c0b709b78f upstream.
+
+Metadata added by bio_integrity_prep is using plain kmalloc, which leads
+to random kernel memory being written to media. For PI metadata this is
+limited to the app tag that isn't used by kernel generated metadata,
+but for non-PI metadata the entire buffer leaks kernel memory.
+
+Fix this by adding the __GFP_ZERO flag to allocations for writes.
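+
+As a purely illustrative, stand-alone user-space analogue of this leak
+class (not part of the change): a buffer that is only partially generated
+but written out in full exposes whatever the allocator handed back, which
+is exactly what zeroing the allocation prevents here.
+
+  #include <stdlib.h>
+  #include <string.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+      size_t len = 4096;
+      unsigned char *buf = malloc(len);  /* uninitialized, like kmalloc() */
+
+      if (!buf)
+          return 1;
+      memset(buf, 0xab, 16);             /* only part of it gets generated */
+      write(STDOUT_FILENO, buf, len);    /* the rest leaks old heap contents */
+      free(buf);
+      return 0;
+  }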
+
+Fixes: 7ba1ba12eeef ("block: Block layer data integrity support")
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Link: https://lore.kernel.org/r/20240613084839.1044015-2-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Shivani Agarwal <shivani.agarwal@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/bio-integrity.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/block/bio-integrity.c
++++ b/block/bio-integrity.c
+@@ -216,6 +216,7 @@ bool bio_integrity_prep(struct bio *bio)
+ unsigned int bytes, offset, i;
+ unsigned int intervals;
+ blk_status_t status;
++ gfp_t gfp = GFP_NOIO;
+
+ if (!bi)
+ return true;
+@@ -238,12 +239,20 @@ bool bio_integrity_prep(struct bio *bio)
+ if (!bi->profile->generate_fn ||
+ !(bi->flags & BLK_INTEGRITY_GENERATE))
+ return true;
++
++ /*
++ * Zero the memory allocated to not leak uninitialized kernel
++ * memory to disk. For PI this only affects the app tag, but
++ * for non-integrity metadata it affects the entire metadata
++ * buffer.
++ */
++ gfp |= __GFP_ZERO;
+ }
+ intervals = bio_integrity_intervals(bi, bio_sectors(bio));
+
+ /* Allocate kernel buffer for protection data */
+ len = intervals * bi->tuple_size;
+- buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
++ buf = kmalloc(len, gfp | q->bounce_gfp);
+ status = BLK_STS_RESOURCE;
+ if (unlikely(buf == NULL)) {
+ printk(KERN_ERR "could not allocate integrity buffer\n");
--- /dev/null
+From stable+bounces-72956-greg=kroah.com@vger.kernel.org Wed Sep 4 03:36:41 2024
+From: Connor O'Brien <connor.obrien@crowdstrike.com>
+Date: Tue, 3 Sep 2024 18:28:51 -0700
+Subject: bpf, cgroup: Assign cgroup in cgroup_sk_alloc when called from interrupt
+To: <stable@vger.kernel.org>
+Cc: <martin.kelly@crowdstrike.com>, Daniel Borkmann <daniel@iogearbox.net>, Connor O'Brien <connor.obrien@crowdstrike.com>
+Message-ID: <20240904012851.58167-2-connor.obrien@crowdstrike.com>
+
+From: Connor O'Brien <connor.obrien@crowdstrike.com>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 78cc316e9583067884eb8bd154301dc1e9ee945c upstream.
+
+If cgroup_sk_alloc() is called from interrupt context, then just assign the
+root cgroup to skcd->cgroup. Prior to commit 8520e224f547 ("bpf, cgroups:
+Fix cgroup v2 fallback on v1/v2 mixed mode") we would just return, and later
+on in sock_cgroup_ptr(), we were NULL-testing the cgroup in fast-path, and
+iff indeed NULL returning the root cgroup (v ?: &cgrp_dfl_root.cgrp). Rather
+than re-adding the NULL-test to the fast-path we can just assign it once from
+cgroup_sk_alloc() given v1/v2 handling has been simplified. The migration from
+NULL test with returning &cgrp_dfl_root.cgrp to assigning &cgrp_dfl_root.cgrp
+directly does /not/ change behavior for callers of sock_cgroup_ptr().
+
+syzkaller was able to trigger a splat in the legacy netrom code base, where
+the RX handler in nr_rx_frame() calls nr_make_new() which calls sk_alloc()
+and therefore cgroup_sk_alloc() while in_interrupt() is true. Thus
+skcd->cgroup is left NULL, and cgroup_sk_free() later trips over it since
+it expects a non-NULL object. There are a few other candidates aside from
+netrom which have a similar pattern: in their accept-like implementation,
+they just call sk_alloc() and thus cgroup_sk_alloc() instead of
+sk_clone_lock() with the corresponding cgroup_sk_clone(), which would
+inherit the cgroup from the parent socket. None of them are related to
+core protocols where BPF cgroup programs are running from. However, in the
+future, they should follow suit and implement a similar inheritance
+mechanism.
+
+Additionally, with a !CONFIG_CGROUP_NET_PRIO and !CONFIG_CGROUP_NET_CLASSID
+configuration, the same issue was exposed also prior to 8520e224f547 due to
+commit e876ecc67db8 ("cgroup: memcg: net: do not associate sock with unrelated
+cgroup") which added the early in_interrupt() return back then.
+
+Fixes: 8520e224f547 ("bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode")
+Fixes: e876ecc67db8 ("cgroup: memcg: net: do not associate sock with unrelated cgroup")
+Reported-by: syzbot+df709157a4ecaf192b03@syzkaller.appspotmail.com
+Reported-by: syzbot+533f389d4026d86a2a95@syzkaller.appspotmail.com
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Tested-by: syzbot+df709157a4ecaf192b03@syzkaller.appspotmail.com
+Tested-by: syzbot+533f389d4026d86a2a95@syzkaller.appspotmail.com
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/bpf/20210927123921.21535-1-daniel@iogearbox.net
+Signed-off-by: Connor O'Brien <connor.obrien@crowdstrike.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup.c | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -6559,22 +6559,29 @@ int cgroup_parse_float(const char *input
+
+ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
+ {
+- /* Don't associate the sock with unrelated interrupted task's cgroup. */
+- if (in_interrupt())
+- return;
++ struct cgroup *cgroup;
+
+ rcu_read_lock();
++ /* Don't associate the sock with unrelated interrupted task's cgroup. */
++ if (in_interrupt()) {
++ cgroup = &cgrp_dfl_root.cgrp;
++ cgroup_get(cgroup);
++ goto out;
++ }
++
+ while (true) {
+ struct css_set *cset;
+
+ cset = task_css_set(current);
+ if (likely(cgroup_tryget(cset->dfl_cgrp))) {
+- skcd->cgroup = cset->dfl_cgrp;
+- cgroup_bpf_get(cset->dfl_cgrp);
++ cgroup = cset->dfl_cgrp;
+ break;
+ }
+ cpu_relax();
+ }
++out:
++ skcd->cgroup = cgroup;
++ cgroup_bpf_get(cgroup);
+ rcu_read_unlock();
+ }
+
--- /dev/null
+From stable+bounces-72955-greg=kroah.com@vger.kernel.org Wed Sep 4 03:36:18 2024
+From: Connor O'Brien <connor.obrien@crowdstrike.com>
+Date: Tue, 3 Sep 2024 18:28:50 -0700
+Subject: bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode
+To: <stable@vger.kernel.org>
+Cc: <martin.kelly@crowdstrike.com>, Daniel Borkmann <daniel@iogearbox.net>, Connor O'Brien <connor.obrien@crowdstrike.com>
+Message-ID: <20240904012851.58167-1-connor.obrien@crowdstrike.com>
+
+From: Connor O'Brien <connor.obrien@crowdstrike.com>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 8520e224f547cd070c7c8f97b1fc6d58cff7ccaa upstream.
+
+Fix cgroup v1 interference when non-root cgroup v2 BPF programs are used.
+Back in the days, commit bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
+embedded per-socket cgroup information into sock->sk_cgrp_data and in order
+to save 8 bytes in struct sock made both mutually exclusive, that is, when
+cgroup v1 socket tagging (e.g. net_cls/net_prio) is used, then cgroup v2
+falls back to the root cgroup in sock_cgroup_ptr() (&cgrp_dfl_root.cgrp).
+
+The assumption made was "there is no reason to mix the two and this is in line
+with how legacy and v2 compatibility is handled" as stated in bd1060a1d671.
+However, with Kubernetes more widely supporting cgroups v2 as well nowadays,
+this assumption no longer holds, and the possibility of the v1/v2 mixed mode
+with the v2 root fallback being hit becomes a real security issue.
+
+Many of the cgroup v2 BPF programs are also used for policy enforcement, just
+to pick _one_ example, that is, to programmatically deny socket related system
+calls like connect(2) or bind(2). A v2 root fallback would implicitly cause
+a policy bypass for the affected Pods.
+
+In production environments, we have recently seen this case due to various
+circumstances: i) a different 3rd party agent and/or ii) a container runtime
+such as [0] in the user's environment configuring legacy cgroup v1 net_cls
+tags, which triggered the implicitly mentioned root fallback. Another case is
+Kubernetes projects like kind [1] which create Kubernetes nodes in a container
+and also add cgroup namespaces to the mix, meaning programs which are attached
+to the cgroup v2 root of the cgroup namespace get attached to a non-root
+cgroup v2 path from init namespace point of view. And the latter's root is
+out of reach for agents on a kind Kubernetes node to configure. Meaning, any
+entity on the node setting cgroup v1 net_cls tag will trigger the bypass
+despite cgroup v2 BPF programs attached to the namespace root.
+
+Generally, this mutual exclusiveness does not hold anymore in today's user
+environments and makes cgroup v2 usage from BPF side fragile and unreliable.
+This fix adds proper struct cgroup pointer for the cgroup v2 case to struct
+sock_cgroup_data in order to address these issues; this implicitly also fixes
+the tradeoffs being made back then with regards to races and refcount leaks
+as stated in bd1060a1d671, and removes the fallback, so that cgroup v2 BPF
+programs always operate as expected.
+
+ [0] https://github.com/nestybox/sysbox/
+ [1] https://kind.sigs.k8s.io/
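+
+To illustrate the layout change, a stand-alone user-space sketch of the
+old (little-endian) encoding next to the new one; the field types are
+approximated with stdint and are not the kernel definitions themselves:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  struct old_skcd {             /* v1 data and the v2 pointer share 8 bytes,
+                                   discriminated by the low bit (is_data) */
+      union {
+          struct {
+              uint8_t  is_data : 1;
+              uint8_t  no_refcnt : 1;
+              uint8_t  unused : 6;
+              uint8_t  padding;
+              uint16_t prioidx;
+              uint32_t classid;
+          } __attribute__((packed));
+          uint64_t val;         /* doubles as the v2 cgroup pointer */
+      };
+  };
+
+  struct new_skcd {             /* v2 pointer and v1 fields now coexist */
+      void     *cgroup;
+      uint32_t classid;
+      uint16_t prioidx;
+  };
+
+  int main(void)
+  {
+      printf("old: %zu bytes, new: %zu bytes\n",
+             sizeof(struct old_skcd), sizeof(struct new_skcd));
+      return 0;
+  }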
+
+Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/bpf/20210913230759.2313-1-daniel@iogearbox.net
+[resolve trivial conflicts]
+Signed-off-by: Connor O'Brien <connor.obrien@crowdstrike.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/cgroup-defs.h | 107 ++++++++++---------------------------------
+ include/linux/cgroup.h | 22 --------
+ kernel/cgroup/cgroup.c | 50 ++++----------------
+ net/core/netclassid_cgroup.c | 7 --
+ net/core/netprio_cgroup.c | 10 ----
+ 5 files changed, 41 insertions(+), 155 deletions(-)
+
+--- a/include/linux/cgroup-defs.h
++++ b/include/linux/cgroup-defs.h
+@@ -764,107 +764,54 @@ static inline void cgroup_threadgroup_ch
+ * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
+ * per-socket cgroup information except for memcg association.
+ *
+- * On legacy hierarchies, net_prio and net_cls controllers directly set
+- * attributes on each sock which can then be tested by the network layer.
+- * On the default hierarchy, each sock is associated with the cgroup it was
+- * created in and the networking layer can match the cgroup directly.
+- *
+- * To avoid carrying all three cgroup related fields separately in sock,
+- * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
+- * On boot, sock_cgroup_data records the cgroup that the sock was created
+- * in so that cgroup2 matches can be made; however, once either net_prio or
+- * net_cls starts being used, the area is overriden to carry prioidx and/or
+- * classid. The two modes are distinguished by whether the lowest bit is
+- * set. Clear bit indicates cgroup pointer while set bit prioidx and
+- * classid.
+- *
+- * While userland may start using net_prio or net_cls at any time, once
+- * either is used, cgroup2 matching no longer works. There is no reason to
+- * mix the two and this is in line with how legacy and v2 compatibility is
+- * handled. On mode switch, cgroup references which are already being
+- * pointed to by socks may be leaked. While this can be remedied by adding
+- * synchronization around sock_cgroup_data, given that the number of leaked
+- * cgroups is bound and highly unlikely to be high, this seems to be the
+- * better trade-off.
++ * On legacy hierarchies, net_prio and net_cls controllers directly
++ * set attributes on each sock which can then be tested by the network
++ * layer. On the default hierarchy, each sock is associated with the
++ * cgroup it was created in and the networking layer can match the
++ * cgroup directly.
+ */
+ struct sock_cgroup_data {
+- union {
+-#ifdef __LITTLE_ENDIAN
+- struct {
+- u8 is_data : 1;
+- u8 no_refcnt : 1;
+- u8 unused : 6;
+- u8 padding;
+- u16 prioidx;
+- u32 classid;
+- } __packed;
+-#else
+- struct {
+- u32 classid;
+- u16 prioidx;
+- u8 padding;
+- u8 unused : 6;
+- u8 no_refcnt : 1;
+- u8 is_data : 1;
+- } __packed;
++ struct cgroup *cgroup; /* v2 */
++#ifdef CONFIG_CGROUP_NET_CLASSID
++ u32 classid; /* v1 */
++#endif
++#ifdef CONFIG_CGROUP_NET_PRIO
++ u16 prioidx; /* v1 */
+ #endif
+- u64 val;
+- };
+ };
+
+-/*
+- * There's a theoretical window where the following accessors race with
+- * updaters and return part of the previous pointer as the prioidx or
+- * classid. Such races are short-lived and the result isn't critical.
+- */
+ static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
+ {
+- /* fallback to 1 which is always the ID of the root cgroup */
+- return (skcd->is_data & 1) ? skcd->prioidx : 1;
++#ifdef CONFIG_CGROUP_NET_PRIO
++ return READ_ONCE(skcd->prioidx);
++#else
++ return 1;
++#endif
+ }
+
+ static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
+ {
+- /* fallback to 0 which is the unconfigured default classid */
+- return (skcd->is_data & 1) ? skcd->classid : 0;
++#ifdef CONFIG_CGROUP_NET_CLASSID
++ return READ_ONCE(skcd->classid);
++#else
++ return 0;
++#endif
+ }
+
+-/*
+- * If invoked concurrently, the updaters may clobber each other. The
+- * caller is responsible for synchronization.
+- */
+ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
+ u16 prioidx)
+ {
+- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
+-
+- if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
+- return;
+-
+- if (!(skcd_buf.is_data & 1)) {
+- skcd_buf.val = 0;
+- skcd_buf.is_data = 1;
+- }
+-
+- skcd_buf.prioidx = prioidx;
+- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
++#ifdef CONFIG_CGROUP_NET_PRIO
++ WRITE_ONCE(skcd->prioidx, prioidx);
++#endif
+ }
+
+ static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
+ u32 classid)
+ {
+- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
+-
+- if (sock_cgroup_classid(&skcd_buf) == classid)
+- return;
+-
+- if (!(skcd_buf.is_data & 1)) {
+- skcd_buf.val = 0;
+- skcd_buf.is_data = 1;
+- }
+-
+- skcd_buf.classid = classid;
+- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
++#ifdef CONFIG_CGROUP_NET_CLASSID
++ WRITE_ONCE(skcd->classid, classid);
++#endif
+ }
+
+ #else /* CONFIG_SOCK_CGROUP_DATA */
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -816,33 +816,13 @@ static inline void cgroup_account_cputim
+ */
+ #ifdef CONFIG_SOCK_CGROUP_DATA
+
+-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+-extern spinlock_t cgroup_sk_update_lock;
+-#endif
+-
+-void cgroup_sk_alloc_disable(void);
+ void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
+ void cgroup_sk_clone(struct sock_cgroup_data *skcd);
+ void cgroup_sk_free(struct sock_cgroup_data *skcd);
+
+ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
+ {
+-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+- unsigned long v;
+-
+- /*
+- * @skcd->val is 64bit but the following is safe on 32bit too as we
+- * just need the lower ulong to be written and read atomically.
+- */
+- v = READ_ONCE(skcd->val);
+-
+- if (v & 3)
+- return &cgrp_dfl_root.cgrp;
+-
+- return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
+-#else
+- return (struct cgroup *)(unsigned long)skcd->val;
+-#endif
++ return skcd->cgroup;
+ }
+
+ #else /* CONFIG_CGROUP_DATA */
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -6557,74 +6557,44 @@ int cgroup_parse_float(const char *input
+ */
+ #ifdef CONFIG_SOCK_CGROUP_DATA
+
+-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+-
+-DEFINE_SPINLOCK(cgroup_sk_update_lock);
+-static bool cgroup_sk_alloc_disabled __read_mostly;
+-
+-void cgroup_sk_alloc_disable(void)
+-{
+- if (cgroup_sk_alloc_disabled)
+- return;
+- pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
+- cgroup_sk_alloc_disabled = true;
+-}
+-
+-#else
+-
+-#define cgroup_sk_alloc_disabled false
+-
+-#endif
+-
+ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
+ {
+- if (cgroup_sk_alloc_disabled) {
+- skcd->no_refcnt = 1;
+- return;
+- }
+-
+ /* Don't associate the sock with unrelated interrupted task's cgroup. */
+ if (in_interrupt())
+ return;
+
+ rcu_read_lock();
+-
+ while (true) {
+ struct css_set *cset;
+
+ cset = task_css_set(current);
+ if (likely(cgroup_tryget(cset->dfl_cgrp))) {
+- skcd->val = (unsigned long)cset->dfl_cgrp;
++ skcd->cgroup = cset->dfl_cgrp;
+ cgroup_bpf_get(cset->dfl_cgrp);
+ break;
+ }
+ cpu_relax();
+ }
+-
+ rcu_read_unlock();
+ }
+
+ void cgroup_sk_clone(struct sock_cgroup_data *skcd)
+ {
+- if (skcd->val) {
+- if (skcd->no_refcnt)
+- return;
+- /*
+- * We might be cloning a socket which is left in an empty
+- * cgroup and the cgroup might have already been rmdir'd.
+- * Don't use cgroup_get_live().
+- */
+- cgroup_get(sock_cgroup_ptr(skcd));
+- cgroup_bpf_get(sock_cgroup_ptr(skcd));
+- }
++ struct cgroup *cgrp = sock_cgroup_ptr(skcd);
++
++ /*
++ * We might be cloning a socket which is left in an empty
++ * cgroup and the cgroup might have already been rmdir'd.
++ * Don't use cgroup_get_live().
++ */
++ cgroup_get(cgrp);
++ cgroup_bpf_get(cgrp);
+ }
+
+ void cgroup_sk_free(struct sock_cgroup_data *skcd)
+ {
+ struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+
+- if (skcd->no_refcnt)
+- return;
+ cgroup_bpf_put(cgrp);
+ cgroup_put(cgrp);
+ }
+--- a/net/core/netclassid_cgroup.c
++++ b/net/core/netclassid_cgroup.c
+@@ -72,11 +72,8 @@ static int update_classid_sock(const voi
+ struct update_classid_context *ctx = (void *)v;
+ struct socket *sock = sock_from_file(file, &err);
+
+- if (sock) {
+- spin_lock(&cgroup_sk_update_lock);
++ if (sock)
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
+- spin_unlock(&cgroup_sk_update_lock);
+- }
+ if (--ctx->batch == 0) {
+ ctx->batch = UPDATE_CLASSID_BATCH;
+ return n + 1;
+@@ -122,8 +119,6 @@ static int write_classid(struct cgroup_s
+ struct css_task_iter it;
+ struct task_struct *p;
+
+- cgroup_sk_alloc_disable();
+-
+ cs->classid = (u32)value;
+
+ css_task_iter_start(css, 0, &it);
+--- a/net/core/netprio_cgroup.c
++++ b/net/core/netprio_cgroup.c
+@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kern
+ if (!dev)
+ return -ENODEV;
+
+- cgroup_sk_alloc_disable();
+-
+ rtnl_lock();
+
+ ret = netprio_set_prio(of_css(of), dev, prio);
+@@ -222,12 +220,10 @@ static int update_netprio(const void *v,
+ {
+ int err;
+ struct socket *sock = sock_from_file(file, &err);
+- if (sock) {
+- spin_lock(&cgroup_sk_update_lock);
++
++ if (sock)
+ sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
+ (unsigned long)v);
+- spin_unlock(&cgroup_sk_update_lock);
+- }
+ return 0;
+ }
+
+@@ -236,8 +232,6 @@ static void net_prio_attach(struct cgrou
+ struct task_struct *p;
+ struct cgroup_subsys_state *css;
+
+- cgroup_sk_alloc_disable();
+-
+ cgroup_taskset_for_each(p, css, tset) {
+ void *v = (void *)(unsigned long)css->id;
+
--- /dev/null
+From 50151b7f1c79a09117837eb95b76c2de76841dab Mon Sep 17 00:00:00 2001
+From: Bob Zhou <bob.zhou@amd.com>
+Date: Fri, 31 May 2024 15:01:22 +0800
+Subject: drm/amd/pm: Fix the null pointer dereference for vega10_hwmgr
+
+From: Bob Zhou <bob.zhou@amd.com>
+
+commit 50151b7f1c79a09117837eb95b76c2de76841dab upstream.
+
+Check the return value and handle NULL pointers to avoid a NULL pointer dereference.
+
+Signed-off-by: Bob Zhou <bob.zhou@amd.com>
+Reviewed-by: Tim Huang <Tim.Huang@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Mukul Sikka <mukul.sikka@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 30 +++++++++++++++---
+ 1 file changed, 26 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+@@ -3410,13 +3410,17 @@ static int vega10_find_dpm_states_clocks
+ const struct vega10_power_state *vega10_ps =
+ cast_const_phw_vega10_power_state(states->pnew_state);
+ struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table);
+- uint32_t sclk = vega10_ps->performance_levels
+- [vega10_ps->performance_level_count - 1].gfx_clock;
+ struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
+- uint32_t mclk = vega10_ps->performance_levels
+- [vega10_ps->performance_level_count - 1].mem_clock;
++ uint32_t sclk, mclk;
+ uint32_t i;
+
++ if (vega10_ps == NULL)
++ return -EINVAL;
++ sclk = vega10_ps->performance_levels
++ [vega10_ps->performance_level_count - 1].gfx_clock;
++ mclk = vega10_ps->performance_levels
++ [vega10_ps->performance_level_count - 1].mem_clock;
++
+ for (i = 0; i < sclk_table->count; i++) {
+ if (sclk == sclk_table->dpm_levels[i].value)
+ break;
+@@ -3723,6 +3727,9 @@ static int vega10_generate_dpm_level_ena
+ cast_const_phw_vega10_power_state(states->pnew_state);
+ int i;
+
++ if (vega10_ps == NULL)
++ return -EINVAL;
++
+ PP_ASSERT_WITH_CODE(!vega10_trim_dpm_states(hwmgr, vega10_ps),
+ "Attempt to Trim DPM States Failed!",
+ return -1);
+@@ -4858,6 +4865,9 @@ static int vega10_check_states_equal(str
+
+ psa = cast_const_phw_vega10_power_state(pstate1);
+ psb = cast_const_phw_vega10_power_state(pstate2);
++ if (psa == NULL || psb == NULL)
++ return -EINVAL;
++
+ /* If the two states don't even have the same number of performance levels they cannot be the same state. */
+ if (psa->performance_level_count != psb->performance_level_count) {
+ *equal = false;
+@@ -4983,6 +4993,8 @@ static int vega10_set_sclk_od(struct pp_
+ return -EINVAL;
+
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
++ if (vega10_ps == NULL)
++ return -EINVAL;
+
+ vega10_ps->performance_levels
+ [vega10_ps->performance_level_count - 1].gfx_clock =
+@@ -5034,6 +5046,8 @@ static int vega10_set_mclk_od(struct pp_
+ return -EINVAL;
+
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
++ if (vega10_ps == NULL)
++ return -EINVAL;
+
+ vega10_ps->performance_levels
+ [vega10_ps->performance_level_count - 1].mem_clock =
+@@ -5269,6 +5283,9 @@ static void vega10_odn_update_power_stat
+ return;
+
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
++ if (vega10_ps == NULL)
++ return;
++
+ max_level = vega10_ps->performance_level_count - 1;
+
+ if (vega10_ps->performance_levels[max_level].gfx_clock !=
+@@ -5291,6 +5308,9 @@ static void vega10_odn_update_power_stat
+
+ ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + hwmgr->ps_size * (hwmgr->num_ps - 1));
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
++ if (vega10_ps == NULL)
++ return;
++
+ max_level = vega10_ps->performance_level_count - 1;
+
+ if (vega10_ps->performance_levels[max_level].gfx_clock !=
+@@ -5481,6 +5501,8 @@ static int vega10_get_performance_level(
+ return -EINVAL;
+
+ ps = cast_const_phw_vega10_power_state(state);
++ if (ps == NULL)
++ return -EINVAL;
+
+ i = index > ps->performance_level_count - 1 ?
+ ps->performance_level_count - 1 : index;
--- /dev/null
+From 04e568a3b31cfbd545c04c8bfc35c20e5ccfce0f Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Wed, 7 Dec 2022 12:27:04 +0100
+Subject: ext4: handle redirtying in ext4_bio_write_page()
+
+From: Jan Kara <jack@suse.cz>
+
+commit 04e568a3b31cfbd545c04c8bfc35c20e5ccfce0f upstream.
+
+Since we want to transition transaction commits to use ext4_writepages()
+for writing back ordered data, add handling of page redirtying into
+ext4_bio_write_page(). Also move buffer dirty bit clearing into the same
+place as other buffer state handling.
+
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20221207112722.22220-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/page-io.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -493,6 +493,13 @@ int ext4_bio_write_page(struct ext4_io_s
+ /* A hole? We can safely clear the dirty bit */
+ if (!buffer_mapped(bh))
+ clear_buffer_dirty(bh);
++ /*
++ * Keeping dirty some buffer we cannot write? Make
++ * sure to redirty the page. This happens e.g. when
++ * doing writeout for transaction commit.
++ */
++ if (buffer_dirty(bh) && !PageDirty(page))
++ redirty_page_for_writepage(wbc, page);
+ if (io->io_bio)
+ ext4_io_submit(io);
+ continue;
+@@ -500,6 +507,7 @@ int ext4_bio_write_page(struct ext4_io_s
+ if (buffer_new(bh))
+ clear_buffer_new(bh);
+ set_buffer_async_write(bh);
++ clear_buffer_dirty(bh);
+ nr_to_submit++;
+ } while ((bh = bh->b_this_page) != head);
+
+@@ -542,7 +550,10 @@ int ext4_bio_write_page(struct ext4_io_s
+ printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
+ redirty_page_for_writepage(wbc, page);
+ do {
+- clear_buffer_async_write(bh);
++ if (buffer_async_write(bh)) {
++ clear_buffer_async_write(bh);
++ set_buffer_dirty(bh);
++ }
+ bh = bh->b_this_page;
+ } while (bh != head);
+ goto unlock;
+@@ -555,7 +566,6 @@ int ext4_bio_write_page(struct ext4_io_s
+ continue;
+ io_submit_add_bh(io, inode, page, bounce_page, bh);
+ nr_submitted++;
+- clear_buffer_dirty(bh);
+ } while ((bh = bh->b_this_page) != head);
+
+ unlock:
--- /dev/null
+From 871019b22d1bcc9fab2d1feba1b9a564acbb6e99 Mon Sep 17 00:00:00 2001
+From: Stanislav Fomichev <sdf@google.com>
+Date: Wed, 8 Nov 2023 13:13:25 -0800
+Subject: net: set SOCK_RCU_FREE before inserting socket into hashtable
+
+From: Stanislav Fomichev <sdf@google.com>
+
+commit 871019b22d1bcc9fab2d1feba1b9a564acbb6e99 upstream.
+
+We've started to see the following kernel traces:
+
+ WARNING: CPU: 83 PID: 0 at net/core/filter.c:6641 sk_lookup+0x1bd/0x1d0
+
+ Call Trace:
+ <IRQ>
+ __bpf_skc_lookup+0x10d/0x120
+ bpf_sk_lookup+0x48/0xd0
+ bpf_sk_lookup_tcp+0x19/0x20
+ bpf_prog_<redacted>+0x37c/0x16a3
+ cls_bpf_classify+0x205/0x2e0
+ tcf_classify+0x92/0x160
+ __netif_receive_skb_core+0xe52/0xf10
+ __netif_receive_skb_list_core+0x96/0x2b0
+ napi_complete_done+0x7b5/0xb70
+ <redacted>_poll+0x94/0xb0
+ net_rx_action+0x163/0x1d70
+ __do_softirq+0xdc/0x32e
+ asm_call_irq_on_stack+0x12/0x20
+ </IRQ>
+ do_softirq_own_stack+0x36/0x50
+ do_softirq+0x44/0x70
+
+__inet_hash can race with lockless (rcu) readers on the other cpus:
+
+ __inet_hash
+ __sk_nulls_add_node_rcu
+ <- (bpf triggers here)
+ sock_set_flag(SOCK_RCU_FREE)
+
+Let's move the SOCK_RCU_FREE part up a bit, before we insert the
+socket into the hashtables. Note that the race is really harmless;
+the bpf callers handle this situation (where the listener socket
+doesn't have SOCK_RCU_FREE set) correctly, so the only
+annoyance is a WARN_ONCE.
+
+More details from Eric regarding SOCK_RCU_FREE timeline:
+
+Commit 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under
+synflood") added SOCK_RCU_FREE. At that time, the precise location of
+sock_set_flag(sk, SOCK_RCU_FREE) did not matter, because the thread calling
+__inet_hash() owns a reference on sk. SOCK_RCU_FREE was only tested
+at dismantle time.
+
+Commit 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
+started checking SOCK_RCU_FREE _after_ the lookup to infer whether
+the refcount has been taken care of.
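+
+The underlying rule is the usual RCU publication order; a kernel-style
+sketch, with struct foo, FOO_READY, node and head as made-up names (not
+meant to build on its own):
+
+  struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);
+
+  if (!p)
+      return -ENOMEM;
+  p->flags = FOO_READY;                 /* finish every field readers test... */
+  hlist_add_head_rcu(&p->node, &head);  /* ...before readers can find the object */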
+
+Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Stanislav Fomichev <sdf@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[Resolved conflict for 5.10 and below.]
+Signed-off-by: Siddh Raman Pant <siddh.raman.pant@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_hashtables.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -653,6 +653,7 @@ int __inet_hash(struct sock *sk, struct
+ if (err)
+ goto unlock;
+ }
++ sock_set_flag(sk, SOCK_RCU_FREE);
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
+@@ -660,7 +661,6 @@ int __inet_hash(struct sock *sk, struct
+ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
+ inet_hash2(hashinfo, sk);
+ ilb->count++;
+- sock_set_flag(sk, SOCK_RCU_FREE);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ unlock:
+ spin_unlock(&ilb->lock);
--- /dev/null
+From cc5645fddb0ce28492b15520306d092730dffa48 Mon Sep 17 00:00:00 2001
+From: Nikita Kiryushin <kiryushin@ancud.ru>
+Date: Wed, 27 Mar 2024 20:47:47 +0300
+Subject: rcu-tasks: Fix show_rcu_tasks_trace_gp_kthread buffer overflow
+
+From: Nikita Kiryushin <kiryushin@ancud.ru>
+
+commit cc5645fddb0ce28492b15520306d092730dffa48 upstream.
+
+There is a possibility of buffer overflow in
+show_rcu_tasks_trace_gp_kthread() if the counters passed
+to sprintf() are huge. The counter values needed for this
+are unrealistically high, but a buffer overflow is still
+possible.
+
+Use snprintf() with buffer size instead of sprintf().
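+
+A stand-alone user-space illustration of the difference: with 64-bit
+longs the worst-case string needs about 78 bytes, so sprintf() into a
+64-byte buffer could overflow, while snprintf() truncates and reports the
+length it wanted.
+
+  #include <limits.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+      char buf[64];
+      int needed = snprintf(buf, sizeof(buf), "N%d h:%lu/%lu/%lu",
+                            INT_MIN, ULONG_MAX, ULONG_MAX, ULONG_MAX);
+
+      /* snprintf() stopped after 63 characters plus the NUL; sprintf()
+       * would have kept writing past the end of buf */
+      printf("needed %d bytes, buffer holds %zu\n", needed, sizeof(buf));
+      return 0;
+  }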
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: edf3775f0ad6 ("rcu-tasks: Add count for idle tasks on offline CPUs")
+Signed-off-by: Nikita Kiryushin <kiryushin@ancud.ru>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Vamsi Krishna Brahmajosyula <vamsi-krishna.brahmajosyula@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/rcu/tasks.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -1240,7 +1240,7 @@ static void show_rcu_tasks_trace_gp_kthr
+ {
+ char buf[64];
+
+- sprintf(buf, "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end),
++ snprintf(buf, sizeof(buf), "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end),
+ data_race(n_heavy_reader_ofl_updates),
+ data_race(n_heavy_reader_updates),
+ data_race(n_heavy_reader_attempts));
block-remove-the-blk_flush_integrity-call-in-blk_int.patch
drm-amd-display-skip-wbscl_set_scaler_filter-if-filt.patch
media-uvcvideo-enforce-alignment-of-frame-and-interv.patch
+block-initialize-integrity-buffer-to-zero-before-writing-it-to-media.patch
+drm-amd-pm-fix-the-null-pointer-dereference-for-vega10_hwmgr.patch
+bpf-cgroups-fix-cgroup-v2-fallback-on-v1-v2-mixed-mode.patch
+net-set-sock_rcu_free-before-inserting-socket-into-hashtable.patch
+virtio_net-fix-napi_skb_cache_put-warning.patch
+rcu-tasks-fix-show_rcu_tasks_trace_gp_kthread-buffer-overflow.patch
+udf-limit-file-size-to-4tb.patch
+ext4-handle-redirtying-in-ext4_bio_write_page.patch
+bpf-cgroup-assign-cgroup-in-cgroup_sk_alloc-when-called-from-interrupt.patch
--- /dev/null
+From c2efd13a2ed4f29bf9ef14ac2fbb7474084655f8 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Wed, 25 Jan 2023 17:56:06 +0100
+Subject: udf: Limit file size to 4TB
+
+From: Jan Kara <jack@suse.cz>
+
+commit c2efd13a2ed4f29bf9ef14ac2fbb7474084655f8 upstream.
+
+UDF disk format supports in principle file sizes up to 1<<64-1. However
+the file space (including holes) is described by a linked list of
+extents, each of which can have at most 1GB. Thus the creation and
+handling of extents gets unusably slow beyond a certain point. Limit the
+file size to 4TB to avoid locking up the kernel too easily.
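+
+As a quick, stand-alone sanity check of the numbers (assuming the ~1GB
+per-extent maximum quoted above), a file at the new cap is still
+described by only a few thousand extents:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+      unsigned long long per_extent = 1ULL << 30;  /* at most ~1GB per extent */
+      unsigned long long cap        = 1ULL << 42;  /* the new 4TB limit */
+
+      printf("extents needed at the cap: %llu\n", cap / per_extent);  /* 4096 */
+      return 0;
+  }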
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/udf/super.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/udf/super.c
++++ b/fs/udf/super.c
+@@ -86,6 +86,13 @@ enum {
+ #define UDF_MAX_LVID_NESTING 1000
+
+ enum { UDF_MAX_LINKS = 0xffff };
++/*
++ * We limit filesize to 4TB. This is arbitrary as the on-disk format supports
++ * more but because the file space is described by a linked list of extents,
++ * each of which can have at most 1GB, the creation and handling of extents
++ * gets unusably slow beyond certain point...
++ */
++#define UDF_MAX_FILESIZE (1ULL << 42)
+
+ /* These are the "meat" - everything else is stuffing */
+ static int udf_fill_super(struct super_block *, void *, int);
+@@ -2301,7 +2308,7 @@ static int udf_fill_super(struct super_b
+ ret = -ENOMEM;
+ goto error_out;
+ }
+- sb->s_maxbytes = MAX_LFS_FILESIZE;
++ sb->s_maxbytes = UDF_MAX_FILESIZE;
+ sb->s_max_links = UDF_MAX_LINKS;
+ return 0;
+
--- /dev/null
+From f8321fa75102246d7415a6af441872f6637c93ab Mon Sep 17 00:00:00 2001
+From: Breno Leitao <leitao@debian.org>
+Date: Fri, 12 Jul 2024 04:53:25 -0700
+Subject: virtio_net: Fix napi_skb_cache_put warning
+
+From: Breno Leitao <leitao@debian.org>
+
+commit f8321fa75102246d7415a6af441872f6637c93ab upstream.
+
+After the commit bdacf3e34945 ("net: Use nested-BH locking for
+napi_alloc_cache.") was merged, the following warning began to appear:
+
+ WARNING: CPU: 5 PID: 1 at net/core/skbuff.c:1451 napi_skb_cache_put+0x82/0x4b0
+
+ __warn+0x12f/0x340
+ napi_skb_cache_put+0x82/0x4b0
+ napi_skb_cache_put+0x82/0x4b0
+ report_bug+0x165/0x370
+ handle_bug+0x3d/0x80
+ exc_invalid_op+0x1a/0x50
+ asm_exc_invalid_op+0x1a/0x20
+ __free_old_xmit+0x1c8/0x510
+ napi_skb_cache_put+0x82/0x4b0
+ __free_old_xmit+0x1c8/0x510
+ __free_old_xmit+0x1c8/0x510
+ __pfx___free_old_xmit+0x10/0x10
+
+The issue arises because virtio is assuming it's running in NAPI context
+even when it's not, such as in the netpoll case.
+
+To resolve this, modify virtnet_poll_tx() to only set NAPI when budget
+is available. Same for virtnet_poll_cleantx(), which always assumed that
+it was in a NAPI context.
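+
+A kernel-style sketch of the convention relied on here (example_poll() is
+a made-up callback; not meant to build on its own): netpoll invokes a
+driver's NAPI poll callback with a budget of 0, so a non-zero budget is
+what identifies a genuine NAPI context.
+
+  static int example_poll(struct napi_struct *napi, int budget)
+  {
+      bool in_napi = !!budget;  /* false when invoked from netpoll */
+
+      /* hand in_napi down to the TX-completion path: with a zero budget,
+       * napi_consume_skb() falls back to dev_consume_skb_any() instead of
+       * touching the per-CPU NAPI skb cache */
+      ...
+  }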
+
+Fixes: df133f3f9625 ("virtio_net: bulk free tx skbs")
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Heng Qi <hengqi@linux.alibaba.com>
+Link: https://patch.msgid.link/20240712115325.54175-1-leitao@debian.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[Shivani: Modified to apply on v4.19.y-v5.10.y]
+Signed-off-by: Shivani Agarwal <shivani.agarwal@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1497,7 +1497,7 @@ static bool is_xdp_raw_buffer_queue(stru
+ return false;
+ }
+
+-static void virtnet_poll_cleantx(struct receive_queue *rq)
++static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
+ {
+ struct virtnet_info *vi = rq->vq->vdev->priv;
+ unsigned int index = vq2rxq(rq->vq);
+@@ -1508,7 +1508,7 @@ static void virtnet_poll_cleantx(struct
+ return;
+
+ if (__netif_tx_trylock(txq)) {
+- free_old_xmit_skbs(sq, true);
++ free_old_xmit_skbs(sq, !!budget);
+ __netif_tx_unlock(txq);
+ }
+
+@@ -1525,7 +1525,7 @@ static int virtnet_poll(struct napi_stru
+ unsigned int received;
+ unsigned int xdp_xmit = 0;
+
+- virtnet_poll_cleantx(rq);
++ virtnet_poll_cleantx(rq, budget);
+
+ received = virtnet_receive(rq, budget, &xdp_xmit);
+
+@@ -1598,7 +1598,7 @@ static int virtnet_poll_tx(struct napi_s
+ txq = netdev_get_tx_queue(vi->dev, index);
+ __netif_tx_lock(txq, raw_smp_processor_id());
+ virtqueue_disable_cb(sq->vq);
+- free_old_xmit_skbs(sq, true);
++ free_old_xmit_skbs(sq, !!budget);
+
+ opaque = virtqueue_enable_cb_prepare(sq->vq);
+