--- /dev/null
+From 899ee2c3829c5ac14bfc7d3c4a5846c0b709b78f Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Thu, 13 Jun 2024 10:48:11 +0200
+Subject: block: initialize integrity buffer to zero before writing it to media
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 899ee2c3829c5ac14bfc7d3c4a5846c0b709b78f upstream.
+
+Metadata added by bio_integrity_prep is using plain kmalloc, which leads
+to random kernel memory being written to media. For PI metadata this is
+limited to the app tag that isn't used by kernel generated metadata,
+but for non-PI metadata the entire buffer leaks kernel memory.
+
+Fix this by adding the __GFP_ZERO flag to allocations for writes.
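+
+As a purely illustrative, stand-alone user-space analogue of this leak
+class (not part of the change): a buffer that is only partially generated
+but written out in full exposes whatever the allocator handed back, which
+is exactly what zeroing the allocation prevents here.
+
+  #include <stdlib.h>
+  #include <string.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+      size_t len = 4096;
+      unsigned char *buf = malloc(len);  /* uninitialized, like kmalloc() */
+
+      if (!buf)
+          return 1;
+      memset(buf, 0xab, 16);             /* only part of it gets generated */
+      write(STDOUT_FILENO, buf, len);    /* the rest leaks old heap contents */
+      free(buf);
+      return 0;
+  }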
+
+Fixes: 7ba1ba12eeef ("block: Block layer data integrity support")
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
+Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Link: https://lore.kernel.org/r/20240613084839.1044015-2-hch@lst.de
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Shivani Agarwal <shivani.agarwal@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/bio-integrity.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/block/bio-integrity.c
++++ b/block/bio-integrity.c
+@@ -216,6 +216,7 @@ bool bio_integrity_prep(struct bio *bio)
+ unsigned int bytes, offset, i;
+ unsigned int intervals;
+ blk_status_t status;
++ gfp_t gfp = GFP_NOIO;
+
+ if (!bi)
+ return true;
+@@ -238,12 +239,20 @@ bool bio_integrity_prep(struct bio *bio)
+ if (!bi->profile->generate_fn ||
+ !(bi->flags & BLK_INTEGRITY_GENERATE))
+ return true;
++
++ /*
++ * Zero the memory allocated to not leak uninitialized kernel
++ * memory to disk. For PI this only affects the app tag, but
++ * for non-integrity metadata it affects the entire metadata
++ * buffer.
++ */
++ gfp |= __GFP_ZERO;
+ }
+ intervals = bio_integrity_intervals(bi, bio_sectors(bio));
+
+ /* Allocate kernel buffer for protection data */
+ len = intervals * bi->tuple_size;
+- buf = kmalloc(len, GFP_NOIO | q->bounce_gfp);
++ buf = kmalloc(len, gfp | q->bounce_gfp);
+ status = BLK_STS_RESOURCE;
+ if (unlikely(buf == NULL)) {
+ printk(KERN_ERR "could not allocate integrity buffer\n");
--- /dev/null
+From stable+bounces-72956-greg=kroah.com@vger.kernel.org Wed Sep 4 03:36:41 2024
+From: Connor O'Brien <connor.obrien@crowdstrike.com>
+Date: Tue, 3 Sep 2024 18:28:51 -0700
+Subject: bpf, cgroup: Assign cgroup in cgroup_sk_alloc when called from interrupt
+To: <stable@vger.kernel.org>
+Cc: <martin.kelly@crowdstrike.com>, Daniel Borkmann <daniel@iogearbox.net>, Connor O'Brien <connor.obrien@crowdstrike.com>
+Message-ID: <20240904012851.58167-2-connor.obrien@crowdstrike.com>
+
+From: Connor O'Brien <connor.obrien@crowdstrike.com>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 78cc316e9583067884eb8bd154301dc1e9ee945c upstream.
+
+If cgroup_sk_alloc() is called from interrupt context, then just assign the
+root cgroup to skcd->cgroup. Prior to commit 8520e224f547 ("bpf, cgroups:
+Fix cgroup v2 fallback on v1/v2 mixed mode") we would just return, and later
+on in sock_cgroup_ptr(), we were NULL-testing the cgroup in fast-path, and
+iff indeed NULL returning the root cgroup (v ?: &cgrp_dfl_root.cgrp). Rather
+than re-adding the NULL-test to the fast-path we can just assign it once from
+cgroup_sk_alloc() given v1/v2 handling has been simplified. The migration from
+NULL test with returning &cgrp_dfl_root.cgrp to assigning &cgrp_dfl_root.cgrp
+directly does /not/ change behavior for callers of sock_cgroup_ptr().
+
+syzkaller was able to trigger a splat in the legacy netrom code base, where
+the RX handler in nr_rx_frame() calls nr_make_new() which calls sk_alloc()
+and therefore cgroup_sk_alloc() while in_interrupt() is true. Thus
+skcd->cgroup is left NULL, and cgroup_sk_free() later trips over it since
+it expects a non-NULL object. There are a few other candidates aside from
+netrom which have a similar pattern: in their accept-like implementation,
+they just call sk_alloc() and thus cgroup_sk_alloc() instead of
+sk_clone_lock() with the corresponding cgroup_sk_clone(), which would
+inherit the cgroup from the parent socket. None of them are related to
+core protocols where BPF cgroup programs are running from. However, in the
+future, they should follow suit and implement a similar inheritance
+mechanism.
+
+Additionally, with a !CONFIG_CGROUP_NET_PRIO and !CONFIG_CGROUP_NET_CLASSID
+configuration, the same issue was exposed also prior to 8520e224f547 due to
+commit e876ecc67db8 ("cgroup: memcg: net: do not associate sock with unrelated
+cgroup") which added the early in_interrupt() return back then.
+
+Fixes: 8520e224f547 ("bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode")
+Fixes: e876ecc67db8 ("cgroup: memcg: net: do not associate sock with unrelated cgroup")
+Reported-by: syzbot+df709157a4ecaf192b03@syzkaller.appspotmail.com
+Reported-by: syzbot+533f389d4026d86a2a95@syzkaller.appspotmail.com
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Tested-by: syzbot+df709157a4ecaf192b03@syzkaller.appspotmail.com
+Tested-by: syzbot+533f389d4026d86a2a95@syzkaller.appspotmail.com
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/bpf/20210927123921.21535-1-daniel@iogearbox.net
+Signed-off-by: Connor O'Brien <connor.obrien@crowdstrike.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/cgroup/cgroup.c | 17 ++++++++++++-----
+ 1 file changed, 12 insertions(+), 5 deletions(-)
+
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -6559,22 +6559,29 @@ int cgroup_parse_float(const char *input
+
+ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
+ {
+- /* Don't associate the sock with unrelated interrupted task's cgroup. */
+- if (in_interrupt())
+- return;
++ struct cgroup *cgroup;
+
+ rcu_read_lock();
++ /* Don't associate the sock with unrelated interrupted task's cgroup. */
++ if (in_interrupt()) {
++ cgroup = &cgrp_dfl_root.cgrp;
++ cgroup_get(cgroup);
++ goto out;
++ }
++
+ while (true) {
+ struct css_set *cset;
+
+ cset = task_css_set(current);
+ if (likely(cgroup_tryget(cset->dfl_cgrp))) {
+- skcd->cgroup = cset->dfl_cgrp;
+- cgroup_bpf_get(cset->dfl_cgrp);
++ cgroup = cset->dfl_cgrp;
+ break;
+ }
+ cpu_relax();
+ }
++out:
++ skcd->cgroup = cgroup;
++ cgroup_bpf_get(cgroup);
+ rcu_read_unlock();
+ }
+
--- /dev/null
+From stable+bounces-72955-greg=kroah.com@vger.kernel.org Wed Sep 4 03:36:18 2024
+From: Connor O'Brien <connor.obrien@crowdstrike.com>
+Date: Tue, 3 Sep 2024 18:28:50 -0700
+Subject: bpf, cgroups: Fix cgroup v2 fallback on v1/v2 mixed mode
+To: <stable@vger.kernel.org>
+Cc: <martin.kelly@crowdstrike.com>, Daniel Borkmann <daniel@iogearbox.net>, Connor O'Brien <connor.obrien@crowdstrike.com>
+Message-ID: <20240904012851.58167-1-connor.obrien@crowdstrike.com>
+
+From: Connor O'Brien <connor.obrien@crowdstrike.com>
+
+From: Daniel Borkmann <daniel@iogearbox.net>
+
+commit 8520e224f547cd070c7c8f97b1fc6d58cff7ccaa upstream.
+
+Fix cgroup v1 interference when non-root cgroup v2 BPF programs are used.
+Back in the days, commit bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
+embedded per-socket cgroup information into sock->sk_cgrp_data and in order
+to save 8 bytes in struct sock made both mutually exclusive, that is, when
+cgroup v1 socket tagging (e.g. net_cls/net_prio) is used, then cgroup v2
+falls back to the root cgroup in sock_cgroup_ptr() (&cgrp_dfl_root.cgrp).
+
+The assumption made was "there is no reason to mix the two and this is in line
+with how legacy and v2 compatibility is handled" as stated in bd1060a1d671.
+However, with Kubernetes more widely supporting cgroups v2 as well nowadays,
+this assumption no longer holds, and the possibility of the v1/v2 mixed mode
+with the v2 root fallback being hit becomes a real security issue.
+
+Many of the cgroup v2 BPF programs are also used for policy enforcement, just
+to pick _one_ example, that is, to programmatically deny socket related system
+calls like connect(2) or bind(2). A v2 root fallback would implicitly cause
+a policy bypass for the affected Pods.
+
+In production environments, we have recently seen this case due to various
+circumstances: i) a different 3rd party agent and/or ii) a container runtime
+such as [0] in the user's environment configuring legacy cgroup v1 net_cls
+tags, which triggered the implicitly mentioned root fallback. Another case is
+Kubernetes projects like kind [1] which create Kubernetes nodes in a container
+and also add cgroup namespaces to the mix, meaning programs which are attached
+to the cgroup v2 root of the cgroup namespace get attached to a non-root
+cgroup v2 path from init namespace point of view. And the latter's root is
+out of reach for agents on a kind Kubernetes node to configure. Meaning, any
+entity on the node setting cgroup v1 net_cls tag will trigger the bypass
+despite cgroup v2 BPF programs attached to the namespace root.
+
+Generally, this mutual exclusiveness does not hold anymore in today's user
+environments and makes cgroup v2 usage from BPF side fragile and unreliable.
+This fix adds proper struct cgroup pointer for the cgroup v2 case to struct
+sock_cgroup_data in order to address these issues; this implicitly also fixes
+the tradeoffs being made back then with regards to races and refcount leaks
+as stated in bd1060a1d671, and removes the fallback, so that cgroup v2 BPF
+programs always operate as expected.
+
+ [0] https://github.com/nestybox/sysbox/
+ [1] https://kind.sigs.k8s.io/
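+
+To illustrate the layout change, a stand-alone user-space sketch of the
+old (little-endian) encoding next to the new one; the field types are
+approximated with stdint and are not the kernel definitions themselves:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  struct old_skcd {             /* v1 data and the v2 pointer share 8 bytes,
+                                   discriminated by the low bit (is_data) */
+      union {
+          struct {
+              uint8_t  is_data : 1;
+              uint8_t  no_refcnt : 1;
+              uint8_t  unused : 6;
+              uint8_t  padding;
+              uint16_t prioidx;
+              uint32_t classid;
+          } __attribute__((packed));
+          uint64_t val;         /* doubles as the v2 cgroup pointer */
+      };
+  };
+
+  struct new_skcd {             /* v2 pointer and v1 fields now coexist */
+      void     *cgroup;
+      uint32_t classid;
+      uint16_t prioidx;
+  };
+
+  int main(void)
+  {
+      printf("old: %zu bytes, new: %zu bytes\n",
+             sizeof(struct old_skcd), sizeof(struct new_skcd));
+      return 0;
+  }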
+
+Fixes: bd1060a1d671 ("sock, cgroup: add sock->sk_cgroup")
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Link: https://lore.kernel.org/bpf/20210913230759.2313-1-daniel@iogearbox.net
+[resolve trivial conflicts]
+Signed-off-by: Connor O'Brien <connor.obrien@crowdstrike.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/cgroup-defs.h | 107 ++++++++++---------------------------------
+ include/linux/cgroup.h | 22 --------
+ kernel/cgroup/cgroup.c | 50 ++++----------------
+ net/core/netclassid_cgroup.c | 7 --
+ net/core/netprio_cgroup.c | 10 ----
+ 5 files changed, 41 insertions(+), 155 deletions(-)
+
+--- a/include/linux/cgroup-defs.h
++++ b/include/linux/cgroup-defs.h
+@@ -764,107 +764,54 @@ static inline void cgroup_threadgroup_ch
+ * sock_cgroup_data is embedded at sock->sk_cgrp_data and contains
+ * per-socket cgroup information except for memcg association.
+ *
+- * On legacy hierarchies, net_prio and net_cls controllers directly set
+- * attributes on each sock which can then be tested by the network layer.
+- * On the default hierarchy, each sock is associated with the cgroup it was
+- * created in and the networking layer can match the cgroup directly.
+- *
+- * To avoid carrying all three cgroup related fields separately in sock,
+- * sock_cgroup_data overloads (prioidx, classid) and the cgroup pointer.
+- * On boot, sock_cgroup_data records the cgroup that the sock was created
+- * in so that cgroup2 matches can be made; however, once either net_prio or
+- * net_cls starts being used, the area is overriden to carry prioidx and/or
+- * classid. The two modes are distinguished by whether the lowest bit is
+- * set. Clear bit indicates cgroup pointer while set bit prioidx and
+- * classid.
+- *
+- * While userland may start using net_prio or net_cls at any time, once
+- * either is used, cgroup2 matching no longer works. There is no reason to
+- * mix the two and this is in line with how legacy and v2 compatibility is
+- * handled. On mode switch, cgroup references which are already being
+- * pointed to by socks may be leaked. While this can be remedied by adding
+- * synchronization around sock_cgroup_data, given that the number of leaked
+- * cgroups is bound and highly unlikely to be high, this seems to be the
+- * better trade-off.
++ * On legacy hierarchies, net_prio and net_cls controllers directly
++ * set attributes on each sock which can then be tested by the network
++ * layer. On the default hierarchy, each sock is associated with the
++ * cgroup it was created in and the networking layer can match the
++ * cgroup directly.
+ */
+ struct sock_cgroup_data {
+- union {
+-#ifdef __LITTLE_ENDIAN
+- struct {
+- u8 is_data : 1;
+- u8 no_refcnt : 1;
+- u8 unused : 6;
+- u8 padding;
+- u16 prioidx;
+- u32 classid;
+- } __packed;
+-#else
+- struct {
+- u32 classid;
+- u16 prioidx;
+- u8 padding;
+- u8 unused : 6;
+- u8 no_refcnt : 1;
+- u8 is_data : 1;
+- } __packed;
++ struct cgroup *cgroup; /* v2 */
++#ifdef CONFIG_CGROUP_NET_CLASSID
++ u32 classid; /* v1 */
++#endif
++#ifdef CONFIG_CGROUP_NET_PRIO
++ u16 prioidx; /* v1 */
+ #endif
+- u64 val;
+- };
+ };
+
+-/*
+- * There's a theoretical window where the following accessors race with
+- * updaters and return part of the previous pointer as the prioidx or
+- * classid. Such races are short-lived and the result isn't critical.
+- */
+ static inline u16 sock_cgroup_prioidx(const struct sock_cgroup_data *skcd)
+ {
+- /* fallback to 1 which is always the ID of the root cgroup */
+- return (skcd->is_data & 1) ? skcd->prioidx : 1;
++#ifdef CONFIG_CGROUP_NET_PRIO
++ return READ_ONCE(skcd->prioidx);
++#else
++ return 1;
++#endif
+ }
+
+ static inline u32 sock_cgroup_classid(const struct sock_cgroup_data *skcd)
+ {
+- /* fallback to 0 which is the unconfigured default classid */
+- return (skcd->is_data & 1) ? skcd->classid : 0;
++#ifdef CONFIG_CGROUP_NET_CLASSID
++ return READ_ONCE(skcd->classid);
++#else
++ return 0;
++#endif
+ }
+
+-/*
+- * If invoked concurrently, the updaters may clobber each other. The
+- * caller is responsible for synchronization.
+- */
+ static inline void sock_cgroup_set_prioidx(struct sock_cgroup_data *skcd,
+ u16 prioidx)
+ {
+- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
+-
+- if (sock_cgroup_prioidx(&skcd_buf) == prioidx)
+- return;
+-
+- if (!(skcd_buf.is_data & 1)) {
+- skcd_buf.val = 0;
+- skcd_buf.is_data = 1;
+- }
+-
+- skcd_buf.prioidx = prioidx;
+- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
++#ifdef CONFIG_CGROUP_NET_PRIO
++ WRITE_ONCE(skcd->prioidx, prioidx);
++#endif
+ }
+
+ static inline void sock_cgroup_set_classid(struct sock_cgroup_data *skcd,
+ u32 classid)
+ {
+- struct sock_cgroup_data skcd_buf = {{ .val = READ_ONCE(skcd->val) }};
+-
+- if (sock_cgroup_classid(&skcd_buf) == classid)
+- return;
+-
+- if (!(skcd_buf.is_data & 1)) {
+- skcd_buf.val = 0;
+- skcd_buf.is_data = 1;
+- }
+-
+- skcd_buf.classid = classid;
+- WRITE_ONCE(skcd->val, skcd_buf.val); /* see sock_cgroup_ptr() */
++#ifdef CONFIG_CGROUP_NET_CLASSID
++ WRITE_ONCE(skcd->classid, classid);
++#endif
+ }
+
+ #else /* CONFIG_SOCK_CGROUP_DATA */
+--- a/include/linux/cgroup.h
++++ b/include/linux/cgroup.h
+@@ -816,33 +816,13 @@ static inline void cgroup_account_cputim
+ */
+ #ifdef CONFIG_SOCK_CGROUP_DATA
+
+-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+-extern spinlock_t cgroup_sk_update_lock;
+-#endif
+-
+-void cgroup_sk_alloc_disable(void);
+ void cgroup_sk_alloc(struct sock_cgroup_data *skcd);
+ void cgroup_sk_clone(struct sock_cgroup_data *skcd);
+ void cgroup_sk_free(struct sock_cgroup_data *skcd);
+
+ static inline struct cgroup *sock_cgroup_ptr(struct sock_cgroup_data *skcd)
+ {
+-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+- unsigned long v;
+-
+- /*
+- * @skcd->val is 64bit but the following is safe on 32bit too as we
+- * just need the lower ulong to be written and read atomically.
+- */
+- v = READ_ONCE(skcd->val);
+-
+- if (v & 3)
+- return &cgrp_dfl_root.cgrp;
+-
+- return (struct cgroup *)(unsigned long)v ?: &cgrp_dfl_root.cgrp;
+-#else
+- return (struct cgroup *)(unsigned long)skcd->val;
+-#endif
++ return skcd->cgroup;
+ }
+
+ #else /* CONFIG_CGROUP_DATA */
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -6557,74 +6557,44 @@ int cgroup_parse_float(const char *input
+ */
+ #ifdef CONFIG_SOCK_CGROUP_DATA
+
+-#if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
+-
+-DEFINE_SPINLOCK(cgroup_sk_update_lock);
+-static bool cgroup_sk_alloc_disabled __read_mostly;
+-
+-void cgroup_sk_alloc_disable(void)
+-{
+- if (cgroup_sk_alloc_disabled)
+- return;
+- pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
+- cgroup_sk_alloc_disabled = true;
+-}
+-
+-#else
+-
+-#define cgroup_sk_alloc_disabled false
+-
+-#endif
+-
+ void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
+ {
+- if (cgroup_sk_alloc_disabled) {
+- skcd->no_refcnt = 1;
+- return;
+- }
+-
+ /* Don't associate the sock with unrelated interrupted task's cgroup. */
+ if (in_interrupt())
+ return;
+
+ rcu_read_lock();
+-
+ while (true) {
+ struct css_set *cset;
+
+ cset = task_css_set(current);
+ if (likely(cgroup_tryget(cset->dfl_cgrp))) {
+- skcd->val = (unsigned long)cset->dfl_cgrp;
++ skcd->cgroup = cset->dfl_cgrp;
+ cgroup_bpf_get(cset->dfl_cgrp);
+ break;
+ }
+ cpu_relax();
+ }
+-
+ rcu_read_unlock();
+ }
+
+ void cgroup_sk_clone(struct sock_cgroup_data *skcd)
+ {
+- if (skcd->val) {
+- if (skcd->no_refcnt)
+- return;
+- /*
+- * We might be cloning a socket which is left in an empty
+- * cgroup and the cgroup might have already been rmdir'd.
+- * Don't use cgroup_get_live().
+- */
+- cgroup_get(sock_cgroup_ptr(skcd));
+- cgroup_bpf_get(sock_cgroup_ptr(skcd));
+- }
++ struct cgroup *cgrp = sock_cgroup_ptr(skcd);
++
++ /*
++ * We might be cloning a socket which is left in an empty
++ * cgroup and the cgroup might have already been rmdir'd.
++ * Don't use cgroup_get_live().
++ */
++ cgroup_get(cgrp);
++ cgroup_bpf_get(cgrp);
+ }
+
+ void cgroup_sk_free(struct sock_cgroup_data *skcd)
+ {
+ struct cgroup *cgrp = sock_cgroup_ptr(skcd);
+
+- if (skcd->no_refcnt)
+- return;
+ cgroup_bpf_put(cgrp);
+ cgroup_put(cgrp);
+ }
+--- a/net/core/netclassid_cgroup.c
++++ b/net/core/netclassid_cgroup.c
+@@ -72,11 +72,8 @@ static int update_classid_sock(const voi
+ struct update_classid_context *ctx = (void *)v;
+ struct socket *sock = sock_from_file(file, &err);
+
+- if (sock) {
+- spin_lock(&cgroup_sk_update_lock);
++ if (sock)
+ sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
+- spin_unlock(&cgroup_sk_update_lock);
+- }
+ if (--ctx->batch == 0) {
+ ctx->batch = UPDATE_CLASSID_BATCH;
+ return n + 1;
+@@ -122,8 +119,6 @@ static int write_classid(struct cgroup_s
+ struct css_task_iter it;
+ struct task_struct *p;
+
+- cgroup_sk_alloc_disable();
+-
+ cs->classid = (u32)value;
+
+ css_task_iter_start(css, 0, &it);
+--- a/net/core/netprio_cgroup.c
++++ b/net/core/netprio_cgroup.c
+@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kern
+ if (!dev)
+ return -ENODEV;
+
+- cgroup_sk_alloc_disable();
+-
+ rtnl_lock();
+
+ ret = netprio_set_prio(of_css(of), dev, prio);
+@@ -222,12 +220,10 @@ static int update_netprio(const void *v,
+ {
+ int err;
+ struct socket *sock = sock_from_file(file, &err);
+- if (sock) {
+- spin_lock(&cgroup_sk_update_lock);
++
++ if (sock)
+ sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
+ (unsigned long)v);
+- spin_unlock(&cgroup_sk_update_lock);
+- }
+ return 0;
+ }
+
+@@ -236,8 +232,6 @@ static void net_prio_attach(struct cgrou
+ struct task_struct *p;
+ struct cgroup_subsys_state *css;
+
+- cgroup_sk_alloc_disable();
+-
+ cgroup_taskset_for_each(p, css, tset) {
+ void *v = (void *)(unsigned long)css->id;
+
--- /dev/null
+From 50151b7f1c79a09117837eb95b76c2de76841dab Mon Sep 17 00:00:00 2001
+From: Bob Zhou <bob.zhou@amd.com>
+Date: Fri, 31 May 2024 15:01:22 +0800
+Subject: drm/amd/pm: Fix the null pointer dereference for vega10_hwmgr
+
+From: Bob Zhou <bob.zhou@amd.com>
+
+commit 50151b7f1c79a09117837eb95b76c2de76841dab upstream.
+
+Check the return value and handle NULL pointers to avoid a NULL pointer dereference.
+
+Signed-off-by: Bob Zhou <bob.zhou@amd.com>
+Reviewed-by: Tim Huang <Tim.Huang@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Mukul Sikka <mukul.sikka@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c | 30 +++++++++++++++---
+ 1 file changed, 26 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+@@ -3410,13 +3410,17 @@ static int vega10_find_dpm_states_clocks
+ const struct vega10_power_state *vega10_ps =
+ cast_const_phw_vega10_power_state(states->pnew_state);
+ struct vega10_single_dpm_table *sclk_table = &(data->dpm_table.gfx_table);
+- uint32_t sclk = vega10_ps->performance_levels
+- [vega10_ps->performance_level_count - 1].gfx_clock;
+ struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
+- uint32_t mclk = vega10_ps->performance_levels
+- [vega10_ps->performance_level_count - 1].mem_clock;
++ uint32_t sclk, mclk;
+ uint32_t i;
+
++ if (vega10_ps == NULL)
++ return -EINVAL;
++ sclk = vega10_ps->performance_levels
++ [vega10_ps->performance_level_count - 1].gfx_clock;
++ mclk = vega10_ps->performance_levels
++ [vega10_ps->performance_level_count - 1].mem_clock;
++
+ for (i = 0; i < sclk_table->count; i++) {
+ if (sclk == sclk_table->dpm_levels[i].value)
+ break;
+@@ -3723,6 +3727,9 @@ static int vega10_generate_dpm_level_ena
+ cast_const_phw_vega10_power_state(states->pnew_state);
+ int i;
+
++ if (vega10_ps == NULL)
++ return -EINVAL;
++
+ PP_ASSERT_WITH_CODE(!vega10_trim_dpm_states(hwmgr, vega10_ps),
+ "Attempt to Trim DPM States Failed!",
+ return -1);
+@@ -4858,6 +4865,9 @@ static int vega10_check_states_equal(str
+
+ psa = cast_const_phw_vega10_power_state(pstate1);
+ psb = cast_const_phw_vega10_power_state(pstate2);
++ if (psa == NULL || psb == NULL)
++ return -EINVAL;
++
+ /* If the two states don't even have the same number of performance levels they cannot be the same state. */
+ if (psa->performance_level_count != psb->performance_level_count) {
+ *equal = false;
+@@ -4983,6 +4993,8 @@ static int vega10_set_sclk_od(struct pp_
+ return -EINVAL;
+
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
++ if (vega10_ps == NULL)
++ return -EINVAL;
+
+ vega10_ps->performance_levels
+ [vega10_ps->performance_level_count - 1].gfx_clock =
+@@ -5034,6 +5046,8 @@ static int vega10_set_mclk_od(struct pp_
+ return -EINVAL;
+
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
++ if (vega10_ps == NULL)
++ return -EINVAL;
+
+ vega10_ps->performance_levels
+ [vega10_ps->performance_level_count - 1].mem_clock =
+@@ -5269,6 +5283,9 @@ static void vega10_odn_update_power_stat
+ return;
+
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
++ if (vega10_ps == NULL)
++ return;
++
+ max_level = vega10_ps->performance_level_count - 1;
+
+ if (vega10_ps->performance_levels[max_level].gfx_clock !=
+@@ -5291,6 +5308,9 @@ static void vega10_odn_update_power_stat
+
+ ps = (struct pp_power_state *)((unsigned long)(hwmgr->ps) + hwmgr->ps_size * (hwmgr->num_ps - 1));
+ vega10_ps = cast_phw_vega10_power_state(&ps->hardware);
++ if (vega10_ps == NULL)
++ return;
++
+ max_level = vega10_ps->performance_level_count - 1;
+
+ if (vega10_ps->performance_levels[max_level].gfx_clock !=
+@@ -5481,6 +5501,8 @@ static int vega10_get_performance_level(
+ return -EINVAL;
+
+ ps = cast_const_phw_vega10_power_state(state);
++ if (ps == NULL)
++ return -EINVAL;
+
+ i = index > ps->performance_level_count - 1 ?
+ ps->performance_level_count - 1 : index;
--- /dev/null
+From 04e568a3b31cfbd545c04c8bfc35c20e5ccfce0f Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Wed, 7 Dec 2022 12:27:04 +0100
+Subject: ext4: handle redirtying in ext4_bio_write_page()
+
+From: Jan Kara <jack@suse.cz>
+
+commit 04e568a3b31cfbd545c04c8bfc35c20e5ccfce0f upstream.
+
+Since we want to transition transaction commits to use ext4_writepages()
+for writing back ordered data, add handling of page redirtying into
+ext4_bio_write_page(). Also move buffer dirty bit clearing into the same
+place as other buffer state handling.
+
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20221207112722.22220-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ext4/page-io.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/fs/ext4/page-io.c
++++ b/fs/ext4/page-io.c
+@@ -493,6 +493,13 @@ int ext4_bio_write_page(struct ext4_io_s
+ /* A hole? We can safely clear the dirty bit */
+ if (!buffer_mapped(bh))
+ clear_buffer_dirty(bh);
++ /*
++ * Keeping dirty some buffer we cannot write? Make
++ * sure to redirty the page. This happens e.g. when
++ * doing writeout for transaction commit.
++ */
++ if (buffer_dirty(bh) && !PageDirty(page))
++ redirty_page_for_writepage(wbc, page);
+ if (io->io_bio)
+ ext4_io_submit(io);
+ continue;
+@@ -500,6 +507,7 @@ int ext4_bio_write_page(struct ext4_io_s
+ if (buffer_new(bh))
+ clear_buffer_new(bh);
+ set_buffer_async_write(bh);
++ clear_buffer_dirty(bh);
+ nr_to_submit++;
+ } while ((bh = bh->b_this_page) != head);
+
+@@ -542,7 +550,10 @@ int ext4_bio_write_page(struct ext4_io_s
+ printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
+ redirty_page_for_writepage(wbc, page);
+ do {
+- clear_buffer_async_write(bh);
++ if (buffer_async_write(bh)) {
++ clear_buffer_async_write(bh);
++ set_buffer_dirty(bh);
++ }
+ bh = bh->b_this_page;
+ } while (bh != head);
+ goto unlock;
+@@ -555,7 +566,6 @@ int ext4_bio_write_page(struct ext4_io_s
+ continue;
+ io_submit_add_bh(io, inode, page, bounce_page, bh);
+ nr_submitted++;
+- clear_buffer_dirty(bh);
+ } while ((bh = bh->b_this_page) != head);
+
+ unlock:
--- /dev/null
+From 871019b22d1bcc9fab2d1feba1b9a564acbb6e99 Mon Sep 17 00:00:00 2001
+From: Stanislav Fomichev <sdf@google.com>
+Date: Wed, 8 Nov 2023 13:13:25 -0800
+Subject: net: set SOCK_RCU_FREE before inserting socket into hashtable
+
+From: Stanislav Fomichev <sdf@google.com>
+
+commit 871019b22d1bcc9fab2d1feba1b9a564acbb6e99 upstream.
+
+We've started to see the following kernel traces:
+
+ WARNING: CPU: 83 PID: 0 at net/core/filter.c:6641 sk_lookup+0x1bd/0x1d0
+
+ Call Trace:
+ <IRQ>
+ __bpf_skc_lookup+0x10d/0x120
+ bpf_sk_lookup+0x48/0xd0
+ bpf_sk_lookup_tcp+0x19/0x20
+ bpf_prog_<redacted>+0x37c/0x16a3
+ cls_bpf_classify+0x205/0x2e0
+ tcf_classify+0x92/0x160
+ __netif_receive_skb_core+0xe52/0xf10
+ __netif_receive_skb_list_core+0x96/0x2b0
+ napi_complete_done+0x7b5/0xb70
+ <redacted>_poll+0x94/0xb0
+ net_rx_action+0x163/0x1d70
+ __do_softirq+0xdc/0x32e
+ asm_call_irq_on_stack+0x12/0x20
+ </IRQ>
+ do_softirq_own_stack+0x36/0x50
+ do_softirq+0x44/0x70
+
+__inet_hash can race with lockless (rcu) readers on the other cpus:
+
+ __inet_hash
+ __sk_nulls_add_node_rcu
+ <- (bpf triggers here)
+ sock_set_flag(SOCK_RCU_FREE)
+
+Let's move the SOCK_RCU_FREE part up a bit, before we insert the
+socket into the hashtables. Note that the race is really harmless;
+the bpf callers handle this situation (where the listener socket
+doesn't have SOCK_RCU_FREE set) correctly, so the only
+annoyance is a WARN_ONCE.
+
+More details from Eric regarding SOCK_RCU_FREE timeline:
+
+Commit 3b24d854cb35 ("tcp/dccp: do not touch listener sk_refcnt under
+synflood") added SOCK_RCU_FREE. At that time, the precise location of
+sock_set_flag(sk, SOCK_RCU_FREE) did not matter, because the thread calling
+__inet_hash() owns a reference on sk. SOCK_RCU_FREE was only tested
+at dismantle time.
+
+Commit 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
+started checking SOCK_RCU_FREE _after_ the lookup to infer whether
+the refcount has been taken care of.
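+
+The underlying rule is the usual RCU publication order; a kernel-style
+sketch, with struct foo, FOO_READY, node and head as made-up names (not
+meant to build on its own):
+
+  struct foo *p = kmalloc(sizeof(*p), GFP_KERNEL);
+
+  if (!p)
+      return -ENOMEM;
+  p->flags = FOO_READY;                 /* finish every field readers test... */
+  hlist_add_head_rcu(&p->node, &head);  /* ...before readers can find the object */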
+
+Fixes: 6acc9b432e67 ("bpf: Add helper to retrieve socket in BPF")
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Stanislav Fomichev <sdf@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+[Resolved conflict for 5.10 and below.]
+Signed-off-by: Siddh Raman Pant <siddh.raman.pant@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_hashtables.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -653,6 +653,7 @@ int __inet_hash(struct sock *sk, struct
+ if (err)
+ goto unlock;
+ }
++ sock_set_flag(sk, SOCK_RCU_FREE);
+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
+ sk->sk_family == AF_INET6)
+ __sk_nulls_add_node_tail_rcu(sk, &ilb->nulls_head);
+@@ -660,7 +661,6 @@ int __inet_hash(struct sock *sk, struct
+ __sk_nulls_add_node_rcu(sk, &ilb->nulls_head);
+ inet_hash2(hashinfo, sk);
+ ilb->count++;
+- sock_set_flag(sk, SOCK_RCU_FREE);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ unlock:
+ spin_unlock(&ilb->lock);
--- /dev/null
+From cc5645fddb0ce28492b15520306d092730dffa48 Mon Sep 17 00:00:00 2001
+From: Nikita Kiryushin <kiryushin@ancud.ru>
+Date: Wed, 27 Mar 2024 20:47:47 +0300
+Subject: rcu-tasks: Fix show_rcu_tasks_trace_gp_kthread buffer overflow
+
+From: Nikita Kiryushin <kiryushin@ancud.ru>
+
+commit cc5645fddb0ce28492b15520306d092730dffa48 upstream.
+
+There is a possibility of buffer overflow in
+show_rcu_tasks_trace_gp_kthread() if the counters passed
+to sprintf() are huge. The counter values needed for this
+are unrealistically high, but a buffer overflow is still
+possible.
+
+Use snprintf() with buffer size instead of sprintf().
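+
+A stand-alone user-space illustration of the difference: with 64-bit
+longs the worst-case string needs about 78 bytes, so sprintf() into a
+64-byte buffer could overflow, while snprintf() truncates and reports the
+length it wanted.
+
+  #include <limits.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+      char buf[64];
+      int needed = snprintf(buf, sizeof(buf), "N%d h:%lu/%lu/%lu",
+                            INT_MIN, ULONG_MAX, ULONG_MAX, ULONG_MAX);
+
+      /* snprintf() stopped after 63 characters plus the NUL; sprintf()
+       * would have kept writing past the end of buf */
+      printf("needed %d bytes, buffer holds %zu\n", needed, sizeof(buf));
+      return 0;
+  }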
+
+Found by Linux Verification Center (linuxtesting.org) with SVACE.
+
+Fixes: edf3775f0ad6 ("rcu-tasks: Add count for idle tasks on offline CPUs")
+Signed-off-by: Nikita Kiryushin <kiryushin@ancud.ru>
+Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Vamsi Krishna Brahmajosyula <vamsi-krishna.brahmajosyula@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/rcu/tasks.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -1240,7 +1240,7 @@ static void show_rcu_tasks_trace_gp_kthr
+ {
+ char buf[64];
+
+- sprintf(buf, "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end),
++ snprintf(buf, sizeof(buf), "N%d h:%lu/%lu/%lu", atomic_read(&trc_n_readers_need_end),
+ data_race(n_heavy_reader_ofl_updates),
+ data_race(n_heavy_reader_updates),
+ data_race(n_heavy_reader_attempts));
block-remove-the-blk_flush_integrity-call-in-blk_int.patch
drm-amd-display-skip-wbscl_set_scaler_filter-if-filt.patch
media-uvcvideo-enforce-alignment-of-frame-and-interv.patch
+block-initialize-integrity-buffer-to-zero-before-writing-it-to-media.patch
+drm-amd-pm-fix-the-null-pointer-dereference-for-vega10_hwmgr.patch
+bpf-cgroups-fix-cgroup-v2-fallback-on-v1-v2-mixed-mode.patch
+net-set-sock_rcu_free-before-inserting-socket-into-hashtable.patch
+virtio_net-fix-napi_skb_cache_put-warning.patch
+rcu-tasks-fix-show_rcu_tasks_trace_gp_kthread-buffer-overflow.patch
+udf-limit-file-size-to-4tb.patch
+ext4-handle-redirtying-in-ext4_bio_write_page.patch
+bpf-cgroup-assign-cgroup-in-cgroup_sk_alloc-when-called-from-interrupt.patch
--- /dev/null
+From c2efd13a2ed4f29bf9ef14ac2fbb7474084655f8 Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Wed, 25 Jan 2023 17:56:06 +0100
+Subject: udf: Limit file size to 4TB
+
+From: Jan Kara <jack@suse.cz>
+
+commit c2efd13a2ed4f29bf9ef14ac2fbb7474084655f8 upstream.
+
+UDF disk format supports in principle file sizes up to 1<<64-1. However
+the file space (including holes) is described by a linked list of
+extents, each of which can have at most 1GB. Thus the creation and
+handling of extents gets unusably slow beyond a certain point. Limit the
+file size to 4TB to avoid locking up the kernel too easily.
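+
+As a quick, stand-alone sanity check of the numbers (assuming the ~1GB
+per-extent maximum quoted above), a file at the new cap is still
+described by only a few thousand extents:
+
+  #include <stdio.h>
+
+  int main(void)
+  {
+      unsigned long long per_extent = 1ULL << 30;  /* at most ~1GB per extent */
+      unsigned long long cap        = 1ULL << 42;  /* the new 4TB limit */
+
+      printf("extents needed at the cap: %llu\n", cap / per_extent);  /* 4096 */
+      return 0;
+  }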
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/udf/super.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+--- a/fs/udf/super.c
++++ b/fs/udf/super.c
+@@ -86,6 +86,13 @@ enum {
+ #define UDF_MAX_LVID_NESTING 1000
+
+ enum { UDF_MAX_LINKS = 0xffff };
++/*
++ * We limit filesize to 4TB. This is arbitrary as the on-disk format supports
++ * more but because the file space is described by a linked list of extents,
++ * each of which can have at most 1GB, the creation and handling of extents
++ * gets unusably slow beyond certain point...
++ */
++#define UDF_MAX_FILESIZE (1ULL << 42)
+
+ /* These are the "meat" - everything else is stuffing */
+ static int udf_fill_super(struct super_block *, void *, int);
+@@ -2301,7 +2308,7 @@ static int udf_fill_super(struct super_b
+ ret = -ENOMEM;
+ goto error_out;
+ }
+- sb->s_maxbytes = MAX_LFS_FILESIZE;
++ sb->s_maxbytes = UDF_MAX_FILESIZE;
+ sb->s_max_links = UDF_MAX_LINKS;
+ return 0;
+
--- /dev/null
+From f8321fa75102246d7415a6af441872f6637c93ab Mon Sep 17 00:00:00 2001
+From: Breno Leitao <leitao@debian.org>
+Date: Fri, 12 Jul 2024 04:53:25 -0700
+Subject: virtio_net: Fix napi_skb_cache_put warning
+
+From: Breno Leitao <leitao@debian.org>
+
+commit f8321fa75102246d7415a6af441872f6637c93ab upstream.
+
+After the commit bdacf3e34945 ("net: Use nested-BH locking for
+napi_alloc_cache.") was merged, the following warning began to appear:
+
+ WARNING: CPU: 5 PID: 1 at net/core/skbuff.c:1451 napi_skb_cache_put+0x82/0x4b0
+
+ __warn+0x12f/0x340
+ napi_skb_cache_put+0x82/0x4b0
+ napi_skb_cache_put+0x82/0x4b0
+ report_bug+0x165/0x370
+ handle_bug+0x3d/0x80
+ exc_invalid_op+0x1a/0x50
+ asm_exc_invalid_op+0x1a/0x20
+ __free_old_xmit+0x1c8/0x510
+ napi_skb_cache_put+0x82/0x4b0
+ __free_old_xmit+0x1c8/0x510
+ __free_old_xmit+0x1c8/0x510
+ __pfx___free_old_xmit+0x10/0x10
+
+The issue arises because virtio is assuming it's running in NAPI context
+even when it's not, such as in the netpoll case.
+
+To resolve this, modify virtnet_poll_tx() to only set NAPI when budget
+is available. Same for virtnet_poll_cleantx(), which always assumed that
+it was in a NAPI context.
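+
+A kernel-style sketch of the convention relied on here (example_poll() is
+a made-up callback; not meant to build on its own): netpoll invokes a
+driver's NAPI poll callback with a budget of 0, so a non-zero budget is
+what identifies a genuine NAPI context.
+
+  static int example_poll(struct napi_struct *napi, int budget)
+  {
+      bool in_napi = !!budget;  /* false when invoked from netpoll */
+
+      /* hand in_napi down to the TX-completion path: with a zero budget,
+       * napi_consume_skb() falls back to dev_consume_skb_any() instead of
+       * touching the per-CPU NAPI skb cache */
+      ...
+  }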
+
+Fixes: df133f3f9625 ("virtio_net: bulk free tx skbs")
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Reviewed-by: Heng Qi <hengqi@linux.alibaba.com>
+Link: https://patch.msgid.link/20240712115325.54175-1-leitao@debian.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[Shivani: Modified to apply on v4.19.y-v5.10.y]
+Signed-off-by: Shivani Agarwal <shivani.agarwal@broadcom.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/virtio_net.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -1497,7 +1497,7 @@ static bool is_xdp_raw_buffer_queue(stru
+ return false;
+ }
+
+-static void virtnet_poll_cleantx(struct receive_queue *rq)
++static void virtnet_poll_cleantx(struct receive_queue *rq, int budget)
+ {
+ struct virtnet_info *vi = rq->vq->vdev->priv;
+ unsigned int index = vq2rxq(rq->vq);
+@@ -1508,7 +1508,7 @@ static void virtnet_poll_cleantx(struct
+ return;
+
+ if (__netif_tx_trylock(txq)) {
+- free_old_xmit_skbs(sq, true);
++ free_old_xmit_skbs(sq, !!budget);
+ __netif_tx_unlock(txq);
+ }
+
+@@ -1525,7 +1525,7 @@ static int virtnet_poll(struct napi_stru
+ unsigned int received;
+ unsigned int xdp_xmit = 0;
+
+- virtnet_poll_cleantx(rq);
++ virtnet_poll_cleantx(rq, budget);
+
+ received = virtnet_receive(rq, budget, &xdp_xmit);
+
+@@ -1598,7 +1598,7 @@ static int virtnet_poll_tx(struct napi_s
+ txq = netdev_get_tx_queue(vi->dev, index);
+ __netif_tx_lock(txq, raw_smp_processor_id());
+ virtqueue_disable_cb(sq->vq);
+- free_old_xmit_skbs(sq, true);
++ free_old_xmit_skbs(sq, !!budget);
+
+ opaque = virtqueue_enable_cb_prepare(sq->vq);
+