]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
authorSasha Levin <sashal@kernel.org>
Tue, 10 Jan 2023 01:55:36 +0000 (20:55 -0500)
committerSasha Levin <sashal@kernel.org>
Tue, 10 Jan 2023 01:55:36 +0000 (20:55 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
62 files changed:
queue-5.10/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch [new file with mode: 0644]
queue-5.10/bpf-pull-before-calling-skb_postpull_rcsum.patch [new file with mode: 0644]
queue-5.10/caif-fix-memory-leak-in-cfctrl_linkup_request.patch [new file with mode: 0644]
queue-5.10/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch [new file with mode: 0644]
queue-5.10/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch [new file with mode: 0644]
queue-5.10/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch [new file with mode: 0644]
queue-5.10/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch [new file with mode: 0644]
queue-5.10/drm-panfrost-fix-gem-handle-creation-ref-counting.patch [new file with mode: 0644]
queue-5.10/ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch [new file with mode: 0644]
queue-5.10/ext4-fix-deadlock-due-to-mbcache-entry-corruption.patch [new file with mode: 0644]
queue-5.10/ext4-fix-leaking-uninitialized-memory-in-fast-commit.patch [new file with mode: 0644]
queue-5.10/ext4-fix-race-when-reusing-xattr-blocks.patch [new file with mode: 0644]
queue-5.10/ext4-fix-various-seppling-typos.patch [new file with mode: 0644]
queue-5.10/ext4-goto-right-label-failed_mount3a.patch [new file with mode: 0644]
queue-5.10/ext4-move-functions-in-super.c.patch [new file with mode: 0644]
queue-5.10/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch [new file with mode: 0644]
queue-5.10/ext4-simplify-ext4-error-translation.patch [new file with mode: 0644]
queue-5.10/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch [new file with mode: 0644]
queue-5.10/ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch [new file with mode: 0644]
queue-5.10/ext4-use-memcpy_to_page-in-pagecache_write.patch [new file with mode: 0644]
queue-5.10/filelock-new-helper-vfs_inode_has_locks.patch [new file with mode: 0644]
queue-5.10/fs-ext4-initialize-fsdata-in-pagecache_write.patch [new file with mode: 0644]
queue-5.10/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch [new file with mode: 0644]
queue-5.10/mbcache-add-functions-to-delete-entry-if-unused.patch [new file with mode: 0644]
queue-5.10/mbcache-automatically-delete-entries-from-cache-on-f.patch [new file with mode: 0644]
queue-5.10/mbcache-don-t-reclaim-used-entries.patch [new file with mode: 0644]
queue-5.10/mm-highmem-lift-memcpy_-to-from-_page-to-core.patch [new file with mode: 0644]
queue-5.10/net-amd-xgbe-add-missed-tasklet_kill.patch [new file with mode: 0644]
queue-5.10/net-hns3-add-interrupts-re-initialization-while-doin.patch [new file with mode: 0644]
queue-5.10/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch [new file with mode: 0644]
queue-5.10/net-mlx5-avoid-recovery-in-probe-flows.patch [new file with mode: 0644]
queue-5.10/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch [new file with mode: 0644]
queue-5.10/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch [new file with mode: 0644]
queue-5.10/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch [new file with mode: 0644]
queue-5.10/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch [new file with mode: 0644]
queue-5.10/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch [new file with mode: 0644]
queue-5.10/net-sched-fix-memory-leak-in-tcindex_set_parms.patch [new file with mode: 0644]
queue-5.10/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch [new file with mode: 0644]
queue-5.10/netfilter-ipset-rework-long-task-execution-when-addi.patch [new file with mode: 0644]
queue-5.10/nfc-fix-potential-resource-leaks.patch [new file with mode: 0644]
queue-5.10/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch [new file with mode: 0644]
queue-5.10/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch [new file with mode: 0644]
queue-5.10/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch [new file with mode: 0644]
queue-5.10/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch [new file with mode: 0644]
queue-5.10/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch [new file with mode: 0644]
queue-5.10/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch [new file with mode: 0644]
queue-5.10/ravb-fix-failed-to-switch-device-to-config-mode-mess.patch [new file with mode: 0644]
queue-5.10/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch [new file with mode: 0644]
queue-5.10/riscv-stacktrace-fix-stack-output-without-ra-on-the-.patch [new file with mode: 0644]
queue-5.10/riscv-stacktrace-fixup-ftrace_graph_ret_addr-retp-ar.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/staging-media-tegra-video-fix-device_node-use-after-.patch [new file with mode: 0644]
queue-5.10/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch [new file with mode: 0644]
queue-5.10/udf-fix-extension-of-the-last-extent-in-the-file.patch [new file with mode: 0644]
queue-5.10/usb-rndis_host-secure-rndis_query-check-against-int-.patch [new file with mode: 0644]
queue-5.10/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch [new file with mode: 0644]
queue-5.10/vhost-fix-range-used-in-translate_desc.patch [new file with mode: 0644]
queue-5.10/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch [new file with mode: 0644]
queue-5.10/vmxnet3-correctly-report-csum_level-for-encapsulated.patch [new file with mode: 0644]
queue-5.10/vringh-fix-range-used-in-iotlb_translate.patch [new file with mode: 0644]
queue-5.10/x86-kprobes-convert-to-insn_decode.patch [new file with mode: 0644]
queue-5.10/x86-kprobes-fix-optprobe-optimization-check-with-con.patch [new file with mode: 0644]

diff --git a/queue-5.10/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch b/queue-5.10/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch
new file mode 100644 (file)
index 0000000..166c06a
--- /dev/null
@@ -0,0 +1,59 @@
+From d8f13e5a796c3a9e068b494702f7e0dda43da1db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 13:32:46 +0100
+Subject: ASoC: Intel: bytcr_rt5640: Add quirk for the Advantech MICA-071
+ tablet
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+[ Upstream commit a1dec9d70b6ad97087b60b81d2492134a84208c6 ]
+
+The Advantech MICA-071 tablet deviates from the defaults for
+a non CR Bay Trail based tablet in several ways:
+
+1. It uses an analog MIC on IN3 rather than using DMIC1
+2. It only has 1 speaker
+3. It needs the OVCD current threshold to be set to 1500uA instead of
+   the default 2000uA to reliably differentiate between headphones vs
+   headsets
+
+Add a quirk with these settings for this tablet.
+
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+Link: https://lore.kernel.org/r/20221213123246.11226-1-hdegoede@redhat.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/intel/boards/bytcr_rt5640.c | 15 +++++++++++++++
+ 1 file changed, 15 insertions(+)
+
+diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c
+index 3020a993f6ef..8a99cb6dfcd6 100644
+--- a/sound/soc/intel/boards/bytcr_rt5640.c
++++ b/sound/soc/intel/boards/bytcr_rt5640.c
+@@ -430,6 +430,21 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
+                                       BYT_RT5640_SSP0_AIF1 |
+                                       BYT_RT5640_MCLK_EN),
+       },
++      {
++              /* Advantech MICA-071 */
++              .matches = {
++                      DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Advantech"),
++                      DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MICA-071"),
++              },
++              /* OVCD Th = 1500uA to reliable detect head-phones vs -set */
++              .driver_data = (void *)(BYT_RT5640_IN3_MAP |
++                                      BYT_RT5640_JD_SRC_JD2_IN4N |
++                                      BYT_RT5640_OVCD_TH_1500UA |
++                                      BYT_RT5640_OVCD_SF_0P75 |
++                                      BYT_RT5640_MONO_SPEAKER |
++                                      BYT_RT5640_DIFF_MIC |
++                                      BYT_RT5640_MCLK_EN),
++      },
+       {
+               .matches = {
+                       DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"),
+-- 
+2.35.1
+
diff --git a/queue-5.10/bpf-pull-before-calling-skb_postpull_rcsum.patch b/queue-5.10/bpf-pull-before-calling-skb_postpull_rcsum.patch
new file mode 100644 (file)
index 0000000..fb24a93
--- /dev/null
@@ -0,0 +1,61 @@
+From 4f6827171291d128efd7c11c395dd9c39ebbd3ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 16:47:00 -0800
+Subject: bpf: pull before calling skb_postpull_rcsum()
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 54c3f1a81421f85e60ae2eaae7be3727a09916ee ]
+
+Anand hit a BUG() when pulling off headers on egress to a SW tunnel.
+We get to skb_checksum_help() with an invalid checksum offset
+(commit d7ea0d9df2a6 ("net: remove two BUG() from skb_checksum_help()")
+converted those BUGs to WARN_ONs()).
+He points out oddness in how skb_postpull_rcsum() gets used.
+Indeed looks like we should pull before "postpull", otherwise
+the CHECKSUM_PARTIAL fixup from skb_postpull_rcsum() will not
+be able to do its job:
+
+       if (skb->ip_summed == CHECKSUM_PARTIAL &&
+           skb_checksum_start_offset(skb) < 0)
+               skb->ip_summed = CHECKSUM_NONE;
+
+Reported-by: Anand Parthasarathy <anpartha@meta.com>
+Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper")
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Link: https://lore.kernel.org/r/20221220004701.402165-1-kuba@kernel.org
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/filter.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/filter.c b/net/core/filter.c
+index e3cdbd4996e0..a5df0cf46bbf 100644
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -3201,15 +3201,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
+ static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
+ {
++      void *old_data;
++
+       /* skb_ensure_writable() is not needed here, as we're
+        * already working on an uncloned skb.
+        */
+       if (unlikely(!pskb_may_pull(skb, off + len)))
+               return -ENOMEM;
+-      skb_postpull_rcsum(skb, skb->data + off, len);
+-      memmove(skb->data + len, skb->data, off);
++      old_data = skb->data;
+       __skb_pull(skb, len);
++      skb_postpull_rcsum(skb, old_data + off, len);
++      memmove(skb->data, old_data, off);
+       return 0;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/caif-fix-memory-leak-in-cfctrl_linkup_request.patch b/queue-5.10/caif-fix-memory-leak-in-cfctrl_linkup_request.patch
new file mode 100644 (file)
index 0000000..c4981cd
--- /dev/null
@@ -0,0 +1,47 @@
+From 7416533f696975e399043b8bcabadb04a2fdc487 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Jan 2023 14:51:46 +0800
+Subject: caif: fix memory leak in cfctrl_linkup_request()
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit fe69230f05897b3de758427b574fc98025dfc907 ]
+
+When linktype is unknown or kzalloc failed in cfctrl_linkup_request(),
+pkt is not released. Add release process to error path.
+
+Fixes: b482cd2053e3 ("net-caif: add CAIF core protocol stack")
+Fixes: 8d545c8f958f ("caif: Disconnect without waiting for response")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Link: https://lore.kernel.org/r/20230104065146.1153009-1-shaozhengchao@huawei.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/caif/cfctrl.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
+index 2809cbd6b7f7..d8cb4b2a076b 100644
+--- a/net/caif/cfctrl.c
++++ b/net/caif/cfctrl.c
+@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer,
+       default:
+               pr_warn("Request setup of bad link type = %d\n",
+                       param->linktype);
++              cfpkt_destroy(pkt);
+               return -EINVAL;
+       }
+       req = kzalloc(sizeof(*req), GFP_KERNEL);
+-      if (!req)
++      if (!req) {
++              cfpkt_destroy(pkt);
+               return -ENOMEM;
++      }
++
+       req->client_layer = user_layer;
+       req->cmd = CFCTRL_CMD_LINK_SETUP;
+       req->param = *param;
+-- 
+2.35.1
+
diff --git a/queue-5.10/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch b/queue-5.10/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch
new file mode 100644 (file)
index 0000000..7386944
--- /dev/null
@@ -0,0 +1,85 @@
+From 3346e174106072f3133daa6aa377a4779ebadadf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Nov 2022 10:43:21 +0800
+Subject: ceph: switch to vfs_inode_has_locks() to fix file lock bug
+
+From: Xiubo Li <xiubli@redhat.com>
+
+[ Upstream commit 461ab10ef7e6ea9b41a0571a7fc6a72af9549a3c ]
+
+For the POSIX locks they are using the same owner, which is the
+thread id. And multiple POSIX locks could be merged into a single one,
+so checking whether the 'file' has locks may fail.
+
+A file where some openers use locking and others don't is a
+really odd usage pattern though. Locks are like stoplights -- they
+only work if everyone pays attention to them.
+
+Just switch ceph_get_caps() to check whether any locks are set on
+the inode. If there are POSIX/OFD/FLOCK locks on the file at the
+time, we should set CHECK_FILELOCK, regardless of what fd was used
+to set the lock.
+
+Fixes: ff5d913dfc71 ("ceph: return -EIO if read/write against filp that lost file locks")
+Signed-off-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ceph/caps.c  | 2 +-
+ fs/ceph/locks.c | 4 ----
+ fs/ceph/super.h | 1 -
+ 3 files changed, 1 insertion(+), 6 deletions(-)
+
+diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
+index 51562d36fa83..210496dc2fd4 100644
+--- a/fs/ceph/caps.c
++++ b/fs/ceph/caps.c
+@@ -2957,7 +2957,7 @@ int ceph_get_caps(struct file *filp, int need, int want,
+       while (true) {
+               flags &= CEPH_FILE_MODE_MASK;
+-              if (atomic_read(&fi->num_locks))
++              if (vfs_inode_has_locks(inode))
+                       flags |= CHECK_FILELOCK;
+               _got = 0;
+               ret = try_get_cap_refs(inode, need, want, endoff,
+diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
+index 048a435a29be..674d6ea89f71 100644
+--- a/fs/ceph/locks.c
++++ b/fs/ceph/locks.c
+@@ -32,18 +32,14 @@ void __init ceph_flock_init(void)
+ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
+ {
+-      struct ceph_file_info *fi = dst->fl_file->private_data;
+       struct inode *inode = file_inode(dst->fl_file);
+       atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+-      atomic_inc(&fi->num_locks);
+ }
+ static void ceph_fl_release_lock(struct file_lock *fl)
+ {
+-      struct ceph_file_info *fi = fl->fl_file->private_data;
+       struct inode *inode = file_inode(fl->fl_file);
+       struct ceph_inode_info *ci = ceph_inode(inode);
+-      atomic_dec(&fi->num_locks);
+       if (atomic_dec_and_test(&ci->i_filelock_ref)) {
+               /* clear error when all locks are released */
+               spin_lock(&ci->i_ceph_lock);
+diff --git a/fs/ceph/super.h b/fs/ceph/super.h
+index 4db305fd2a02..8716cb618cbb 100644
+--- a/fs/ceph/super.h
++++ b/fs/ceph/super.h
+@@ -772,7 +772,6 @@ struct ceph_file_info {
+       struct list_head rw_contexts;
+       u32 filp_gen;
+-      atomic_t num_locks;
+ };
+ struct ceph_dir_file_info {
+-- 
+2.35.1
+
diff --git a/queue-5.10/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch b/queue-5.10/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch
new file mode 100644 (file)
index 0000000..233a783
--- /dev/null
@@ -0,0 +1,39 @@
+From 79eb1ed97715af650b6d0a3366f4d85a08d94bb3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 12:53:35 +0300
+Subject: drivers/net/bonding/bond_3ad: return when there's no aggregator
+
+From: Daniil Tatianin <d-tatianin@yandex-team.ru>
+
+[ Upstream commit 9c807965483f42df1d053b7436eedd6cf28ece6f ]
+
+Otherwise we would dereference a NULL aggregator pointer when calling
+__set_agg_ports_ready on the line below.
+
+Found by Linux Verification Center (linuxtesting.org) with the SVACE
+static analysis tool.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
+Reviewed-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/bonding/bond_3ad.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
+index acb6ff0be5ff..320e5461853f 100644
+--- a/drivers/net/bonding/bond_3ad.c
++++ b/drivers/net/bonding/bond_3ad.c
+@@ -1520,6 +1520,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr)
+                       slave_err(bond->dev, port->slave->dev,
+                                 "Port %d did not find a suitable aggregator\n",
+                                 port->actor_port_number);
++                      return;
+               }
+       }
+       /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE
+-- 
+2.35.1
+
diff --git a/queue-5.10/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch b/queue-5.10/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch
new file mode 100644 (file)
index 0000000..868132f
--- /dev/null
@@ -0,0 +1,36 @@
+From 94f909db59e7ffeb90145539f2849033de797840 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Nov 2022 16:15:18 +0300
+Subject: drm/i915: unpin on error in intel_vgpu_shadow_mm_pin()
+
+From: Dan Carpenter <error27@gmail.com>
+
+[ Upstream commit 3792fc508c095abd84b10ceae12bd773e61fdc36 ]
+
+Call intel_vgpu_unpin_mm() on this error path.
+
+Fixes: 418741480809 ("drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled.")
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Link: http://patchwork.freedesktop.org/patch/msgid/Y3OQ5tgZIVxyQ/WV@kili
+Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gvt/scheduler.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
+index aed2ef6466a2..2bb6203298bc 100644
+--- a/drivers/gpu/drm/i915/gvt/scheduler.c
++++ b/drivers/gpu/drm/i915/gvt/scheduler.c
+@@ -647,6 +647,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload)
+       if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT ||
+           !workload->shadow_mm->ppgtt_mm.shadowed) {
++              intel_vgpu_unpin_mm(workload->shadow_mm);
+               gvt_vgpu_err("workload shadow ppgtt isn't ready\n");
+               return -EINVAL;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch b/queue-5.10/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch
new file mode 100644 (file)
index 0000000..e12cb4e
--- /dev/null
@@ -0,0 +1,56 @@
+From d096a74e4142c8ddf8f64d6a89dbb54c2ba56376 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 09:43:05 +0100
+Subject: drm/meson: Reduce the FIFO lines held when AFBC is not used
+
+From: Carlo Caione <ccaione@baylibre.com>
+
+[ Upstream commit 3b754ed6d1cd90017e66e5cc16f3923e4a952ffc ]
+
+Having a bigger number of FIFO lines held after vsync is only useful to
+SoCs using AFBC to give time to the AFBC decoder to be reset, configured
+and enabled again.
+
+For SoCs not using AFBC this, on the contrary, is causing on some
+displays issues and a few pixels vertical offset in the displayed image.
+
+Conditionally increase the number of lines held after vsync only for
+SoCs using AFBC, leaving the default value for all the others.
+
+Fixes: 24e0d4058eff ("drm/meson: hold 32 lines after vsync to give time for AFBC start")
+Signed-off-by: Carlo Caione <ccaione@baylibre.com>
+Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Acked-by: Neil Armstrong <neil.armstrong@linaro.org>
+[narmstrong: added fixes tag]
+Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221216-afbc_s905x-v1-0-033bebf780d9@baylibre.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/meson/meson_viu.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
+index d4b907889a21..cd399b0b7181 100644
+--- a/drivers/gpu/drm/meson/meson_viu.c
++++ b/drivers/gpu/drm/meson/meson_viu.c
+@@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv)
+       /* Initialize OSD1 fifo control register */
+       reg = VIU_OSD_DDR_PRIORITY_URGENT |
+-              VIU_OSD_HOLD_FIFO_LINES(31) |
+               VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */
+               VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */
+               VIU_OSD_FIFO_LIMITS(2);      /* fifo_lim: 2*16=32 */
+       if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A))
+-              reg |= VIU_OSD_BURST_LENGTH_32;
++              reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31));
+       else
+-              reg |= VIU_OSD_BURST_LENGTH_64;
++              reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4));
+       writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT));
+       writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT));
+-- 
+2.35.1
+
diff --git a/queue-5.10/drm-panfrost-fix-gem-handle-creation-ref-counting.patch b/queue-5.10/drm-panfrost-fix-gem-handle-creation-ref-counting.patch
new file mode 100644 (file)
index 0000000..91979d7
--- /dev/null
@@ -0,0 +1,138 @@
+From 97dae042255b2074cb2a8ed47e6c9c116d6de05e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 14:01:30 +0000
+Subject: drm/panfrost: Fix GEM handle creation ref-counting
+
+From: Steven Price <steven.price@arm.com>
+
+[ Upstream commit 4217c6ac817451d5116687f3cc6286220dc43d49 ]
+
+panfrost_gem_create_with_handle() previously returned a BO but with the
+only reference being from the handle, which user space could in theory
+guess and release, causing a use-after-free. Additionally if the call to
+panfrost_gem_mapping_get() in panfrost_ioctl_create_bo() failed then
+a(nother) reference on the BO was dropped.
+
+The _create_with_handle() is a problematic pattern, so ditch it and
+instead create the handle in panfrost_ioctl_create_bo(). If the call to
+panfrost_gem_mapping_get() fails then this means that user space has
+indeed gone behind our back and freed the handle. In which case just
+return an error code.
+
+Reported-by: Rob Clark <robdclark@chromium.org>
+Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver")
+Signed-off-by: Steven Price <steven.price@arm.com>
+Reviewed-by: Rob Clark <robdclark@gmail.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221219140130.410578-1-steven.price@arm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/panfrost/panfrost_drv.c | 27 ++++++++++++++++---------
+ drivers/gpu/drm/panfrost/panfrost_gem.c | 16 +--------------
+ drivers/gpu/drm/panfrost/panfrost_gem.h |  5 +----
+ 3 files changed, 20 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c
+index 1dfc457bbefc..4af25c0b6570 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_drv.c
++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c
+@@ -81,6 +81,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
+       struct panfrost_gem_object *bo;
+       struct drm_panfrost_create_bo *args = data;
+       struct panfrost_gem_mapping *mapping;
++      int ret;
+       if (!args->size || args->pad ||
+           (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP)))
+@@ -91,21 +92,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data,
+           !(args->flags & PANFROST_BO_NOEXEC))
+               return -EINVAL;
+-      bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags,
+-                                           &args->handle);
++      bo = panfrost_gem_create(dev, args->size, args->flags);
+       if (IS_ERR(bo))
+               return PTR_ERR(bo);
++      ret = drm_gem_handle_create(file, &bo->base.base, &args->handle);
++      if (ret)
++              goto out;
++
+       mapping = panfrost_gem_mapping_get(bo, priv);
+-      if (!mapping) {
+-              drm_gem_object_put(&bo->base.base);
+-              return -EINVAL;
++      if (mapping) {
++              args->offset = mapping->mmnode.start << PAGE_SHIFT;
++              panfrost_gem_mapping_put(mapping);
++      } else {
++              /* This can only happen if the handle from
++               * drm_gem_handle_create() has already been guessed and freed
++               * by user space
++               */
++              ret = -EINVAL;
+       }
+-      args->offset = mapping->mmnode.start << PAGE_SHIFT;
+-      panfrost_gem_mapping_put(mapping);
+-
+-      return 0;
++out:
++      drm_gem_object_put(&bo->base.base);
++      return ret;
+ }
+ /**
+diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c
+index 1d917cea5ceb..c843fbfdb878 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_gem.c
++++ b/drivers/gpu/drm/panfrost/panfrost_gem.c
+@@ -232,12 +232,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t
+ }
+ struct panfrost_gem_object *
+-panfrost_gem_create_with_handle(struct drm_file *file_priv,
+-                              struct drm_device *dev, size_t size,
+-                              u32 flags,
+-                              uint32_t *handle)
++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags)
+ {
+-      int ret;
+       struct drm_gem_shmem_object *shmem;
+       struct panfrost_gem_object *bo;
+@@ -253,16 +249,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv,
+       bo->noexec = !!(flags & PANFROST_BO_NOEXEC);
+       bo->is_heap = !!(flags & PANFROST_BO_HEAP);
+-      /*
+-       * Allocate an id of idr table where the obj is registered
+-       * and handle has the id what user can see.
+-       */
+-      ret = drm_gem_handle_create(file_priv, &shmem->base, handle);
+-      /* drop reference from allocate - handle holds it now. */
+-      drm_gem_object_put(&shmem->base);
+-      if (ret)
+-              return ERR_PTR(ret);
+-
+       return bo;
+ }
+diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h
+index 8088d5fd8480..ad2877eeeccd 100644
+--- a/drivers/gpu/drm/panfrost/panfrost_gem.h
++++ b/drivers/gpu/drm/panfrost/panfrost_gem.h
+@@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev,
+                                  struct sg_table *sgt);
+ struct panfrost_gem_object *
+-panfrost_gem_create_with_handle(struct drm_file *file_priv,
+-                              struct drm_device *dev, size_t size,
+-                              u32 flags,
+-                              uint32_t *handle);
++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags);
+ int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv);
+ void panfrost_gem_close(struct drm_gem_object *obj,
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch b/queue-5.10/ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch
new file mode 100644 (file)
index 0000000..c304755
--- /dev/null
@@ -0,0 +1,55 @@
+From 28685ef1bf313b266ed0bed36413816327baa182 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 15:43:43 +0800
+Subject: ext4: correct inconsistent error msg in nojournal mode
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 89481b5fa8c0640e62ba84c6020cee895f7ac643 ]
+
+When we used the journal_async_commit mounting option in nojournal mode,
+the kernel reported "can't mount with journal_checksum", which was very
+confusing. I find that when we mount with journal_async_commit, both the
+JOURNAL_ASYNC_COMMIT and EXPLICIT_JOURNAL_CHECKSUM flags are set. However,
+in the error branch, CHECKSUM is checked before ASYNC_COMMIT. As a result,
+the above inconsistency occurs, and the ASYNC_COMMIT branch becomes dead
+code that cannot be executed. Therefore, we exchange the positions of the
+two judgments to make the error msg more accurate.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20221109074343.4184862-1-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/super.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index eb82c1d4883c..43f06a71d612 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -4812,14 +4812,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+               goto failed_mount3a;
+       } else {
+               /* Nojournal mode, all journal mount options are illegal */
+-              if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
++              if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+-                               "journal_checksum, fs mounted w/o journal");
++                               "journal_async_commit, fs mounted w/o journal");
+                       goto failed_mount3a;
+               }
+-              if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
++
++              if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+-                               "journal_async_commit, fs mounted w/o journal");
++                               "journal_checksum, fs mounted w/o journal");
+                       goto failed_mount3a;
+               }
+               if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-fix-deadlock-due-to-mbcache-entry-corruption.patch b/queue-5.10/ext4-fix-deadlock-due-to-mbcache-entry-corruption.patch
new file mode 100644 (file)
index 0000000..bd4009c
--- /dev/null
@@ -0,0 +1,143 @@
+From b6f8806be68152490927a71716b0f89e04c18313 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Nov 2022 20:39:50 +0100
+Subject: ext4: fix deadlock due to mbcache entry corruption
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit a44e84a9b7764c72896f7241a0ec9ac7e7ef38dd ]
+
+When manipulating xattr blocks, we can deadlock infinitely looping
+inside ext4_xattr_block_set() where we constantly keep finding xattr
+block for reuse in mbcache but we are unable to reuse it because its
+reference count is too big. This happens because cache entry for the
+xattr block is marked as reusable (e_reusable set) although its
+reference count is too big. When this inconsistency happens, this
+inconsistent state is kept indefinitely and so ext4_xattr_block_set()
+keeps retrying indefinitely.
+
+The inconsistent state is caused by non-atomic update of e_reusable bit.
+e_reusable is part of a bitfield and e_reusable update can race with
+update of e_referenced bit in the same bitfield resulting in loss of one
+of the updates. Fix the problem by using atomic bitops instead.
+
+This bug has been around for many years, but it became *much* easier
+to hit after commit 65f8b80053a1 ("ext4: fix race when reusing xattr
+blocks").
+
+Cc: stable@vger.kernel.org
+Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries")
+Fixes: 65f8b80053a1 ("ext4: fix race when reusing xattr blocks")
+Reported-and-tested-by: Jeremi Piotrowski <jpiotrowski@linux.microsoft.com>
+Reported-by: Thilo Fromm <t-lo@linux.microsoft.com>
+Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c         |  4 ++--
+ fs/mbcache.c            | 14 ++++++++------
+ include/linux/mbcache.h |  9 +++++++--
+ 3 files changed, 17 insertions(+), 10 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 35251afdf770..6bf1c62eff04 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1275,7 +1275,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+                               ce = mb_cache_entry_get(ea_block_cache, hash,
+                                                       bh->b_blocknr);
+                               if (ce) {
+-                                      ce->e_reusable = 1;
++                                      set_bit(MBE_REUSABLE_B, &ce->e_flags);
+                                       mb_cache_entry_put(ea_block_cache, ce);
+                               }
+                       }
+@@ -2037,7 +2037,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                               }
+                               BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+                               if (ref == EXT4_XATTR_REFCOUNT_MAX)
+-                                      ce->e_reusable = 0;
++                                      clear_bit(MBE_REUSABLE_B, &ce->e_flags);
+                               ea_bdebug(new_bh, "reusing; refcount now=%d",
+                                         ref);
+                               ext4_xattr_block_csum_set(inode, new_bh);
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index 950f1829a7fd..7a12ae87c806 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -94,8 +94,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+       atomic_set(&entry->e_refcnt, 1);
+       entry->e_key = key;
+       entry->e_value = value;
+-      entry->e_reusable = reusable;
+-      entry->e_referenced = 0;
++      entry->e_flags = 0;
++      if (reusable)
++              set_bit(MBE_REUSABLE_B, &entry->e_flags);
+       head = mb_cache_entry_head(cache, key);
+       hlist_bl_lock(head);
+       hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
+@@ -162,7 +163,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+       while (node) {
+               entry = hlist_bl_entry(node, struct mb_cache_entry,
+                                      e_hash_list);
+-              if (entry->e_key == key && entry->e_reusable &&
++              if (entry->e_key == key &&
++                  test_bit(MBE_REUSABLE_B, &entry->e_flags) &&
+                   atomic_inc_not_zero(&entry->e_refcnt))
+                       goto out;
+               node = node->next;
+@@ -318,7 +320,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+ void mb_cache_entry_touch(struct mb_cache *cache,
+                         struct mb_cache_entry *entry)
+ {
+-      entry->e_referenced = 1;
++      set_bit(MBE_REFERENCED_B, &entry->e_flags);
+ }
+ EXPORT_SYMBOL(mb_cache_entry_touch);
+@@ -343,9 +345,9 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+               entry = list_first_entry(&cache->c_list,
+                                        struct mb_cache_entry, e_list);
+               /* Drop initial hash reference if there is no user */
+-              if (entry->e_referenced ||
++              if (test_bit(MBE_REFERENCED_B, &entry->e_flags) ||
+                   atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
+-                      entry->e_referenced = 0;
++                      clear_bit(MBE_REFERENCED_B, &entry->e_flags);
+                       list_move_tail(&entry->e_list, &cache->c_list);
+                       continue;
+               }
+diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
+index e9d5ece87794..591bc4cefe1d 100644
+--- a/include/linux/mbcache.h
++++ b/include/linux/mbcache.h
+@@ -10,6 +10,12 @@
+ struct mb_cache;
++/* Cache entry flags */
++enum {
++      MBE_REFERENCED_B = 0,
++      MBE_REUSABLE_B
++};
++
+ struct mb_cache_entry {
+       /* List of entries in cache - protected by cache->c_list_lock */
+       struct list_head        e_list;
+@@ -26,8 +32,7 @@ struct mb_cache_entry {
+       atomic_t                e_refcnt;
+       /* Key in hash - stable during lifetime of the entry */
+       u32                     e_key;
+-      u32                     e_referenced:1;
+-      u32                     e_reusable:1;
++      unsigned long           e_flags;
+       /* User provided value - stable during lifetime of the entry */
+       u64                     e_value;
+ };
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-fix-leaking-uninitialized-memory-in-fast-commit.patch b/queue-5.10/ext4-fix-leaking-uninitialized-memory-in-fast-commit.patch
new file mode 100644 (file)
index 0000000..ff6d70a
--- /dev/null
@@ -0,0 +1,48 @@
+From f5248d98f78ebaa8c7a3f5f96c9a8f75988b8b9c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 6 Nov 2022 14:48:37 -0800
+Subject: ext4: fix leaking uninitialized memory in fast-commit journal
+
+From: Eric Biggers <ebiggers@google.com>
+
+[ Upstream commit 594bc43b410316d70bb42aeff168837888d96810 ]
+
+When space at the end of fast-commit journal blocks is unused, make sure
+to zero it out so that uninitialized memory is not leaked to disk.
+
+Fixes: aa75f4d3daae ("ext4: main fast-commit commit path")
+Cc: <stable@vger.kernel.org> # v5.10+
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20221106224841.279231-4-ebiggers@kernel.org
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/fast_commit.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
+index 3b2d6106a703..eaa26477bceb 100644
+--- a/fs/ext4/fast_commit.c
++++ b/fs/ext4/fast_commit.c
+@@ -628,6 +628,9 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
+               *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
+       if (pad_len > 0)
+               ext4_fc_memzero(sb, tl + 1, pad_len, crc);
++      /* Don't leak uninitialized memory in the unused last byte. */
++      *((u8 *)(tl + 1) + pad_len) = 0;
++
+       ext4_fc_submit_bh(sb);
+       ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
+@@ -684,6 +687,8 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
+       dst += sizeof(tail.fc_tid);
+       tail.fc_crc = cpu_to_le32(crc);
+       ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
++      dst += sizeof(tail.fc_crc);
++      memset(dst, 0, bsize - off); /* Don't leak uninitialized memory. */
+       ext4_fc_submit_bh(sb);
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-fix-race-when-reusing-xattr-blocks.patch b/queue-5.10/ext4-fix-race-when-reusing-xattr-blocks.patch
new file mode 100644 (file)
index 0000000..1ee5673
--- /dev/null
@@ -0,0 +1,180 @@
+From 4cae8ae405bffc57437d22fe54d6abbd9ff15725 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:24 +0200
+Subject: ext4: fix race when reusing xattr blocks
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b ]
+
+When ext4_xattr_block_set() decides to remove xattr block the following
+race can happen:
+
+CPU1                                    CPU2
+ext4_xattr_block_set()                  ext4_xattr_release_block()
+  new_bh = ext4_xattr_block_cache_find()
+
+                                          lock_buffer(bh);
+                                          ref = le32_to_cpu(BHDR(bh)->h_refcount);
+                                          if (ref == 1) {
+                                            ...
+                                            mb_cache_entry_delete();
+                                            unlock_buffer(bh);
+                                            ext4_free_blocks();
+                                              ...
+                                              ext4_forget(..., bh, ...);
+                                                jbd2_journal_revoke(..., bh);
+
+  ext4_journal_get_write_access(..., new_bh, ...)
+    do_get_write_access()
+      jbd2_journal_cancel_revoke(..., new_bh);
+
+Later the code in ext4_xattr_block_set() finds out the block got freed
+and cancels reusal of the block but the revoke stays canceled and so in
+case of block reuse and journal replay the filesystem can get corrupted.
+If the race works out slightly differently, we can also hit assertions
+in the jbd2 code.
+
+Fix the problem by making sure that once matching mbcache entry is
+found, code dropping the last xattr block reference (or trying to modify
+xattr block in place) waits until the mbcache entry reference is
+dropped. This way code trying to reuse xattr block is protected from
+someone trying to drop the last reference to xattr block.
+
+Reported-and-tested-by: Ritesh Harjani <ritesh.list@gmail.com>
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 67 +++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 45 insertions(+), 22 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 9d5ccc90eb63..35251afdf770 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ /* Remove entry from mbcache when EA inode is getting evicted */
+ void ext4_evict_ea_inode(struct inode *inode)
+ {
+-      if (EA_INODE_CACHE(inode))
+-              mb_cache_entry_delete(EA_INODE_CACHE(inode),
+-                      ext4_xattr_inode_get_hash(inode), inode->i_ino);
++      struct mb_cache_entry *oe;
++
++      if (!EA_INODE_CACHE(inode))
++              return;
++      /* Wait for entry to get unused so that we can remove it */
++      while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
++                      ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
++              mb_cache_entry_wait_unused(oe);
++              mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
++      }
+ }
+ static int
+@@ -1223,6 +1230,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+       if (error)
+               goto out;
++retry_ref:
+       lock_buffer(bh);
+       hash = le32_to_cpu(BHDR(bh)->h_hash);
+       ref = le32_to_cpu(BHDR(bh)->h_refcount);
+@@ -1232,9 +1240,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+                * This must happen under buffer lock for
+                * ext4_xattr_block_set() to reliably detect freed block
+                */
+-              if (ea_block_cache)
+-                      mb_cache_entry_delete(ea_block_cache, hash,
+-                                            bh->b_blocknr);
++              if (ea_block_cache) {
++                      struct mb_cache_entry *oe;
++
++                      oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
++                                                        bh->b_blocknr);
++                      if (oe) {
++                              unlock_buffer(bh);
++                              mb_cache_entry_wait_unused(oe);
++                              mb_cache_entry_put(ea_block_cache, oe);
++                              goto retry_ref;
++                      }
++              }
+               get_bh(bh);
+               unlock_buffer(bh);
+@@ -1862,9 +1879,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                        * ext4_xattr_block_set() to reliably detect modified
+                        * block
+                        */
+-                      if (ea_block_cache)
+-                              mb_cache_entry_delete(ea_block_cache, hash,
+-                                                    bs->bh->b_blocknr);
++                      if (ea_block_cache) {
++                              struct mb_cache_entry *oe;
++
++                              oe = mb_cache_entry_delete_or_get(ea_block_cache,
++                                      hash, bs->bh->b_blocknr);
++                              if (oe) {
++                                      /*
++                                       * Xattr block is getting reused. Leave
++                                       * it alone.
++                                       */
++                                      mb_cache_entry_put(ea_block_cache, oe);
++                                      goto clone_block;
++                              }
++                      }
+                       ea_bdebug(bs->bh, "modifying in-place");
+                       error = ext4_xattr_set_entry(i, s, handle, inode,
+                                                    true /* is_block */);
+@@ -1880,6 +1908,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                               goto cleanup;
+                       goto inserted;
+               }
++clone_block:
+               unlock_buffer(bs->bh);
+               ea_bdebug(bs->bh, "cloning");
+               s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+@@ -1985,18 +2014,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                               lock_buffer(new_bh);
+                               /*
+                                * We have to be careful about races with
+-                               * freeing, rehashing or adding references to
+-                               * xattr block. Once we hold buffer lock xattr
+-                               * block's state is stable so we can check
+-                               * whether the block got freed / rehashed or
+-                               * not.  Since we unhash mbcache entry under
+-                               * buffer lock when freeing / rehashing xattr
+-                               * block, checking whether entry is still
+-                               * hashed is reliable. Same rules hold for
+-                               * e_reusable handling.
++                               * adding references to xattr block. Once we
++                               * hold buffer lock xattr block's state is
++                               * stable so we can check the additional
++                               * reference fits.
+                                */
+-                              if (hlist_bl_unhashed(&ce->e_hash_list) ||
+-                                  !ce->e_reusable) {
++                              ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
++                              if (ref > EXT4_XATTR_REFCOUNT_MAX) {
+                                       /*
+                                        * Undo everything and check mbcache
+                                        * again.
+@@ -2011,9 +2035,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                                       new_bh = NULL;
+                                       goto inserted;
+                               }
+-                              ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+                               BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+-                              if (ref >= EXT4_XATTR_REFCOUNT_MAX)
++                              if (ref == EXT4_XATTR_REFCOUNT_MAX)
+                                       ce->e_reusable = 0;
+                               ea_bdebug(new_bh, "reusing; refcount now=%d",
+                                         ref);
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-fix-various-seppling-typos.patch b/queue-5.10/ext4-fix-various-seppling-typos.patch
new file mode 100644 (file)
index 0000000..d8c452c
--- /dev/null
@@ -0,0 +1,142 @@
+From 72772ad0d3651502c14a5046229bd2a11a94dbbf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 27 Mar 2021 16:00:05 +0530
+Subject: ext4: fix various seppling typos
+
+From: Bhaskar Chowdhury <unixbhaskar@gmail.com>
+
+[ Upstream commit 3088e5a5153cda27ec26461e5edf2821e15e802c ]
+
+Signed-off-by: Bhaskar Chowdhury <unixbhaskar@gmail.com>
+Link: https://lore.kernel.org/r/cover.1616840203.git.unixbhaskar@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/fast_commit.c | 2 +-
+ fs/ext4/indirect.c    | 2 +-
+ fs/ext4/inline.c      | 2 +-
+ fs/ext4/inode.c       | 2 +-
+ fs/ext4/mballoc.h     | 2 +-
+ fs/ext4/migrate.c     | 6 +++---
+ fs/ext4/namei.c       | 2 +-
+ fs/ext4/xattr.c       | 2 +-
+ 8 files changed, 10 insertions(+), 10 deletions(-)
+
+diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c
+index 41dcf21558c4..3b2d6106a703 100644
+--- a/fs/ext4/fast_commit.c
++++ b/fs/ext4/fast_commit.c
+@@ -66,7 +66,7 @@
+  * Fast Commit Ineligibility
+  * -------------------------
+  * Not all operations are supported by fast commits today (e.g extended
+- * attributes). Fast commit ineligiblity is marked by calling one of the
++ * attributes). Fast commit ineligibility is marked by calling one of the
+  * two following functions:
+  *
+  * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
+diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
+index b7d130f4b5e4..237983cd8cdc 100644
+--- a/fs/ext4/indirect.c
++++ b/fs/ext4/indirect.c
+@@ -712,7 +712,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode,
+ /*
+  * Truncate transactions can be complex and absolutely huge.  So we need to
+- * be able to restart the transaction at a conventient checkpoint to make
++ * be able to restart the transaction at a convenient checkpoint to make
+  * sure we don't overflow the journal.
+  *
+  * Try to extend this transaction for the purposes of truncation.  If
+diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
+index 88bd1d1cca23..77377befbb1c 100644
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -799,7 +799,7 @@ ext4_journalled_write_inline_data(struct inode *inode,
+  *    clear the inode state safely.
+  * 2. The inode has inline data, then we need to read the data, make it
+  *    update and dirty so that ext4_da_writepages can handle it. We don't
+- *    need to start the journal since the file's metatdata isn't changed now.
++ *    need to start the journal since the file's metadata isn't changed now.
+  */
+ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
+                                                struct inode *inode,
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index d7dbe1eb9da0..2d3004b3fc56 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -3885,7 +3885,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
+  * starting from file offset 'from'.  The range to be zero'd must
+  * be contained with in one block.  If the specified range exceeds
+  * the end of the block it will be shortened to end of the block
+- * that cooresponds to 'from'
++ * that corresponds to 'from'
+  */
+ static int ext4_block_zero_page_range(handle_t *handle,
+               struct address_space *mapping, loff_t from, loff_t length)
+diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
+index e75b4749aa1c..7be6288e48ec 100644
+--- a/fs/ext4/mballoc.h
++++ b/fs/ext4/mballoc.h
+@@ -59,7 +59,7 @@
+  * by the stream allocator, which purpose is to pack requests
+  * as close each to other as possible to produce smooth I/O traffic
+  * We use locality group prealloc space for stream request.
+- * We can tune the same via /proc/fs/ext4/<parition>/stream_req
++ * We can tune the same via /proc/fs/ext4/<partition>/stream_req
+  */
+ #define MB_DEFAULT_STREAM_THRESHOLD   16      /* 64K */
+diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
+index 4bfe2252d9a4..b0ea646454ac 100644
+--- a/fs/ext4/migrate.c
++++ b/fs/ext4/migrate.c
+@@ -32,7 +32,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
+       newext.ee_block = cpu_to_le32(lb->first_block);
+       newext.ee_len   = cpu_to_le16(lb->last_block - lb->first_block + 1);
+       ext4_ext_store_pblock(&newext, lb->first_pblock);
+-      /* Locking only for convinience since we are operating on temp inode */
++      /* Locking only for convenience since we are operating on temp inode */
+       down_write(&EXT4_I(inode)->i_data_sem);
+       path = ext4_find_extent(inode, lb->first_block, NULL, 0);
+       if (IS_ERR(path)) {
+@@ -43,8 +43,8 @@ static int finish_range(handle_t *handle, struct inode *inode,
+       /*
+        * Calculate the credit needed to inserting this extent
+-       * Since we are doing this in loop we may accumalate extra
+-       * credit. But below we try to not accumalate too much
++       * Since we are doing this in loop we may accumulate extra
++       * credit. But below we try to not accumulate too much
+        * of them by restarting the journal.
+        */
+       needed = ext4_ext_calc_credits_for_single_extent(inode,
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index c17d5f399f9e..ce4962bb62bc 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -995,7 +995,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
+        * If the hash is 1, then continue only if the next page has a
+        * continuation hash of any value.  This is used for readdir
+        * handling.  Otherwise, check to see if the hash matches the
+-       * desired contiuation hash.  If it doesn't, return since
++       * desired continuation hash.  If it doesn't, return since
+        * there's no point to read in the successive index pages.
+        */
+       bhash = dx_get_hash(p->at);
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 795ef72f0d3c..74d045b426dd 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1617,7 +1617,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i,
+                * If storing the value in an external inode is an option,
+                * reserve space for xattr entries/names in the external
+                * attribute block so that a long value does not occupy the
+-               * whole space and prevent futher entries being added.
++               * whole space and prevent further entries being added.
+                */
+               if (ext4_has_feature_ea_inode(inode->i_sb) &&
+                   new_size && is_block &&
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-goto-right-label-failed_mount3a.patch b/queue-5.10/ext4-goto-right-label-failed_mount3a.patch
new file mode 100644 (file)
index 0000000..c58ba3a
--- /dev/null
@@ -0,0 +1,69 @@
+From c37c890467d3573aa820e8ca32e3686da528a79c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Sep 2022 22:15:12 +0800
+Subject: ext4: goto right label 'failed_mount3a'
+
+From: Jason Yan <yanaijie@huawei.com>
+
+[ Upstream commit 43bd6f1b49b61f43de4d4e33661b8dbe8c911f14 ]
+
+Before these two branches neither loaded the journal nor created the
+xattr cache. So the right label to goto is 'failed_mount3a'. Although
+this did not cause any issues because the error handler validated if the
+pointer is null. However this still made me confused when reading
+the code. So it's still worth to modify to goto the right label.
+
+Signed-off-by: Jason Yan <yanaijie@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20220916141527.1012715-2-yanaijie@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 89481b5fa8c0 ("ext4: correct inconsistent error msg in nojournal mode")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/super.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index aa7bcc856de9..eb82c1d4883c 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -4809,30 +4809,30 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
+                  ext4_has_feature_journal_needs_recovery(sb)) {
+               ext4_msg(sb, KERN_ERR, "required journal recovery "
+                      "suppressed and not mounted read-only");
+-              goto failed_mount_wq;
++              goto failed_mount3a;
+       } else {
+               /* Nojournal mode, all journal mount options are illegal */
+               if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "journal_checksum, fs mounted w/o journal");
+-                      goto failed_mount_wq;
++                      goto failed_mount3a;
+               }
+               if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "journal_async_commit, fs mounted w/o journal");
+-                      goto failed_mount_wq;
++                      goto failed_mount3a;
+               }
+               if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "commit=%lu, fs mounted w/o journal",
+                                sbi->s_commit_interval / HZ);
+-                      goto failed_mount_wq;
++                      goto failed_mount3a;
+               }
+               if (EXT4_MOUNT_DATA_FLAGS &
+                   (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) {
+                       ext4_msg(sb, KERN_ERR, "can't mount with "
+                                "data=, fs mounted w/o journal");
+-                      goto failed_mount_wq;
++                      goto failed_mount3a;
+               }
+               sbi->s_def_mount_opt &= ~EXT4_MOUNT_JOURNAL_CHECKSUM;
+               clear_opt(sb, JOURNAL_CHECKSUM);
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-move-functions-in-super.c.patch b/queue-5.10/ext4-move-functions-in-super.c.patch
new file mode 100644 (file)
index 0000000..f84594b
--- /dev/null
@@ -0,0 +1,240 @@
+From 8e493fa7d24ac097d8be5f683f18de1c0b5d8f8c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Nov 2020 12:33:58 +0100
+Subject: ext4: move functions in super.c
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 4067662388f97d0f360e568820d9d5bac6a3c9fa ]
+
+Just move error info related functions in super.c close to
+ext4_handle_error(). We'll want to combine save_error_info() with
+ext4_handle_error() and this makes change more obvious and saves a
+forward declaration as well. No functional change.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20201127113405.26867-6-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/super.c | 196 ++++++++++++++++++++++++------------------------
+ 1 file changed, 98 insertions(+), 98 deletions(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 43f06a71d612..982341939a27 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -417,104 +417,6 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi)
+ #define ext4_get_tstamp(es, tstamp) \
+       __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi)
+-static void __save_error_info(struct super_block *sb, int error,
+-                            __u32 ino, __u64 block,
+-                            const char *func, unsigned int line)
+-{
+-      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+-      int err;
+-
+-      EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+-      if (bdev_read_only(sb->s_bdev))
+-              return;
+-      es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+-      ext4_update_tstamp(es, s_last_error_time);
+-      strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
+-      es->s_last_error_line = cpu_to_le32(line);
+-      es->s_last_error_ino = cpu_to_le32(ino);
+-      es->s_last_error_block = cpu_to_le64(block);
+-      switch (error) {
+-      case EIO:
+-              err = EXT4_ERR_EIO;
+-              break;
+-      case ENOMEM:
+-              err = EXT4_ERR_ENOMEM;
+-              break;
+-      case EFSBADCRC:
+-              err = EXT4_ERR_EFSBADCRC;
+-              break;
+-      case 0:
+-      case EFSCORRUPTED:
+-              err = EXT4_ERR_EFSCORRUPTED;
+-              break;
+-      case ENOSPC:
+-              err = EXT4_ERR_ENOSPC;
+-              break;
+-      case ENOKEY:
+-              err = EXT4_ERR_ENOKEY;
+-              break;
+-      case EROFS:
+-              err = EXT4_ERR_EROFS;
+-              break;
+-      case EFBIG:
+-              err = EXT4_ERR_EFBIG;
+-              break;
+-      case EEXIST:
+-              err = EXT4_ERR_EEXIST;
+-              break;
+-      case ERANGE:
+-              err = EXT4_ERR_ERANGE;
+-              break;
+-      case EOVERFLOW:
+-              err = EXT4_ERR_EOVERFLOW;
+-              break;
+-      case EBUSY:
+-              err = EXT4_ERR_EBUSY;
+-              break;
+-      case ENOTDIR:
+-              err = EXT4_ERR_ENOTDIR;
+-              break;
+-      case ENOTEMPTY:
+-              err = EXT4_ERR_ENOTEMPTY;
+-              break;
+-      case ESHUTDOWN:
+-              err = EXT4_ERR_ESHUTDOWN;
+-              break;
+-      case EFAULT:
+-              err = EXT4_ERR_EFAULT;
+-              break;
+-      default:
+-              err = EXT4_ERR_UNKNOWN;
+-      }
+-      es->s_last_error_errcode = err;
+-      if (!es->s_first_error_time) {
+-              es->s_first_error_time = es->s_last_error_time;
+-              es->s_first_error_time_hi = es->s_last_error_time_hi;
+-              strncpy(es->s_first_error_func, func,
+-                      sizeof(es->s_first_error_func));
+-              es->s_first_error_line = cpu_to_le32(line);
+-              es->s_first_error_ino = es->s_last_error_ino;
+-              es->s_first_error_block = es->s_last_error_block;
+-              es->s_first_error_errcode = es->s_last_error_errcode;
+-      }
+-      /*
+-       * Start the daily error reporting function if it hasn't been
+-       * started already
+-       */
+-      if (!es->s_error_count)
+-              mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
+-      le32_add_cpu(&es->s_error_count, 1);
+-}
+-
+-static void save_error_info(struct super_block *sb, int error,
+-                          __u32 ino, __u64 block,
+-                          const char *func, unsigned int line)
+-{
+-      __save_error_info(sb, error, ino, block, func, line);
+-      if (!bdev_read_only(sb->s_bdev))
+-              ext4_commit_super(sb, 1);
+-}
+-
+ /*
+  * The del_gendisk() function uninitializes the disk-specific data
+  * structures, including the bdi structure, without telling anyone
+@@ -643,6 +545,104 @@ static bool system_going_down(void)
+               || system_state == SYSTEM_RESTART;
+ }
++static void __save_error_info(struct super_block *sb, int error,
++                            __u32 ino, __u64 block,
++                            const char *func, unsigned int line)
++{
++      struct ext4_super_block *es = EXT4_SB(sb)->s_es;
++      int err;
++
++      EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
++      if (bdev_read_only(sb->s_bdev))
++              return;
++      es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
++      ext4_update_tstamp(es, s_last_error_time);
++      strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
++      es->s_last_error_line = cpu_to_le32(line);
++      es->s_last_error_ino = cpu_to_le32(ino);
++      es->s_last_error_block = cpu_to_le64(block);
++      switch (error) {
++      case EIO:
++              err = EXT4_ERR_EIO;
++              break;
++      case ENOMEM:
++              err = EXT4_ERR_ENOMEM;
++              break;
++      case EFSBADCRC:
++              err = EXT4_ERR_EFSBADCRC;
++              break;
++      case 0:
++      case EFSCORRUPTED:
++              err = EXT4_ERR_EFSCORRUPTED;
++              break;
++      case ENOSPC:
++              err = EXT4_ERR_ENOSPC;
++              break;
++      case ENOKEY:
++              err = EXT4_ERR_ENOKEY;
++              break;
++      case EROFS:
++              err = EXT4_ERR_EROFS;
++              break;
++      case EFBIG:
++              err = EXT4_ERR_EFBIG;
++              break;
++      case EEXIST:
++              err = EXT4_ERR_EEXIST;
++              break;
++      case ERANGE:
++              err = EXT4_ERR_ERANGE;
++              break;
++      case EOVERFLOW:
++              err = EXT4_ERR_EOVERFLOW;
++              break;
++      case EBUSY:
++              err = EXT4_ERR_EBUSY;
++              break;
++      case ENOTDIR:
++              err = EXT4_ERR_ENOTDIR;
++              break;
++      case ENOTEMPTY:
++              err = EXT4_ERR_ENOTEMPTY;
++              break;
++      case ESHUTDOWN:
++              err = EXT4_ERR_ESHUTDOWN;
++              break;
++      case EFAULT:
++              err = EXT4_ERR_EFAULT;
++              break;
++      default:
++              err = EXT4_ERR_UNKNOWN;
++      }
++      es->s_last_error_errcode = err;
++      if (!es->s_first_error_time) {
++              es->s_first_error_time = es->s_last_error_time;
++              es->s_first_error_time_hi = es->s_last_error_time_hi;
++              strncpy(es->s_first_error_func, func,
++                      sizeof(es->s_first_error_func));
++              es->s_first_error_line = cpu_to_le32(line);
++              es->s_first_error_ino = es->s_last_error_ino;
++              es->s_first_error_block = es->s_last_error_block;
++              es->s_first_error_errcode = es->s_last_error_errcode;
++      }
++      /*
++       * Start the daily error reporting function if it hasn't been
++       * started already
++       */
++      if (!es->s_error_count)
++              mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
++      le32_add_cpu(&es->s_error_count, 1);
++}
++
++static void save_error_info(struct super_block *sb, int error,
++                          __u32 ino, __u64 block,
++                          const char *func, unsigned int line)
++{
++      __save_error_info(sb, error, ino, block, func, line);
++      if (!bdev_read_only(sb->s_bdev))
++              ext4_commit_super(sb, 1);
++}
++
+ /* Deal with the reporting of failure conditions on a filesystem such as
+  * inconsistencies detected or read IO failures.
+  *
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch b/queue-5.10/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch
new file mode 100644 (file)
index 0000000..ed914bb
--- /dev/null
@@ -0,0 +1,117 @@
+From 6e7fa1c6130b48ca06f96e5a6de00aabf64c231b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:22 +0200
+Subject: ext4: remove EA inode entry from mbcache on inode eviction
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 6bc0d63dad7f9f54d381925ee855b402f652fa39 ]
+
+Currently we remove EA inode from mbcache as soon as its xattr refcount
+drops to zero. However there can be pending attempts to reuse the inode
+and thus refcount handling code has to handle the situation when
+refcount increases from zero anyway. So save some work and just keep EA
+inode in mbcache until it is getting evicted. At that moment we are sure
+following iget() of EA inode will fail anyway (or wait for eviction to
+finish and load things from the disk again) and so removing mbcache
+entry at that moment is fine and simplifies the code a bit.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-3-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c |  2 ++
+ fs/ext4/xattr.c | 24 ++++++++----------------
+ fs/ext4/xattr.h |  1 +
+ 3 files changed, 11 insertions(+), 16 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 2d3004b3fc56..355343cf4609 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -179,6 +179,8 @@ void ext4_evict_inode(struct inode *inode)
+       trace_ext4_evict_inode(inode);
++      if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
++              ext4_evict_ea_inode(inode);
+       if (inode->i_nlink) {
+               /*
+                * When journalling data dirty buffers are tracked only in the
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 0b682c92bfe9..0555f32f0fd4 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -436,6 +436,14 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+       return err;
+ }
++/* Remove entry from mbcache when EA inode is getting evicted */
++void ext4_evict_ea_inode(struct inode *inode)
++{
++      if (EA_INODE_CACHE(inode))
++              mb_cache_entry_delete(EA_INODE_CACHE(inode),
++                      ext4_xattr_inode_get_hash(inode), inode->i_ino);
++}
++
+ static int
+ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
+                              struct ext4_xattr_entry *entry, void *buffer,
+@@ -972,10 +980,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+                                      int ref_change)
+ {
+-      struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
+       struct ext4_iloc iloc;
+       s64 ref_count;
+-      u32 hash;
+       int ret;
+       inode_lock(ea_inode);
+@@ -998,14 +1004,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+                       set_nlink(ea_inode, 1);
+                       ext4_orphan_del(handle, ea_inode);
+-
+-                      if (ea_inode_cache) {
+-                              hash = ext4_xattr_inode_get_hash(ea_inode);
+-                              mb_cache_entry_create(ea_inode_cache,
+-                                                    GFP_NOFS, hash,
+-                                                    ea_inode->i_ino,
+-                                                    true /* reusable */);
+-                      }
+               }
+       } else {
+               WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
+@@ -1018,12 +1016,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+                       clear_nlink(ea_inode);
+                       ext4_orphan_add(handle, ea_inode);
+-
+-                      if (ea_inode_cache) {
+-                              hash = ext4_xattr_inode_get_hash(ea_inode);
+-                              mb_cache_entry_delete(ea_inode_cache, hash,
+-                                                    ea_inode->i_ino);
+-                      }
+               }
+       }
+diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
+index 87e5863bb493..b357872ab83b 100644
+--- a/fs/ext4/xattr.h
++++ b/fs/ext4/xattr.h
+@@ -191,6 +191,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
+ extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+                           struct ext4_inode *raw_inode, handle_t *handle);
++extern void ext4_evict_ea_inode(struct inode *inode);
+ extern const struct xattr_handler *ext4_xattr_handlers[];
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-simplify-ext4-error-translation.patch b/queue-5.10/ext4-simplify-ext4-error-translation.patch
new file mode 100644 (file)
index 0000000..f145aaa
--- /dev/null
@@ -0,0 +1,146 @@
+From 99fadec5f5b52abc5ea4fd166e99dba4a1719a42 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Nov 2020 12:33:59 +0100
+Subject: ext4: simplify ext4 error translation
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 02a7780e4d2fcf438ac6773bc469e7ada2af56be ]
+
+We convert errno's to ext4 on-disk format error codes in
+save_error_info(). Add a function and a bit of macro magic to make this
+simpler.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20201127113405.26867-7-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/super.c | 95 +++++++++++++++++++++----------------------------
+ 1 file changed, 40 insertions(+), 55 deletions(-)
+
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 982341939a27..ced84ed4e592 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -545,76 +545,61 @@ static bool system_going_down(void)
+               || system_state == SYSTEM_RESTART;
+ }
++struct ext4_err_translation {
++      int code;
++      int errno;
++};
++
++#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err }
++
++static struct ext4_err_translation err_translation[] = {
++      EXT4_ERR_TRANSLATE(EIO),
++      EXT4_ERR_TRANSLATE(ENOMEM),
++      EXT4_ERR_TRANSLATE(EFSBADCRC),
++      EXT4_ERR_TRANSLATE(EFSCORRUPTED),
++      EXT4_ERR_TRANSLATE(ENOSPC),
++      EXT4_ERR_TRANSLATE(ENOKEY),
++      EXT4_ERR_TRANSLATE(EROFS),
++      EXT4_ERR_TRANSLATE(EFBIG),
++      EXT4_ERR_TRANSLATE(EEXIST),
++      EXT4_ERR_TRANSLATE(ERANGE),
++      EXT4_ERR_TRANSLATE(EOVERFLOW),
++      EXT4_ERR_TRANSLATE(EBUSY),
++      EXT4_ERR_TRANSLATE(ENOTDIR),
++      EXT4_ERR_TRANSLATE(ENOTEMPTY),
++      EXT4_ERR_TRANSLATE(ESHUTDOWN),
++      EXT4_ERR_TRANSLATE(EFAULT),
++};
++
++static int ext4_errno_to_code(int errno)
++{
++      int i;
++
++      for (i = 0; i < ARRAY_SIZE(err_translation); i++)
++              if (err_translation[i].errno == errno)
++                      return err_translation[i].code;
++      return EXT4_ERR_UNKNOWN;
++}
++
+ static void __save_error_info(struct super_block *sb, int error,
+                             __u32 ino, __u64 block,
+                             const char *func, unsigned int line)
+ {
+       struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+-      int err;
+       EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
+       if (bdev_read_only(sb->s_bdev))
+               return;
++      /* We default to EFSCORRUPTED error... */
++      if (error == 0)
++              error = EFSCORRUPTED;
+       es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
+       ext4_update_tstamp(es, s_last_error_time);
+       strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func));
+       es->s_last_error_line = cpu_to_le32(line);
+       es->s_last_error_ino = cpu_to_le32(ino);
+       es->s_last_error_block = cpu_to_le64(block);
+-      switch (error) {
+-      case EIO:
+-              err = EXT4_ERR_EIO;
+-              break;
+-      case ENOMEM:
+-              err = EXT4_ERR_ENOMEM;
+-              break;
+-      case EFSBADCRC:
+-              err = EXT4_ERR_EFSBADCRC;
+-              break;
+-      case 0:
+-      case EFSCORRUPTED:
+-              err = EXT4_ERR_EFSCORRUPTED;
+-              break;
+-      case ENOSPC:
+-              err = EXT4_ERR_ENOSPC;
+-              break;
+-      case ENOKEY:
+-              err = EXT4_ERR_ENOKEY;
+-              break;
+-      case EROFS:
+-              err = EXT4_ERR_EROFS;
+-              break;
+-      case EFBIG:
+-              err = EXT4_ERR_EFBIG;
+-              break;
+-      case EEXIST:
+-              err = EXT4_ERR_EEXIST;
+-              break;
+-      case ERANGE:
+-              err = EXT4_ERR_ERANGE;
+-              break;
+-      case EOVERFLOW:
+-              err = EXT4_ERR_EOVERFLOW;
+-              break;
+-      case EBUSY:
+-              err = EXT4_ERR_EBUSY;
+-              break;
+-      case ENOTDIR:
+-              err = EXT4_ERR_ENOTDIR;
+-              break;
+-      case ENOTEMPTY:
+-              err = EXT4_ERR_ENOTEMPTY;
+-              break;
+-      case ESHUTDOWN:
+-              err = EXT4_ERR_ESHUTDOWN;
+-              break;
+-      case EFAULT:
+-              err = EXT4_ERR_EFAULT;
+-              break;
+-      default:
+-              err = EXT4_ERR_UNKNOWN;
+-      }
+-      es->s_last_error_errcode = err;
++      es->s_last_error_errcode = ext4_errno_to_code(error);
+       if (!es->s_first_error_time) {
+               es->s_first_error_time = es->s_last_error_time;
+               es->s_first_error_time_hi = es->s_last_error_time_hi;
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch b/queue-5.10/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch
new file mode 100644 (file)
index 0000000..7ba4347
--- /dev/null
@@ -0,0 +1,126 @@
+From 1ca65c5d79d24e53bcc991f57037853316bcaf3a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:23 +0200
+Subject: ext4: unindent codeblock in ext4_xattr_block_set()
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit fd48e9acdf26d0cbd80051de07d4a735d05d29b2 ]
+
+Remove unnecessary else (and thus indentation level) from a code block
+in ext4_xattr_block_set(). It will also make following code changes
+easier. No functional changes.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-4-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 77 ++++++++++++++++++++++++-------------------------
+ 1 file changed, 38 insertions(+), 39 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 0555f32f0fd4..9d5ccc90eb63 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1846,6 +1846,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ #define header(x) ((struct ext4_xattr_header *)(x))
+       if (s->base) {
++              int offset = (char *)s->here - bs->bh->b_data;
++
+               BUFFER_TRACE(bs->bh, "get_write_access");
+               error = ext4_journal_get_write_access(handle, bs->bh);
+               if (error)
+@@ -1877,49 +1879,46 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                       if (error)
+                               goto cleanup;
+                       goto inserted;
+-              } else {
+-                      int offset = (char *)s->here - bs->bh->b_data;
++              }
++              unlock_buffer(bs->bh);
++              ea_bdebug(bs->bh, "cloning");
++              s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
++              error = -ENOMEM;
++              if (s->base == NULL)
++                      goto cleanup;
++              s->first = ENTRY(header(s->base)+1);
++              header(s->base)->h_refcount = cpu_to_le32(1);
++              s->here = ENTRY(s->base + offset);
++              s->end = s->base + bs->bh->b_size;
+-                      unlock_buffer(bs->bh);
+-                      ea_bdebug(bs->bh, "cloning");
+-                      s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+-                      error = -ENOMEM;
+-                      if (s->base == NULL)
++              /*
++               * If existing entry points to an xattr inode, we need
++               * to prevent ext4_xattr_set_entry() from decrementing
++               * ref count on it because the reference belongs to the
++               * original block. In this case, make the entry look
++               * like it has an empty value.
++               */
++              if (!s->not_found && s->here->e_value_inum) {
++                      ea_ino = le32_to_cpu(s->here->e_value_inum);
++                      error = ext4_xattr_inode_iget(inode, ea_ino,
++                                    le32_to_cpu(s->here->e_hash),
++                                    &tmp_inode);
++                      if (error)
+                               goto cleanup;
+-                      s->first = ENTRY(header(s->base)+1);
+-                      header(s->base)->h_refcount = cpu_to_le32(1);
+-                      s->here = ENTRY(s->base + offset);
+-                      s->end = s->base + bs->bh->b_size;
+-                      /*
+-                       * If existing entry points to an xattr inode, we need
+-                       * to prevent ext4_xattr_set_entry() from decrementing
+-                       * ref count on it because the reference belongs to the
+-                       * original block. In this case, make the entry look
+-                       * like it has an empty value.
+-                       */
+-                      if (!s->not_found && s->here->e_value_inum) {
+-                              ea_ino = le32_to_cpu(s->here->e_value_inum);
+-                              error = ext4_xattr_inode_iget(inode, ea_ino,
+-                                            le32_to_cpu(s->here->e_hash),
+-                                            &tmp_inode);
+-                              if (error)
+-                                      goto cleanup;
+-
+-                              if (!ext4_test_inode_state(tmp_inode,
+-                                              EXT4_STATE_LUSTRE_EA_INODE)) {
+-                                      /*
+-                                       * Defer quota free call for previous
+-                                       * inode until success is guaranteed.
+-                                       */
+-                                      old_ea_inode_quota = le32_to_cpu(
+-                                                      s->here->e_value_size);
+-                              }
+-                              iput(tmp_inode);
+-
+-                              s->here->e_value_inum = 0;
+-                              s->here->e_value_size = 0;
++                      if (!ext4_test_inode_state(tmp_inode,
++                                      EXT4_STATE_LUSTRE_EA_INODE)) {
++                              /*
++                               * Defer quota free call for previous
++                               * inode until success is guaranteed.
++                               */
++                              old_ea_inode_quota = le32_to_cpu(
++                                              s->here->e_value_size);
+                       }
++                      iput(tmp_inode);
++
++                      s->here->e_value_inum = 0;
++                      s->here->e_value_size = 0;
+               }
+       } else {
+               /* Allocate a buffer where we construct the new block. */
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch b/queue-5.10/ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch
new file mode 100644 (file)
index 0000000..79272b8
--- /dev/null
@@ -0,0 +1,41 @@
+From d243769f1e1200239d0af80c9781b409717499c3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 25 May 2022 11:01:20 +0800
+Subject: ext4: use kmemdup() to replace kmalloc + memcpy
+
+From: Shuqi Zhang <zhangshuqi3@huawei.com>
+
+[ Upstream commit 4efd9f0d120c55b08852ee5605dbb02a77089a5d ]
+
+Replace kmalloc + memcpy with kmemdup()
+
+Signed-off-by: Shuqi Zhang <zhangshuqi3@huawei.com>
+Reviewed-by: Ritesh Harjani <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20220525030120.803330-1-zhangshuqi3@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 74d045b426dd..0b682c92bfe9 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1890,11 +1890,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                       unlock_buffer(bs->bh);
+                       ea_bdebug(bs->bh, "cloning");
+-                      s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
++                      s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+                       error = -ENOMEM;
+                       if (s->base == NULL)
+                               goto cleanup;
+-                      memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
+                       s->first = ENTRY(header(s->base)+1);
+                       header(s->base)->h_refcount = cpu_to_le32(1);
+                       s->here = ENTRY(s->base + offset);
+-- 
+2.35.1
+
diff --git a/queue-5.10/ext4-use-memcpy_to_page-in-pagecache_write.patch b/queue-5.10/ext4-use-memcpy_to_page-in-pagecache_write.patch
new file mode 100644 (file)
index 0000000..ac55ad8
--- /dev/null
@@ -0,0 +1,44 @@
+From 32a2a70a21a968ad8206fd7374f76f59530d42ed Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 7 Feb 2021 11:04:23 -0800
+Subject: ext4: use memcpy_to_page() in pagecache_write()
+
+From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+
+[ Upstream commit bd256fda92efe97b692dc72e246d35fa724d42d8 ]
+
+Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+Link: https://lore.kernel.org/r/20210207190425.38107-7-chaitanya.kulkarni@wdc.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: 956510c0c743 ("fs: ext4: initialize fsdata in pagecache_write()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/verity.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
+index 35be8e7ec2a0..130070ec491b 100644
+--- a/fs/ext4/verity.c
++++ b/fs/ext4/verity.c
+@@ -80,7 +80,6 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+                                PAGE_SIZE - offset_in_page(pos));
+               struct page *page;
+               void *fsdata;
+-              void *addr;
+               int res;
+               res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+@@ -88,9 +87,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+               if (res)
+                       return res;
+-              addr = kmap_atomic(page);
+-              memcpy(addr + offset_in_page(pos), buf, n);
+-              kunmap_atomic(addr);
++              memcpy_to_page(page, offset_in_page(pos), buf, n);
+               res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n,
+                                         page, fsdata);
+-- 
+2.35.1
+
diff --git a/queue-5.10/filelock-new-helper-vfs_inode_has_locks.patch b/queue-5.10/filelock-new-helper-vfs_inode_has_locks.patch
new file mode 100644 (file)
index 0000000..ef429f0
--- /dev/null
@@ -0,0 +1,89 @@
+From 08714768923658ec3062696c56e0abc88ee7105f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Nov 2022 08:33:09 -0500
+Subject: filelock: new helper: vfs_inode_has_locks
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit ab1ddef98a715eddb65309ffa83267e4e84a571e ]
+
+Ceph has a need to know whether a particular inode has any locks set on
+it. It's currently tracking that by a num_locks field in its
+filp->private_data, but that's problematic as it tries to decrement this
+field when releasing locks and that can race with the file being torn
+down.
+
+Add a new vfs_inode_has_locks helper that just returns whether any locks
+are currently held on the inode.
+
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@infradead.org>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Stable-dep-of: 461ab10ef7e6 ("ceph: switch to vfs_inode_has_locks() to fix file lock bug")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/locks.c         | 23 +++++++++++++++++++++++
+ include/linux/fs.h |  6 ++++++
+ 2 files changed, 29 insertions(+)
+
+diff --git a/fs/locks.c b/fs/locks.c
+index 32c948fe2944..12d72c3d8756 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -2813,6 +2813,29 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
+ }
+ EXPORT_SYMBOL_GPL(vfs_cancel_lock);
++/**
++ * vfs_inode_has_locks - are any file locks held on @inode?
++ * @inode: inode to check for locks
++ *
++ * Return true if there are any FL_POSIX or FL_FLOCK locks currently
++ * set on @inode.
++ */
++bool vfs_inode_has_locks(struct inode *inode)
++{
++      struct file_lock_context *ctx;
++      bool ret;
++
++      ctx = smp_load_acquire(&inode->i_flctx);
++      if (!ctx)
++              return false;
++
++      spin_lock(&ctx->flc_lock);
++      ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock);
++      spin_unlock(&ctx->flc_lock);
++      return ret;
++}
++EXPORT_SYMBOL_GPL(vfs_inode_has_locks);
++
+ #ifdef CONFIG_PROC_FS
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index 9a477e537361..74e19bccbf73 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1145,6 +1145,7 @@ extern int locks_delete_block(struct file_lock *);
+ extern int vfs_test_lock(struct file *, struct file_lock *);
+ extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *);
+ extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl);
++bool vfs_inode_has_locks(struct inode *inode);
+ extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl);
+ extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type);
+ extern void lease_get_mtime(struct inode *, struct timespec64 *time);
+@@ -1257,6 +1258,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
+       return 0;
+ }
++static inline bool vfs_inode_has_locks(struct inode *inode)
++{
++      return false;
++}
++
+ static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
+ {
+       return -ENOLCK;
+-- 
+2.35.1
+
diff --git a/queue-5.10/fs-ext4-initialize-fsdata-in-pagecache_write.patch b/queue-5.10/fs-ext4-initialize-fsdata-in-pagecache_write.patch
new file mode 100644 (file)
index 0000000..05b97fc
--- /dev/null
@@ -0,0 +1,43 @@
+From 117930a41edd8f7ee8aa352912bceb45a74ff912 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Nov 2022 12:21:30 +0100
+Subject: fs: ext4: initialize fsdata in pagecache_write()
+
+From: Alexander Potapenko <glider@google.com>
+
+[ Upstream commit 956510c0c7439e90b8103aaeaf4da92878c622f0 ]
+
+When aops->write_begin() does not initialize fsdata, KMSAN reports
+an error passing the latter to aops->write_end().
+
+Fix this by unconditionally initializing fsdata.
+
+Cc: Eric Biggers <ebiggers@kernel.org>
+Fixes: c93d8f885809 ("ext4: add basic fs-verity support")
+Reported-by: syzbot+9767be679ef5016b6082@syzkaller.appspotmail.com
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20221121112134.407362-1-glider@google.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/verity.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c
+index 130070ec491b..e3019f920222 100644
+--- a/fs/ext4/verity.c
++++ b/fs/ext4/verity.c
+@@ -79,7 +79,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count,
+               size_t n = min_t(size_t, count,
+                                PAGE_SIZE - offset_in_page(pos));
+               struct page *page;
+-              void *fsdata;
++              void *fsdata = NULL;
+               int res;
+               res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0,
+-- 
+2.35.1
+
diff --git a/queue-5.10/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch b/queue-5.10/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch
new file mode 100644 (file)
index 0000000..21ecc30
--- /dev/null
@@ -0,0 +1,36 @@
+From 2c9bd3bc094387ab4afb5e0f65b2e2f4c820137a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Jan 2023 12:20:39 +0400
+Subject: gpio: sifive: Fix refcount leak in sifive_gpio_probe
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit 694175cd8a1643cde3acb45c9294bca44a8e08e9 ]
+
+of_irq_find_parent() returns a node pointer with refcount incremented,
+We should use of_node_put() on it when not needed anymore.
+Add missing of_node_put() to avoid refcount leak.
+
+Fixes: 96868dce644d ("gpio/sifive: Add GPIO driver for SiFive SoCs")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpio-sifive.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c
+index 4f28fa73450c..a42ffb9f3057 100644
+--- a/drivers/gpio/gpio-sifive.c
++++ b/drivers/gpio/gpio-sifive.c
+@@ -195,6 +195,7 @@ static int sifive_gpio_probe(struct platform_device *pdev)
+               return -ENODEV;
+       }
+       parent = irq_find_host(irq_parent);
++      of_node_put(irq_parent);
+       if (!parent) {
+               dev_err(dev, "no IRQ parent domain\n");
+               return -ENODEV;
+-- 
+2.35.1
+
diff --git a/queue-5.10/mbcache-add-functions-to-delete-entry-if-unused.patch b/queue-5.10/mbcache-add-functions-to-delete-entry-if-unused.patch
new file mode 100644 (file)
index 0000000..a6a366b
--- /dev/null
@@ -0,0 +1,156 @@
+From ff21a13cc7c1061e54a6d3402303a5f33559ff7f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:21 +0200
+Subject: mbcache: add functions to delete entry if unused
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 3dc96bba65f53daa217f0a8f43edad145286a8f5 ]
+
+Add function mb_cache_entry_delete_or_get() to delete mbcache entry if
+it is unused and also add a function to wait for entry to become unused
+- mb_cache_entry_wait_unused(). We do not share code between the two
+deleting function as one of them will go away soon.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-2-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/mbcache.c            | 66 +++++++++++++++++++++++++++++++++++++++--
+ include/linux/mbcache.h | 10 ++++++-
+ 2 files changed, 73 insertions(+), 3 deletions(-)
+
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index cfc28129fb6f..2010bc80a3f2 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -11,7 +11,7 @@
+ /*
+  * Mbcache is a simple key-value store. Keys need not be unique, however
+  * key-value pairs are expected to be unique (we use this fact in
+- * mb_cache_entry_delete()).
++ * mb_cache_entry_delete_or_get()).
+  *
+  * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
+  * Ext4 also uses it for deduplication of xattr values stored in inodes.
+@@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry)
+ }
+ EXPORT_SYMBOL(__mb_cache_entry_free);
++/*
++ * mb_cache_entry_wait_unused - wait to be the last user of the entry
++ *
++ * @entry - entry to work on
++ *
++ * Wait to be the last user of the entry.
++ */
++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
++{
++      wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
++}
++EXPORT_SYMBOL(mb_cache_entry_wait_unused);
++
+ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+                                          struct mb_cache_entry *entry,
+                                          u32 key)
+@@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+ }
+ EXPORT_SYMBOL(mb_cache_entry_get);
+-/* mb_cache_entry_delete - remove a cache entry
++/* mb_cache_entry_delete - try to remove a cache entry
+  * @cache - cache we work with
+  * @key - key
+  * @value - value
+@@ -254,6 +267,55 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
+ }
+ EXPORT_SYMBOL(mb_cache_entry_delete);
++/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
++ * @cache - cache we work with
++ * @key - key
++ * @value - value
++ *
++ * Remove entry from cache @cache with key @key and value @value. The removal
++ * happens only if the entry is unused. The function returns NULL in case the
++ * entry was successfully removed or there's no entry in cache. Otherwise the
++ * function grabs reference of the entry that we failed to delete because it
++ * still has users and return it.
++ */
++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
++                                                  u32 key, u64 value)
++{
++      struct hlist_bl_node *node;
++      struct hlist_bl_head *head;
++      struct mb_cache_entry *entry;
++
++      head = mb_cache_entry_head(cache, key);
++      hlist_bl_lock(head);
++      hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
++              if (entry->e_key == key && entry->e_value == value) {
++                      if (atomic_read(&entry->e_refcnt) > 2) {
++                              atomic_inc(&entry->e_refcnt);
++                              hlist_bl_unlock(head);
++                              return entry;
++                      }
++                      /* We keep hash list reference to keep entry alive */
++                      hlist_bl_del_init(&entry->e_hash_list);
++                      hlist_bl_unlock(head);
++                      spin_lock(&cache->c_list_lock);
++                      if (!list_empty(&entry->e_list)) {
++                              list_del_init(&entry->e_list);
++                              if (!WARN_ONCE(cache->c_entry_count == 0,
++              "mbcache: attempt to decrement c_entry_count past zero"))
++                                      cache->c_entry_count--;
++                              atomic_dec(&entry->e_refcnt);
++                      }
++                      spin_unlock(&cache->c_list_lock);
++                      mb_cache_entry_put(cache, entry);
++                      return NULL;
++              }
++      }
++      hlist_bl_unlock(head);
++
++      return NULL;
++}
++EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
++
+ /* mb_cache_entry_touch - cache entry got used
+  * @cache - cache the entry belongs to
+  * @entry - entry that got used
+diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
+index 20f1e3ff6013..8eca7f25c432 100644
+--- a/include/linux/mbcache.h
++++ b/include/linux/mbcache.h
+@@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache);
+ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+                         u64 value, bool reusable);
+ void __mb_cache_entry_free(struct mb_cache_entry *entry);
++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
+ static inline int mb_cache_entry_put(struct mb_cache *cache,
+                                    struct mb_cache_entry *entry)
+ {
+-      if (!atomic_dec_and_test(&entry->e_refcnt))
++      unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
++
++      if (cnt > 0) {
++              if (cnt <= 3)
++                      wake_up_var(&entry->e_refcnt);
+               return 0;
++      }
+       __mb_cache_entry_free(entry);
+       return 1;
+ }
++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
++                                                  u32 key, u64 value);
+ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
+ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+                                         u64 value);
+-- 
+2.35.1
+
diff --git a/queue-5.10/mbcache-automatically-delete-entries-from-cache-on-f.patch b/queue-5.10/mbcache-automatically-delete-entries-from-cache-on-f.patch
new file mode 100644 (file)
index 0000000..df80521
--- /dev/null
@@ -0,0 +1,274 @@
+From b2f2aa434be535ae2779b09d45284f35b60e6f9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:29 +0200
+Subject: mbcache: automatically delete entries from cache on freeing
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 307af6c879377c1c63e71cbdd978201f9c7ee8df ]
+
+Use the fact that entries with elevated refcount are not removed from
+the hash and just move removal of the entry from the hash to the entry
+freeing time. When doing this we also change the generic code to hold
+one reference to the cache entry, not two of them, which makes code
+somewhat more obvious.
+
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-10-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/mbcache.c            | 108 +++++++++++++++-------------------------
+ include/linux/mbcache.h |  24 ++++++---
+ 2 files changed, 55 insertions(+), 77 deletions(-)
+
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index 2010bc80a3f2..950f1829a7fd 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -90,7 +90,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+               return -ENOMEM;
+       INIT_LIST_HEAD(&entry->e_list);
+-      /* One ref for hash, one ref returned */
++      /* Initial hash reference */
+       atomic_set(&entry->e_refcnt, 1);
+       entry->e_key = key;
+       entry->e_value = value;
+@@ -106,21 +106,28 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+               }
+       }
+       hlist_bl_add_head(&entry->e_hash_list, head);
+-      hlist_bl_unlock(head);
+-
++      /*
++       * Add entry to LRU list before it can be found by
++       * mb_cache_entry_delete() to avoid races
++       */
+       spin_lock(&cache->c_list_lock);
+       list_add_tail(&entry->e_list, &cache->c_list);
+-      /* Grab ref for LRU list */
+-      atomic_inc(&entry->e_refcnt);
+       cache->c_entry_count++;
+       spin_unlock(&cache->c_list_lock);
++      hlist_bl_unlock(head);
+       return 0;
+ }
+ EXPORT_SYMBOL(mb_cache_entry_create);
+-void __mb_cache_entry_free(struct mb_cache_entry *entry)
++void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry)
+ {
++      struct hlist_bl_head *head;
++
++      head = mb_cache_entry_head(cache, entry->e_key);
++      hlist_bl_lock(head);
++      hlist_bl_del(&entry->e_hash_list);
++      hlist_bl_unlock(head);
+       kmem_cache_free(mb_entry_cache, entry);
+ }
+ EXPORT_SYMBOL(__mb_cache_entry_free);
+@@ -134,7 +141,7 @@ EXPORT_SYMBOL(__mb_cache_entry_free);
+  */
+ void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
+ {
+-      wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
++      wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2);
+ }
+ EXPORT_SYMBOL(mb_cache_entry_wait_unused);
+@@ -155,10 +162,9 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+       while (node) {
+               entry = hlist_bl_entry(node, struct mb_cache_entry,
+                                      e_hash_list);
+-              if (entry->e_key == key && entry->e_reusable) {
+-                      atomic_inc(&entry->e_refcnt);
++              if (entry->e_key == key && entry->e_reusable &&
++                  atomic_inc_not_zero(&entry->e_refcnt))
+                       goto out;
+-              }
+               node = node->next;
+       }
+       entry = NULL;
+@@ -218,10 +224,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+       head = mb_cache_entry_head(cache, key);
+       hlist_bl_lock(head);
+       hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+-              if (entry->e_key == key && entry->e_value == value) {
+-                      atomic_inc(&entry->e_refcnt);
++              if (entry->e_key == key && entry->e_value == value &&
++                  atomic_inc_not_zero(&entry->e_refcnt))
+                       goto out;
+-              }
+       }
+       entry = NULL;
+ out:
+@@ -281,37 +286,25 @@ EXPORT_SYMBOL(mb_cache_entry_delete);
+ struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+                                                   u32 key, u64 value)
+ {
+-      struct hlist_bl_node *node;
+-      struct hlist_bl_head *head;
+       struct mb_cache_entry *entry;
+-      head = mb_cache_entry_head(cache, key);
+-      hlist_bl_lock(head);
+-      hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+-              if (entry->e_key == key && entry->e_value == value) {
+-                      if (atomic_read(&entry->e_refcnt) > 2) {
+-                              atomic_inc(&entry->e_refcnt);
+-                              hlist_bl_unlock(head);
+-                              return entry;
+-                      }
+-                      /* We keep hash list reference to keep entry alive */
+-                      hlist_bl_del_init(&entry->e_hash_list);
+-                      hlist_bl_unlock(head);
+-                      spin_lock(&cache->c_list_lock);
+-                      if (!list_empty(&entry->e_list)) {
+-                              list_del_init(&entry->e_list);
+-                              if (!WARN_ONCE(cache->c_entry_count == 0,
+-              "mbcache: attempt to decrement c_entry_count past zero"))
+-                                      cache->c_entry_count--;
+-                              atomic_dec(&entry->e_refcnt);
+-                      }
+-                      spin_unlock(&cache->c_list_lock);
+-                      mb_cache_entry_put(cache, entry);
+-                      return NULL;
+-              }
+-      }
+-      hlist_bl_unlock(head);
++      entry = mb_cache_entry_get(cache, key, value);
++      if (!entry)
++              return NULL;
++      /*
++       * Drop the ref we got from mb_cache_entry_get() and the initial hash
++       * ref if we are the last user
++       */
++      if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2)
++              return entry;
++
++      spin_lock(&cache->c_list_lock);
++      if (!list_empty(&entry->e_list))
++              list_del_init(&entry->e_list);
++      cache->c_entry_count--;
++      spin_unlock(&cache->c_list_lock);
++      __mb_cache_entry_free(cache, entry);
+       return NULL;
+ }
+ EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
+@@ -343,42 +336,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+                                    unsigned long nr_to_scan)
+ {
+       struct mb_cache_entry *entry;
+-      struct hlist_bl_head *head;
+       unsigned long shrunk = 0;
+       spin_lock(&cache->c_list_lock);
+       while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+               entry = list_first_entry(&cache->c_list,
+                                        struct mb_cache_entry, e_list);
+-              if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
++              /* Drop initial hash reference if there is no user */
++              if (entry->e_referenced ||
++                  atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) {
+                       entry->e_referenced = 0;
+                       list_move_tail(&entry->e_list, &cache->c_list);
+                       continue;
+               }
+               list_del_init(&entry->e_list);
+               cache->c_entry_count--;
+-              /*
+-               * We keep LRU list reference so that entry doesn't go away
+-               * from under us.
+-               */
+               spin_unlock(&cache->c_list_lock);
+-              head = mb_cache_entry_head(cache, entry->e_key);
+-              hlist_bl_lock(head);
+-              /* Now a reliable check if the entry didn't get used... */
+-              if (atomic_read(&entry->e_refcnt) > 2) {
+-                      hlist_bl_unlock(head);
+-                      spin_lock(&cache->c_list_lock);
+-                      list_add_tail(&entry->e_list, &cache->c_list);
+-                      cache->c_entry_count++;
+-                      continue;
+-              }
+-              if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+-                      hlist_bl_del_init(&entry->e_hash_list);
+-                      atomic_dec(&entry->e_refcnt);
+-              }
+-              hlist_bl_unlock(head);
+-              if (mb_cache_entry_put(cache, entry))
+-                      shrunk++;
++              __mb_cache_entry_free(cache, entry);
++              shrunk++;
+               cond_resched();
+               spin_lock(&cache->c_list_lock);
+       }
+@@ -470,11 +445,6 @@ void mb_cache_destroy(struct mb_cache *cache)
+        * point.
+        */
+       list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
+-              if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+-                      hlist_bl_del_init(&entry->e_hash_list);
+-                      atomic_dec(&entry->e_refcnt);
+-              } else
+-                      WARN_ON(1);
+               list_del(&entry->e_list);
+               WARN_ON(atomic_read(&entry->e_refcnt) != 1);
+               mb_cache_entry_put(cache, entry);
+diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
+index 8eca7f25c432..e9d5ece87794 100644
+--- a/include/linux/mbcache.h
++++ b/include/linux/mbcache.h
+@@ -13,8 +13,16 @@ struct mb_cache;
+ struct mb_cache_entry {
+       /* List of entries in cache - protected by cache->c_list_lock */
+       struct list_head        e_list;
+-      /* Hash table list - protected by hash chain bitlock */
++      /*
++       * Hash table list - protected by hash chain bitlock. The entry is
++       * guaranteed to be hashed while e_refcnt > 0.
++       */
+       struct hlist_bl_node    e_hash_list;
++      /*
++       * Entry refcount. Once it reaches zero, entry is unhashed and freed.
++       * While refcount > 0, the entry is guaranteed to stay in the hash and
++       * e.g. mb_cache_entry_try_delete() will fail.
++       */
+       atomic_t                e_refcnt;
+       /* Key in hash - stable during lifetime of the entry */
+       u32                     e_key;
+@@ -29,20 +37,20 @@ void mb_cache_destroy(struct mb_cache *cache);
+ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+                         u64 value, bool reusable);
+-void __mb_cache_entry_free(struct mb_cache_entry *entry);
++void __mb_cache_entry_free(struct mb_cache *cache,
++                         struct mb_cache_entry *entry);
+ void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
+-static inline int mb_cache_entry_put(struct mb_cache *cache,
+-                                   struct mb_cache_entry *entry)
++static inline void mb_cache_entry_put(struct mb_cache *cache,
++                                    struct mb_cache_entry *entry)
+ {
+       unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
+       if (cnt > 0) {
+-              if (cnt <= 3)
++              if (cnt <= 2)
+                       wake_up_var(&entry->e_refcnt);
+-              return 0;
++              return;
+       }
+-      __mb_cache_entry_free(entry);
+-      return 1;
++      __mb_cache_entry_free(cache, entry);
+ }
+ struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
+-- 
+2.35.1
+
diff --git a/queue-5.10/mbcache-don-t-reclaim-used-entries.patch b/queue-5.10/mbcache-don-t-reclaim-used-entries.patch
new file mode 100644 (file)
index 0000000..f222fe5
--- /dev/null
@@ -0,0 +1,56 @@
+From f0057e035b042df0fbd7e425bad663ecc584e266 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:20 +0200
+Subject: mbcache: don't reclaim used entries
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 58318914186c157477b978b1739dfe2f1b9dc0fe ]
+
+Do not reclaim entries that are currently used by somebody from a
+shrinker. Firstly, these entries are likely useful. Secondly, we will
+need to keep such entries to protect pending increment of xattr block
+refcount.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/mbcache.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index 97c54d3a2227..cfc28129fb6f 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -288,7 +288,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+       while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+               entry = list_first_entry(&cache->c_list,
+                                        struct mb_cache_entry, e_list);
+-              if (entry->e_referenced) {
++              if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
+                       entry->e_referenced = 0;
+                       list_move_tail(&entry->e_list, &cache->c_list);
+                       continue;
+@@ -302,6 +302,14 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+               spin_unlock(&cache->c_list_lock);
+               head = mb_cache_entry_head(cache, entry->e_key);
+               hlist_bl_lock(head);
++              /* Now a reliable check if the entry didn't get used... */
++              if (atomic_read(&entry->e_refcnt) > 2) {
++                      hlist_bl_unlock(head);
++                      spin_lock(&cache->c_list_lock);
++                      list_add_tail(&entry->e_list, &cache->c_list);
++                      cache->c_entry_count++;
++                      continue;
++              }
+               if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+                       hlist_bl_del_init(&entry->e_hash_list);
+                       atomic_dec(&entry->e_refcnt);
+-- 
+2.35.1
+
diff --git a/queue-5.10/mm-highmem-lift-memcpy_-to-from-_page-to-core.patch b/queue-5.10/mm-highmem-lift-memcpy_-to-from-_page-to-core.patch
new file mode 100644 (file)
index 0000000..e48e0c9
--- /dev/null
@@ -0,0 +1,123 @@
+From d098fa4a5cb3f52e1b1f1c0ab93a98fcafb714de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 Feb 2021 22:22:14 -0800
+Subject: mm/highmem: Lift memcpy_[to|from]_page to core
+
+From: Ira Weiny <ira.weiny@intel.com>
+
+[ Upstream commit bb90d4bc7b6a536b2e4db45f4763e467c2008251 ]
+
+Working through a conversion to a call kmap_local_page() instead of
+kmap() revealed many places where the pattern kmap/memcpy/kunmap
+occurred.
+
+Eric Biggers, Matthew Wilcox, Christoph Hellwig, Dan Williams, and Al
+Viro all suggested putting this code into helper functions.  Al Viro
+further pointed out that these functions already existed in the iov_iter
+code.[1]
+
+Various locations for the lifted functions were considered.
+
+Headers like mm.h or string.h seem ok but don't really portray the
+functionality well.  pagemap.h made some sense but is for page cache
+functionality.[2]
+
+Another alternative would be to create a new header for the promoted
+memcpy functions, but it masks the fact that these are designed to copy
+to/from pages using the kernel direct mappings and complicates matters
+with a new header.
+
+Placing these functions in 'highmem.h' is suboptimal especially with the
+changes being proposed in the functionality of kmap.  From a caller
+perspective including/using 'highmem.h' implies that the functions
+defined in that header are only required when highmem is in use which is
+increasingly not the case with modern processors.  However, highmem.h is
+where all the current functions like this reside (zero_user(),
+clear_highpage(), clear_user_highpage(), copy_user_highpage(), and
+copy_highpage()).  So it makes the most sense even though it is
+distasteful for some.[3]
+
+Lift memcpy_to_page() and memcpy_from_page() to pagemap.h.
+
+[1] https://lore.kernel.org/lkml/20201013200149.GI3576660@ZenIV.linux.org.uk/
+    https://lore.kernel.org/lkml/20201013112544.GA5249@infradead.org/
+
+[2] https://lore.kernel.org/lkml/20201208122316.GH7338@casper.infradead.org/
+
+[3] https://lore.kernel.org/lkml/20201013200149.GI3576660@ZenIV.linux.org.uk/#t
+    https://lore.kernel.org/lkml/20201208163814.GN1563847@iweiny-DESK2.sc.intel.com/
+
+Cc: Boris Pismenny <borisp@mellanox.com>
+Cc: Or Gerlitz <gerlitz.or@gmail.com>
+Cc: Dave Hansen <dave.hansen@intel.com>
+Suggested-by: Matthew Wilcox <willy@infradead.org>
+Suggested-by: Christoph Hellwig <hch@infradead.org>
+Suggested-by: Dan Williams <dan.j.williams@intel.com>
+Suggested-by: Al Viro <viro@zeniv.linux.org.uk>
+Suggested-by: Eric Biggers <ebiggers@kernel.org>
+Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Ira Weiny <ira.weiny@intel.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 956510c0c743 ("fs: ext4: initialize fsdata in pagecache_write()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/highmem.h | 18 ++++++++++++++++++
+ lib/iov_iter.c          | 14 --------------
+ 2 files changed, 18 insertions(+), 14 deletions(-)
+
+diff --git a/include/linux/highmem.h b/include/linux/highmem.h
+index 14e6202ce47f..b25df1f8d48d 100644
+--- a/include/linux/highmem.h
++++ b/include/linux/highmem.h
+@@ -345,4 +345,22 @@ static inline void copy_highpage(struct page *to, struct page *from)
+ #endif
++static inline void memcpy_from_page(char *to, struct page *page,
++                                  size_t offset, size_t len)
++{
++      char *from = kmap_atomic(page);
++
++      memcpy(to, from + offset, len);
++      kunmap_atomic(from);
++}
++
++static inline void memcpy_to_page(struct page *page, size_t offset,
++                                const char *from, size_t len)
++{
++      char *to = kmap_atomic(page);
++
++      memcpy(to + offset, from, len);
++      kunmap_atomic(to);
++}
++
+ #endif /* _LINUX_HIGHMEM_H */
+diff --git a/lib/iov_iter.c b/lib/iov_iter.c
+index 650554964f18..6e30113303ba 100644
+--- a/lib/iov_iter.c
++++ b/lib/iov_iter.c
+@@ -467,20 +467,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
+ }
+ EXPORT_SYMBOL(iov_iter_init);
+-static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
+-{
+-      char *from = kmap_atomic(page);
+-      memcpy(to, from + offset, len);
+-      kunmap_atomic(from);
+-}
+-
+-static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
+-{
+-      char *to = kmap_atomic(page);
+-      memcpy(to + offset, from, len);
+-      kunmap_atomic(to);
+-}
+-
+ static void memzero_page(struct page *page, size_t offset, size_t len)
+ {
+       char *addr = kmap_atomic(page);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-amd-xgbe-add-missed-tasklet_kill.patch b/queue-5.10/net-amd-xgbe-add-missed-tasklet_kill.patch
new file mode 100644 (file)
index 0000000..9a4ce40
--- /dev/null
@@ -0,0 +1,71 @@
+From a3f1e166ebaff9e6042cd8bfcf30de070292c5b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Dec 2022 16:14:47 +0800
+Subject: net: amd-xgbe: add missed tasklet_kill
+
+From: Jiguang Xiao <jiguang.xiao@windriver.com>
+
+[ Upstream commit d530ece70f16f912e1d1bfeea694246ab78b0a4b ]
+
+The driver does not call tasklet_kill in several places.
+Add the calls to fix it.
+
+Fixes: 85b85c853401 ("amd-xgbe: Re-issue interrupt if interrupt status not cleared")
+Signed-off-by: Jiguang Xiao <jiguang.xiao@windriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/amd/xgbe/xgbe-drv.c  | 3 +++
+ drivers/net/ethernet/amd/xgbe/xgbe-i2c.c  | 4 +++-
+ drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 +++-
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+index a816b30bca04..a5d6faf7b89e 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+@@ -1064,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
+       devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
++      tasklet_kill(&pdata->tasklet_dev);
++      tasklet_kill(&pdata->tasklet_ecc);
++
+       if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
+               devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+index 22d4fc547a0a..a9ccc4258ee5 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
+@@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata)
+       xgbe_i2c_disable(pdata);
+       xgbe_i2c_clear_all_interrupts(pdata);
+-      if (pdata->dev_irq != pdata->i2c_irq)
++      if (pdata->dev_irq != pdata->i2c_irq) {
+               devm_free_irq(pdata->dev, pdata->i2c_irq, pdata);
++              tasklet_kill(&pdata->tasklet_i2c);
++      }
+ }
+ static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
+diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+index 4e97b4869522..0c5c1b155683 100644
+--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+@@ -1390,8 +1390,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
+       /* Disable auto-negotiation */
+       xgbe_an_disable_all(pdata);
+-      if (pdata->dev_irq != pdata->an_irq)
++      if (pdata->dev_irq != pdata->an_irq) {
+               devm_free_irq(pdata->dev, pdata->an_irq, pdata);
++              tasklet_kill(&pdata->tasklet_an);
++      }
+       pdata->phy_if.phy_impl.stop(pdata);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-hns3-add-interrupts-re-initialization-while-doin.patch b/queue-5.10/net-hns3-add-interrupts-re-initialization-while-doin.patch
new file mode 100644 (file)
index 0000000..54466bd
--- /dev/null
@@ -0,0 +1,43 @@
+From 33656c78a47654a45874e2b6c7ab0e2eaba9e8aa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 14:43:41 +0800
+Subject: net: hns3: add interrupts re-initialization while doing VF FLR
+
+From: Jie Wang <wangjie125@huawei.com>
+
+[ Upstream commit 09e6b30eeb254f1818a008cace3547159e908dfd ]
+
+Currently keep alive message between PF and VF may be lost and the VF is
+unalive in PF. So the VF will not do reset during PF FLR reset process.
+This would make the allocated interrupt resources of VF invalid and VF
+wouldn't receive or respond to PF any more.
+
+So this patch adds VF interrupts re-initialization during VF FLR for VF
+recovery in above cases.
+
+Fixes: 862d969a3a4d ("net: hns3: do VF's pci re-initialization while PF doing FLR")
+Signed-off-by: Jie Wang <wangjie125@huawei.com>
+Signed-off-by: Hao Lan <lanhao@huawei.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+index d6580e942724..f7f3e4bbc477 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+@@ -3089,7 +3089,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev)
+       struct pci_dev *pdev = hdev->pdev;
+       int ret = 0;
+-      if (hdev->reset_type == HNAE3_VF_FULL_RESET &&
++      if ((hdev->reset_type == HNAE3_VF_FULL_RESET ||
++           hdev->reset_type == HNAE3_FLR_RESET) &&
+           test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) {
+               hclgevf_misc_irq_uninit(hdev);
+               hclgevf_uninit_msi(hdev);
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch b/queue-5.10/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch
new file mode 100644 (file)
index 0000000..b446620
--- /dev/null
@@ -0,0 +1,39 @@
+From 4259afd7d9308433854c19ba2e7bd444d9d8758d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 12:51:52 +0200
+Subject: net/mlx5: Add forgotten cleanup calls into mlx5_init_once() error
+ path
+
+From: Jiri Pirko <jiri@nvidia.com>
+
+[ Upstream commit 2a35b2c2e6a252eda2134aae6a756861d9299531 ]
+
+There are two cleanup calls missing in mlx5_init_once() error path.
+Add them making the error path flow to be the same as
+mlx5_cleanup_once().
+
+Fixes: 52ec462eca9b ("net/mlx5: Add reserved-gids support")
+Fixes: 7c39afb394c7 ("net/mlx5: PTP code migration to driver core section")
+Signed-off-by: Jiri Pirko <jiri@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index 8246b6285d5a..29bc1df28aeb 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -906,6 +906,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
+ err_tables_cleanup:
+       mlx5_geneve_destroy(dev->geneve);
+       mlx5_vxlan_destroy(dev->vxlan);
++      mlx5_cleanup_clock(dev);
++      mlx5_cleanup_reserved_gids(dev);
+       mlx5_cq_debugfs_cleanup(dev);
+       mlx5_fw_reset_cleanup(dev);
+ err_events_cleanup:
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-mlx5-avoid-recovery-in-probe-flows.patch b/queue-5.10/net-mlx5-avoid-recovery-in-probe-flows.patch
new file mode 100644 (file)
index 0000000..e8b04e5
--- /dev/null
@@ -0,0 +1,49 @@
+From 87650d1c366cdc9c687b5fa60b044fa660402cd5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Nov 2022 13:34:12 +0200
+Subject: net/mlx5: Avoid recovery in probe flows
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 9078e843efec530f279a155f262793c58b0746bd ]
+
+Currently, recovery is done without considering whether the device is
+still in probe flow.
+This may lead to recovery before device have finished probed
+successfully. e.g.: while mlx5_init_one() is running. Recovery flow is
+using functionality that is loaded only by mlx5_init_one(), and there
+is no point in running recovery without mlx5_init_one() finished
+successfully.
+
+Fix it by waiting for probe flow to finish and checking whether the
+device is probed before trying to perform recovery.
+
+Fixes: 51d138c2610a ("net/mlx5: Fix health error state handling")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Moshe Shemesh <moshe@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/health.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+index 0c32c485eb58..b21054514736 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+@@ -618,6 +618,13 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work)
+       priv = container_of(health, struct mlx5_priv, health);
+       dev = container_of(priv, struct mlx5_core_dev, priv);
++      mutex_lock(&dev->intf_state_mutex);
++      if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) {
++              mlx5_core_err(dev, "health works are not permitted at this stage\n");
++              mutex_unlock(&dev->intf_state_mutex);
++              return;
++      }
++      }
++      mutex_unlock(&dev->intf_state_mutex);
+       enter_error_state(dev, false);
+       if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) {
+               if (mlx5_health_try_recover(dev))
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch b/queue-5.10/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch
new file mode 100644 (file)
index 0000000..b5f2f31
--- /dev/null
@@ -0,0 +1,48 @@
+From cc96b29be9be075c5be4df53492e3f617c586b0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Dec 2022 16:02:57 +0200
+Subject: net/mlx5e: Fix hw mtu initializing at XDP SQ allocation
+
+From: Adham Faris <afaris@nvidia.com>
+
+[ Upstream commit 1e267ab88dc44c48f556218f7b7f14c76f7aa066 ]
+
+Current xdp xmit functions logic (mlx5e_xmit_xdp_frame_mpwqe or
+mlx5e_xmit_xdp_frame), validates xdp packet length by comparing it to
+hw mtu (configured at xdp sq allocation) before xmiting it. This check
+does not account for ethernet fcs length (calculated and filled by the
+nic). Hence, when we try sending packets with length > (hw-mtu -
+ethernet-fcs-size), the device port drops it and tx_errors_phy is
+incremented. Desired behavior is to catch these packets and drop them
+by the driver.
+
+Fix this behavior in XDP SQ allocation function (mlx5e_alloc_xdpsq) by
+subtracting ethernet FCS header size (4 Bytes) from current hw mtu
+value, since ethernet FCS is calculated and written to ethernet frames
+by the nic.
+
+Fixes: d8bec2b29a82 ("net/mlx5e: Support bpf_xdp_adjust_head()")
+Signed-off-by: Adham Faris <afaris@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index cfc3bfcb04a2..5673a4113253 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -992,7 +992,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c,
+       sq->channel   = c;
+       sq->uar_map   = mdev->mlx5e_res.bfreg.map;
+       sq->min_inline_mode = params->tx_min_inline_mode;
+-      sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
++      sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN;
+       sq->xsk_pool  = xsk_pool;
+       sq->stats = sq->xsk_pool ?
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch b/queue-5.10/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch
new file mode 100644 (file)
index 0000000..8e94b1d
--- /dev/null
@@ -0,0 +1,45 @@
+From 1d33b28a01796b6437dcccdcc70f4cc7e3f3b8a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Nov 2022 15:24:21 +0200
+Subject: net/mlx5e: IPoIB, Don't allow CQE compression to be turned on by
+ default
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+[ Upstream commit b12d581e83e3ae1080c32ab83f123005bd89a840 ]
+
+mlx5e_build_nic_params will turn CQE compression on if the hardware
+capability is enabled and the slow_pci_heuristic condition is detected.
+As IPoIB doesn't support CQE compression, make sure to disable the
+feature in the IPoIB profile init.
+
+Please note that the feature is not exposed to the user for IPoIB
+interfaces, so it can't be subsequently turned on.
+
+Fixes: b797a684b0dd ("net/mlx5e: Enable CQE compression when PCI is slower than link")
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Reviewed-by: Gal Pressman <gal@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+index 5c6a376aa62e..0e7fd200b426 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+@@ -69,6 +69,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev,
+       params->lro_en = false;
+       params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN;
+       params->tunneled_offload_en = false;
++
++      /* CQE compression is not supported for IPoIB */
++      params->rx_cqe_compress_def = false;
++      MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def);
+ }
+ /* Called directly after IPoIB netdevice was created to initialize SW structs */
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch b/queue-5.10/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch
new file mode 100644 (file)
index 0000000..2008787
--- /dev/null
@@ -0,0 +1,35 @@
+From 1ba6e7c0947e2d8d46aefaac005f49342d83faf9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 10:29:25 +0400
+Subject: net: phy: xgmiitorgmii: Fix refcount leak in xgmiitorgmii_probe
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit d039535850ee47079d59527e96be18d8e0daa84b ]
+
+of_phy_find_device() return device node with refcount incremented.
+Call put_device() to release it when not needed anymore.
+
+Fixes: ab4e6ee578e8 ("net: phy: xgmiitorgmii: Check phy_driver ready before accessing")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/xilinx_gmii2rgmii.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c
+index 151c2a3f0b3a..7a78dfdfa5bd 100644
+--- a/drivers/net/phy/xilinx_gmii2rgmii.c
++++ b/drivers/net/phy/xilinx_gmii2rgmii.c
+@@ -82,6 +82,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev)
+       if (!priv->phy_dev->drv) {
+               dev_info(dev, "Attached phy not ready\n");
++              put_device(&priv->phy_dev->mdio.dev);
+               return -EPROBE_DEFER;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch b/queue-5.10/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch
new file mode 100644 (file)
index 0000000..ccbaa77
--- /dev/null
@@ -0,0 +1,42 @@
+From f32f7d5cdc2515b7a4d53857e223ad17c1c59c2f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 1 Jan 2023 16:57:43 -0500
+Subject: net: sched: atm: dont intepret cls results when asked to drop
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit a2965c7be0522eaa18808684b7b82b248515511b ]
+
+If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume
+res.class contains a valid pointer
+Fixes: b0188d4dbe5f ("[NET_SCHED]: sch_atm: Lindent")
+
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_atm.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
+index 794c7377cd7e..95967ce1f370 100644
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -396,10 +396,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
+                               result = tcf_classify(skb, fl, &res, true);
+                               if (result < 0)
+                                       continue;
++                              if (result == TC_ACT_SHOT)
++                                      goto done;
++
+                               flow = (struct atm_flow_data *)res.class;
+                               if (!flow)
+                                       flow = lookup_flow(sch, res.classid);
+-                              goto done;
++                              goto drop;
+                       }
+               }
+               flow = NULL;
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch b/queue-5.10/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch
new file mode 100644 (file)
index 0000000..e12db45
--- /dev/null
@@ -0,0 +1,147 @@
+From a4e6f6cce1acb5309ca50e4ca601d69f1514f525 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 1 Jan 2023 16:57:44 -0500
+Subject: net: sched: cbq: dont intepret cls results when asked to drop
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit caa4b35b4317d5147b3ab0fbdc9c075c7d2e9c12 ]
+
+If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume that
+res.class contains a valid pointer
+
+Sample splat reported by Kyle Zeng
+
+[    5.405624] 0: reclassify loop, rule prio 0, protocol 800
+[    5.406326] ==================================================================
+[    5.407240] BUG: KASAN: slab-out-of-bounds in cbq_enqueue+0x54b/0xea0
+[    5.407987] Read of size 1 at addr ffff88800e3122aa by task poc/299
+[    5.408731]
+[    5.408897] CPU: 0 PID: 299 Comm: poc Not tainted 5.10.155+ #15
+[    5.409516] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
+BIOS 1.15.0-1 04/01/2014
+[    5.410439] Call Trace:
+[    5.410764]  dump_stack+0x87/0xcd
+[    5.411153]  print_address_description+0x7a/0x6b0
+[    5.411687]  ? vprintk_func+0xb9/0xc0
+[    5.411905]  ? printk+0x76/0x96
+[    5.412110]  ? cbq_enqueue+0x54b/0xea0
+[    5.412323]  kasan_report+0x17d/0x220
+[    5.412591]  ? cbq_enqueue+0x54b/0xea0
+[    5.412803]  __asan_report_load1_noabort+0x10/0x20
+[    5.413119]  cbq_enqueue+0x54b/0xea0
+[    5.413400]  ? __kasan_check_write+0x10/0x20
+[    5.413679]  __dev_queue_xmit+0x9c0/0x1db0
+[    5.413922]  dev_queue_xmit+0xc/0x10
+[    5.414136]  ip_finish_output2+0x8bc/0xcd0
+[    5.414436]  __ip_finish_output+0x472/0x7a0
+[    5.414692]  ip_finish_output+0x5c/0x190
+[    5.414940]  ip_output+0x2d8/0x3c0
+[    5.415150]  ? ip_mc_finish_output+0x320/0x320
+[    5.415429]  __ip_queue_xmit+0x753/0x1760
+[    5.415664]  ip_queue_xmit+0x47/0x60
+[    5.415874]  __tcp_transmit_skb+0x1ef9/0x34c0
+[    5.416129]  tcp_connect+0x1f5e/0x4cb0
+[    5.416347]  tcp_v4_connect+0xc8d/0x18c0
+[    5.416577]  __inet_stream_connect+0x1ae/0xb40
+[    5.416836]  ? local_bh_enable+0x11/0x20
+[    5.417066]  ? lock_sock_nested+0x175/0x1d0
+[    5.417309]  inet_stream_connect+0x5d/0x90
+[    5.417548]  ? __inet_stream_connect+0xb40/0xb40
+[    5.417817]  __sys_connect+0x260/0x2b0
+[    5.418037]  __x64_sys_connect+0x76/0x80
+[    5.418267]  do_syscall_64+0x31/0x50
+[    5.418477]  entry_SYSCALL_64_after_hwframe+0x61/0xc6
+[    5.418770] RIP: 0033:0x473bb7
+[    5.418952] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00
+00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00
+00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34
+24 89
+[    5.420046] RSP: 002b:00007fffd20eb0f8 EFLAGS: 00000246 ORIG_RAX:
+000000000000002a
+[    5.420472] RAX: ffffffffffffffda RBX: 00007fffd20eb578 RCX: 0000000000473bb7
+[    5.420872] RDX: 0000000000000010 RSI: 00007fffd20eb110 RDI: 0000000000000007
+[    5.421271] RBP: 00007fffd20eb150 R08: 0000000000000001 R09: 0000000000000004
+[    5.421671] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001
+[    5.422071] R13: 00007fffd20eb568 R14: 00000000004fc740 R15: 0000000000000002
+[    5.422471]
+[    5.422562] Allocated by task 299:
+[    5.422782]  __kasan_kmalloc+0x12d/0x160
+[    5.423007]  kasan_kmalloc+0x5/0x10
+[    5.423208]  kmem_cache_alloc_trace+0x201/0x2e0
+[    5.423492]  tcf_proto_create+0x65/0x290
+[    5.423721]  tc_new_tfilter+0x137e/0x1830
+[    5.423957]  rtnetlink_rcv_msg+0x730/0x9f0
+[    5.424197]  netlink_rcv_skb+0x166/0x300
+[    5.424428]  rtnetlink_rcv+0x11/0x20
+[    5.424639]  netlink_unicast+0x673/0x860
+[    5.424870]  netlink_sendmsg+0x6af/0x9f0
+[    5.425100]  __sys_sendto+0x58d/0x5a0
+[    5.425315]  __x64_sys_sendto+0xda/0xf0
+[    5.425539]  do_syscall_64+0x31/0x50
+[    5.425764]  entry_SYSCALL_64_after_hwframe+0x61/0xc6
+[    5.426065]
+[    5.426157] The buggy address belongs to the object at ffff88800e312200
+[    5.426157]  which belongs to the cache kmalloc-128 of size 128
+[    5.426955] The buggy address is located 42 bytes to the right of
+[    5.426955]  128-byte region [ffff88800e312200, ffff88800e312280)
+[    5.427688] The buggy address belongs to the page:
+[    5.427992] page:000000009875fabc refcount:1 mapcount:0
+mapping:0000000000000000 index:0x0 pfn:0xe312
+[    5.428562] flags: 0x100000000000200(slab)
+[    5.428812] raw: 0100000000000200 dead000000000100 dead000000000122
+ffff888007843680
+[    5.429325] raw: 0000000000000000 0000000000100010 00000001ffffffff
+ffff88800e312401
+[    5.429875] page dumped because: kasan: bad access detected
+[    5.430214] page->mem_cgroup:ffff88800e312401
+[    5.430471]
+[    5.430564] Memory state around the buggy address:
+[    5.430846]  ffff88800e312180: fc fc fc fc fc fc fc fc fc fc fc fc
+fc fc fc fc
+[    5.431267]  ffff88800e312200: 00 00 00 00 00 00 00 00 00 00 00 00
+00 00 00 fc
+[    5.431705] >ffff88800e312280: fc fc fc fc fc fc fc fc fc fc fc fc
+fc fc fc fc
+[    5.432123]                                   ^
+[    5.432391]  ffff88800e312300: 00 00 00 00 00 00 00 00 00 00 00 00
+00 00 00 fc
+[    5.432810]  ffff88800e312380: fc fc fc fc fc fc fc fc fc fc fc fc
+fc fc fc fc
+[    5.433229] ==================================================================
+[    5.433648] Disabling lock debugging due to kernel taint
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Kyle Zeng <zengyhkyle@gmail.com>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_cbq.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
+index 9a3dff02b7a2..3da5eb313c24 100644
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -231,6 +231,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+               result = tcf_classify(skb, fl, &res, true);
+               if (!fl || result < 0)
+                       goto fallback;
++              if (result == TC_ACT_SHOT)
++                      return NULL;
+               cl = (void *)res.class;
+               if (!cl) {
+@@ -251,8 +253,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+               case TC_ACT_TRAP:
+                       *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+                       fallthrough;
+-              case TC_ACT_SHOT:
+-                      return NULL;
+               case TC_ACT_RECLASSIFY:
+                       return cbq_reclassify(skb, cl);
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.10/net-sched-fix-memory-leak-in-tcindex_set_parms.patch b/queue-5.10/net-sched-fix-memory-leak-in-tcindex_set_parms.patch
new file mode 100644 (file)
index 0000000..0a6108d
--- /dev/null
@@ -0,0 +1,150 @@
+From 0e40e1546d4f862f22270418d33681756dec9507 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 11:51:19 +0800
+Subject: net: sched: fix memory leak in tcindex_set_parms
+
+From: Hawkins Jiawei <yin31149@gmail.com>
+
+[ Upstream commit 399ab7fe0fa0d846881685fd4e57e9a8ef7559f7 ]
+
+Syzkaller reports a memory leak as follows:
+====================================
+BUG: memory leak
+unreferenced object 0xffff88810c287f00 (size 256):
+  comm "syz-executor105", pid 3600, jiffies 4294943292 (age 12.990s)
+  hex dump (first 32 bytes):
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
+  backtrace:
+    [<ffffffff814cf9f0>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046
+    [<ffffffff839c9e07>] kmalloc include/linux/slab.h:576 [inline]
+    [<ffffffff839c9e07>] kmalloc_array include/linux/slab.h:627 [inline]
+    [<ffffffff839c9e07>] kcalloc include/linux/slab.h:659 [inline]
+    [<ffffffff839c9e07>] tcf_exts_init include/net/pkt_cls.h:250 [inline]
+    [<ffffffff839c9e07>] tcindex_set_parms+0xa7/0xbe0 net/sched/cls_tcindex.c:342
+    [<ffffffff839caa1f>] tcindex_change+0xdf/0x120 net/sched/cls_tcindex.c:553
+    [<ffffffff8394db62>] tc_new_tfilter+0x4f2/0x1100 net/sched/cls_api.c:2147
+    [<ffffffff8389e91c>] rtnetlink_rcv_msg+0x4dc/0x5d0 net/core/rtnetlink.c:6082
+    [<ffffffff839eba67>] netlink_rcv_skb+0x87/0x1d0 net/netlink/af_netlink.c:2540
+    [<ffffffff839eab87>] netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline]
+    [<ffffffff839eab87>] netlink_unicast+0x397/0x4c0 net/netlink/af_netlink.c:1345
+    [<ffffffff839eb046>] netlink_sendmsg+0x396/0x710 net/netlink/af_netlink.c:1921
+    [<ffffffff8383e796>] sock_sendmsg_nosec net/socket.c:714 [inline]
+    [<ffffffff8383e796>] sock_sendmsg+0x56/0x80 net/socket.c:734
+    [<ffffffff8383eb08>] ____sys_sendmsg+0x178/0x410 net/socket.c:2482
+    [<ffffffff83843678>] ___sys_sendmsg+0xa8/0x110 net/socket.c:2536
+    [<ffffffff838439c5>] __sys_sendmmsg+0x105/0x330 net/socket.c:2622
+    [<ffffffff83843c14>] __do_sys_sendmmsg net/socket.c:2651 [inline]
+    [<ffffffff83843c14>] __se_sys_sendmmsg net/socket.c:2648 [inline]
+    [<ffffffff83843c14>] __x64_sys_sendmmsg+0x24/0x30 net/socket.c:2648
+    [<ffffffff84605fd5>] do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+    [<ffffffff84605fd5>] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+    [<ffffffff84800087>] entry_SYSCALL_64_after_hwframe+0x63/0xcd
+====================================
+
+Kernel uses tcindex_change() to change an existing
+filter properties.
+
+Yet the problem is that, during the process of changing,
+if `old_r` is retrieved from `p->perfect`, then
+kernel uses tcindex_alloc_perfect_hash() to newly
+allocate filter results, uses tcindex_filter_result_init()
+to clear the old filter result, without destroying
+its tcf_exts structure, which triggers the above memory leak.
+
+To be more specific, there are only two source for the `old_r`,
+according to the tcindex_lookup(). `old_r` is retrieved from
+`p->perfect`, or `old_r` is retrieved from `p->h`.
+
+  * If `old_r` is retrieved from `p->perfect`, kernel uses
+tcindex_alloc_perfect_hash() to newly allocate the
+filter results. Then `r` is assigned with `cp->perfect + handle`,
+which is newly allocated. So condition `old_r && old_r != r` is
+true in this situation, and kernel uses tcindex_filter_result_init()
+to clear the old filter result, without destroying
+its tcf_exts structure
+
+  * If `old_r` is retrieved from `p->h`, then `p->perfect` is NULL
+according to the tcindex_lookup(). Considering that `cp->h`
+is directly copied from `p->h` and `p->perfect` is NULL,
+`r` is assigned with `tcindex_lookup(cp, handle)`, whose value
+should be the same as `old_r`, so condition `old_r && old_r != r`
+is false in this situation, kernel ignores using
+tcindex_filter_result_init() to clear the old filter result.
+
+So only when `old_r` is retrieved from `p->perfect` does kernel use
+tcindex_filter_result_init() to clear the old filter result, which
+triggers the above memory leak.
+
+Considering that there already exists a tc_filter_wq workqueue
+to destroy the old tcindex_data by tcindex_partial_destroy_work()
+at the end of tcindex_set_parms(), this patch solves
+this memory leak bug by removing this old filter result
+clearing part and delegating it to the tc_filter_wq workqueue.
+
+Note that this patch doesn't introduce any other issues. If
+`old_r` is retrieved from `p->perfect`, this patch just
+delegates old filter result clearing part to the
+tc_filter_wq workqueue; If `old_r` is retrieved from `p->h`,
+kernel doesn't reach the old filter result clearing part, so
+removing this part has no effect.
+
+[Thanks to the suggestion from Jakub Kicinski, Cong Wang, Paolo Abeni
+and Dmitry Vyukov]
+
+Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()")
+Link: https://lore.kernel.org/all/0000000000001de5c505ebc9ec59@google.com/
+Reported-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com
+Tested-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com
+Cc: Cong Wang <cong.wang@bytedance.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_tcindex.c | 12 ++----------
+ 1 file changed, 2 insertions(+), 10 deletions(-)
+
+diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
+index e9a8a2c86bbd..86250221d08d 100644
+--- a/net/sched/cls_tcindex.c
++++ b/net/sched/cls_tcindex.c
+@@ -332,7 +332,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+                 struct tcindex_filter_result *r, struct nlattr **tb,
+                 struct nlattr *est, bool ovr, struct netlink_ext_ack *extack)
+ {
+-      struct tcindex_filter_result new_filter_result, *old_r = r;
++      struct tcindex_filter_result new_filter_result;
+       struct tcindex_data *cp = NULL, *oldp;
+       struct tcindex_filter *f = NULL; /* make gcc behave */
+       struct tcf_result cr = {};
+@@ -401,7 +401,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+       err = tcindex_filter_result_init(&new_filter_result, cp, net);
+       if (err < 0)
+               goto errout_alloc;
+-      if (old_r)
++      if (r)
+               cr = r->res;
+       err = -EBUSY;
+@@ -478,14 +478,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
+               tcf_bind_filter(tp, &cr, base);
+       }
+-      if (old_r && old_r != r) {
+-              err = tcindex_filter_result_init(old_r, cp, net);
+-              if (err < 0) {
+-                      kfree(f);
+-                      goto errout_alloc;
+-              }
+-      }
+-
+       oldp = p;
+       r->res = cr;
+       tcf_exts_change(&r->exts, &e);
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch b/queue-5.10/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch
new file mode 100644 (file)
index 0000000..4184a1b
--- /dev/null
@@ -0,0 +1,109 @@
+From 3d7418d64c90ca3d25c6d73f80137f6518e4f490 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Dec 2022 13:24:37 +0100
+Subject: netfilter: ipset: fix hash:net,port,net hang with /0 subnet
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+
+[ Upstream commit a31d47be64b9b74f8cfedffe03e0a8a1f9e51f23 ]
+
+The hash:net,port,net set type supports /0 subnets. However, the patch
+commit 5f7b51bf09baca8e titled "netfilter: ipset: Limit the maximal range
+of consecutive elements to add/delete" did not take into account it and
+resulted in an endless loop. The bug is actually older but the patch
+5f7b51bf09baca8e brings it out earlier.
+
+Handle /0 subnets properly in hash:net,port,net set types.
+
+Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete")
+Reported-by: Марк Коренберг <socketpair@gmail.com>
+Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/ipset/ip_set_hash_netportnet.c | 40 ++++++++++----------
+ 1 file changed, 21 insertions(+), 19 deletions(-)
+
+diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
+index 6446f4fccc72..144346faffc1 100644
+--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
++++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
+@@ -172,17 +172,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
+       return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
+ }
++static u32
++hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
++{
++      if (from == 0 && to == UINT_MAX) {
++              *cidr = 0;
++              return to;
++      }
++      return ip_set_range_to_cidr(from, to, cidr);
++}
++
+ static int
+ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+                     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_netportnet4 *h = set->data;
++      struct hash_netportnet4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netportnet4_elem e = { };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+-      u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
+-      u64 n = 0, m = 0;
++      u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
+       bool with_ports = false;
+       int ret;
+@@ -284,19 +293,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+       } else {
+               ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+-              n++;
+-      } while (ipn++ < ip_to);
+-      ipn = ip2_from;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+-              m++;
+-      } while (ipn++ < ip2_to);
+-
+-      if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried) {
+               ip = ntohl(h->next.ip[0]);
+@@ -309,13 +305,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+       do {
+               e.ip[0] = htonl(ip);
+-              ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
++              ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
+               for (; p <= port_to; p++) {
+                       e.port = htons(p);
+                       do {
++                              i++;
+                               e.ip[1] = htonl(ip2);
+-                              ip2 = ip_set_range_to_cidr(ip2, ip2_to,
+-                                                         &e.cidr[1]);
++                              if (i > IPSET_MAX_RANGE) {
++                                      hash_netportnet4_data_next(&h->next,
++                                                                 &e);
++                                      return -ERANGE;
++                              }
++                              ip2 = hash_netportnet4_range_to_cidr(ip2,
++                                                      ip2_to, &e.cidr[1]);
+                               ret = adtfn(set, &e, &ext, &ext, flags);
+                               if (ret && !ip_set_eexist(ret, flags))
+                                       return ret;
+-- 
+2.35.1
+
diff --git a/queue-5.10/netfilter-ipset-rework-long-task-execution-when-addi.patch b/queue-5.10/netfilter-ipset-rework-long-task-execution-when-addi.patch
new file mode 100644 (file)
index 0000000..3f48901
--- /dev/null
@@ -0,0 +1,462 @@
+From 8cdd02e4865a438fda6662920436bad63b35d1d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Dec 2022 13:24:38 +0100
+Subject: netfilter: ipset: Rework long task execution when adding/deleting
+ entries
+
+From: Jozsef Kadlecsik <kadlec@netfilter.org>
+
+[ Upstream commit 5e29dc36bd5e2166b834ceb19990d9e68a734d7d ]
+
+When adding/deleting large number of elements in one step in ipset, it can
+take a reasonable amount of time and can result in soft lockup errors. The
+patch 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of
+consecutive elements to add/delete") tried to fix it by limiting the max
+elements to process at all. However it was not enough, it is still possible
+that we get hung tasks. Lowering the limit is not reasonable, so the
+approach in this patch is as follows: rely on the method used at resizing
+sets and save the state when we reach a smaller internal batch limit,
+unlock/lock and proceed from the saved state. Thus we can avoid long
+continuous tasks and at the same time removed the limit to add/delete large
+number of elements in one step.
+
+The nfnl mutex is held during the whole operation which prevents one to
+issue other ipset commands in parallel.
+
+Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete")
+Reported-by: syzbot+9204e7399656300bf271@syzkaller.appspotmail.com
+Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netfilter/ipset/ip_set.h      |  2 +-
+ net/netfilter/ipset/ip_set_core.c           |  7 ++++---
+ net/netfilter/ipset/ip_set_hash_ip.c        | 14 ++++++-------
+ net/netfilter/ipset/ip_set_hash_ipmark.c    | 13 ++++++------
+ net/netfilter/ipset/ip_set_hash_ipport.c    | 13 ++++++------
+ net/netfilter/ipset/ip_set_hash_ipportip.c  | 13 ++++++------
+ net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 +++++++-----
+ net/netfilter/ipset/ip_set_hash_net.c       | 17 +++++++--------
+ net/netfilter/ipset/ip_set_hash_netiface.c  | 15 ++++++--------
+ net/netfilter/ipset/ip_set_hash_netnet.c    | 23 +++++++--------------
+ net/netfilter/ipset/ip_set_hash_netport.c   | 19 +++++++----------
+ 11 files changed, 68 insertions(+), 81 deletions(-)
+
+diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
+index 53c9a17ecb3e..62f7e7e257c1 100644
+--- a/include/linux/netfilter/ipset/ip_set.h
++++ b/include/linux/netfilter/ipset/ip_set.h
+@@ -199,7 +199,7 @@ struct ip_set_region {
+ };
+ /* Max range where every element is added/deleted in one step */
+-#define IPSET_MAX_RANGE               (1<<20)
++#define IPSET_MAX_RANGE               (1<<14)
+ /* The core set type structure */
+ struct ip_set_type {
+diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
+index c17a7dda0163..1bf6ab83644b 100644
+--- a/net/netfilter/ipset/ip_set_core.c
++++ b/net/netfilter/ipset/ip_set_core.c
+@@ -1708,9 +1708,10 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
+               ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
+               ip_set_unlock(set);
+               retried = true;
+-      } while (ret == -EAGAIN &&
+-               set->variant->resize &&
+-               (ret = set->variant->resize(set, retried)) == 0);
++      } while (ret == -ERANGE ||
++               (ret == -EAGAIN &&
++                set->variant->resize &&
++                (ret = set->variant->resize(set, retried)) == 0));
+       if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
+               return 0;
+diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
+index d7a81b2250e7..8720dc3bb689 100644
+--- a/net/netfilter/ipset/ip_set_hash_ip.c
++++ b/net/netfilter/ipset/ip_set_hash_ip.c
+@@ -97,11 +97,11 @@ static int
+ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
+             enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ip4 *h = set->data;
++      struct hash_ip4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ip4_elem e = { 0 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, hosts;
++      u32 ip = 0, ip_to = 0, hosts, i = 0;
+       int ret = 0;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -146,14 +146,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
+       hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
+-      /* 64bit division is not allowed on 32bit */
+-      if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       if (retried)
+               ip = ntohl(h->next.ip);
+-      for (; ip <= ip_to;) {
++      for (; ip <= ip_to; i++) {
+               e.ip = htonl(ip);
++              if (i > IPSET_MAX_RANGE) {
++                      hash_ip4_data_next(&h->next, &e);
++                      return -ERANGE;
++              }
+               ret = adtfn(set, &e, &ext, &ext, flags);
+               if (ret && !ip_set_eexist(ret, flags))
+                       return ret;
+diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
+index eefce34a34f0..cbb05cb188f2 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
++++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
+@@ -96,11 +96,11 @@ static int
+ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+                 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ipmark4 *h = set->data;
++      struct hash_ipmark4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipmark4_elem e = { };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip, ip_to = 0;
++      u32 ip, ip_to = 0, i = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -147,13 +147,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
+               ip_set_mask_from_to(ip, ip_to, cidr);
+       }
+-      if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       if (retried)
+               ip = ntohl(h->next.ip);
+-      for (; ip <= ip_to; ip++) {
++      for (; ip <= ip_to; ip++, i++) {
+               e.ip = htonl(ip);
++              if (i > IPSET_MAX_RANGE) {
++                      hash_ipmark4_data_next(&h->next, &e);
++                      return -ERANGE;
++              }
+               ret = adtfn(set, &e, &ext, &ext, flags);
+               if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
+index 4a54e9e8ae59..c560f7873eca 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipport.c
++++ b/net/netfilter/ipset/ip_set_hash_ipport.c
+@@ -104,11 +104,11 @@ static int
+ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
+                 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ipport4 *h = set->data;
++      struct hash_ipport4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipport4_elem e = { .ip = 0 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip, ip_to = 0, p = 0, port, port_to;
++      u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
+       bool with_ports = false;
+       int ret;
+@@ -172,17 +172,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
+                       swap(port, port_to);
+       }
+-      if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       if (retried)
+               ip = ntohl(h->next.ip);
+       for (; ip <= ip_to; ip++) {
+               p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+                                                      : port;
+-              for (; p <= port_to; p++) {
++              for (; p <= port_to; p++, i++) {
+                       e.ip = htonl(ip);
+                       e.port = htons(p);
++                      if (i > IPSET_MAX_RANGE) {
++                              hash_ipport4_data_next(&h->next, &e);
++                              return -ERANGE;
++                      }
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+                       if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
+index 09737de5ecc3..b7eb8d1e77d9 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
++++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
+@@ -107,11 +107,11 @@ static int
+ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
+                   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ipportip4 *h = set->data;
++      struct hash_ipportip4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipportip4_elem e = { .ip = 0 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip, ip_to = 0, p = 0, port, port_to;
++      u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
+       bool with_ports = false;
+       int ret;
+@@ -179,17 +179,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
+                       swap(port, port_to);
+       }
+-      if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       if (retried)
+               ip = ntohl(h->next.ip);
+       for (; ip <= ip_to; ip++) {
+               p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
+                                                      : port;
+-              for (; p <= port_to; p++) {
++              for (; p <= port_to; p++, i++) {
+                       e.ip = htonl(ip);
+                       e.port = htons(p);
++                      if (i > IPSET_MAX_RANGE) {
++                              hash_ipportip4_data_next(&h->next, &e);
++                              return -ERANGE;
++                      }
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+                       if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
+index 02685371a682..16c5641ced53 100644
+--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
++++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
+@@ -159,12 +159,12 @@ static int
+ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+                    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_ipportnet4 *h = set->data;
++      struct hash_ipportnet4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       u32 ip = 0, ip_to = 0, p = 0, port, port_to;
+-      u32 ip2_from = 0, ip2_to = 0, ip2;
++      u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
+       bool with_ports = false;
+       u8 cidr;
+       int ret;
+@@ -252,9 +252,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+                       swap(port, port_to);
+       }
+-      if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+-
+       ip2_to = ip2_from;
+       if (tb[IPSET_ATTR_IP2_TO]) {
+               ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
+@@ -281,9 +278,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+               for (; p <= port_to; p++) {
+                       e.port = htons(p);
+                       do {
++                              i++;
+                               e.ip2 = htonl(ip2);
+                               ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
+                               e.cidr = cidr - 1;
++                              if (i > IPSET_MAX_RANGE) {
++                                      hash_ipportnet4_data_next(&h->next,
++                                                                &e);
++                                      return -ERANGE;
++                              }
+                               ret = adtfn(set, &e, &ext, &ext, flags);
+                               if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
+index 9d1beaacb973..5ab5873d1d16 100644
+--- a/net/netfilter/ipset/ip_set_hash_net.c
++++ b/net/netfilter/ipset/ip_set_hash_net.c
+@@ -135,11 +135,11 @@ static int
+ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+              enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_net4 *h = set->data;
++      struct hash_net4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_net4_elem e = { .cidr = HOST_MASK };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, ipn, n = 0;
++      u32 ip = 0, ip_to = 0, i = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -187,19 +187,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
+               if (ip + UINT_MAX == ip_to)
+                       return -IPSET_ERR_HASH_RANGE;
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+-              n++;
+-      } while (ipn++ < ip_to);
+-
+-      if (n > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried)
+               ip = ntohl(h->next.ip);
+       do {
++              i++;
+               e.ip = htonl(ip);
++              if (i > IPSET_MAX_RANGE) {
++                      hash_net4_data_next(&h->next, &e);
++                      return -ERANGE;
++              }
+               ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+               ret = adtfn(set, &e, &ext, &ext, flags);
+               if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
+index c3ada9c63fa3..7ef240380a45 100644
+--- a/net/netfilter/ipset/ip_set_hash_netiface.c
++++ b/net/netfilter/ipset/ip_set_hash_netiface.c
+@@ -201,7 +201,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 ip = 0, ip_to = 0, ipn, n = 0;
++      u32 ip = 0, ip_to = 0, i = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -255,19 +255,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
+       } else {
+               ip_set_mask_from_to(ip, ip_to, e.cidr);
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
+-              n++;
+-      } while (ipn++ < ip_to);
+-
+-      if (n > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried)
+               ip = ntohl(h->next.ip);
+       do {
++              i++;
+               e.ip = htonl(ip);
++              if (i > IPSET_MAX_RANGE) {
++                      hash_netiface4_data_next(&h->next, &e);
++                      return -ERANGE;
++              }
+               ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
+               ret = adtfn(set, &e, &ext, &ext, flags);
+diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
+index b1411bc91a40..15f4b0292f0d 100644
+--- a/net/netfilter/ipset/ip_set_hash_netnet.c
++++ b/net/netfilter/ipset/ip_set_hash_netnet.c
+@@ -162,13 +162,12 @@ static int
+ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+                 enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_netnet4 *h = set->data;
++      struct hash_netnet4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netnet4_elem e = { };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+       u32 ip = 0, ip_to = 0;
+-      u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
+-      u64 n = 0, m = 0;
++      u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
+       int ret;
+       if (tb[IPSET_ATTR_LINENO])
+@@ -244,19 +243,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+       } else {
+               ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
+-              n++;
+-      } while (ipn++ < ip_to);
+-      ipn = ip2_from;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
+-              m++;
+-      } while (ipn++ < ip2_to);
+-
+-      if (n*m > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried) {
+               ip = ntohl(h->next.ip[0]);
+@@ -269,7 +255,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
+               e.ip[0] = htonl(ip);
+               ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+               do {
++                      i++;
+                       e.ip[1] = htonl(ip2);
++                      if (i > IPSET_MAX_RANGE) {
++                              hash_netnet4_data_next(&h->next, &e);
++                              return -ERANGE;
++                      }
+                       ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+                       if (ret && !ip_set_eexist(ret, flags))
+diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
+index d26d13528fe8..e73ba50afe96 100644
+--- a/net/netfilter/ipset/ip_set_hash_netport.c
++++ b/net/netfilter/ipset/ip_set_hash_netport.c
+@@ -153,12 +153,11 @@ static int
+ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+                  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+ {
+-      const struct hash_netport4 *h = set->data;
++      struct hash_netport4 *h = set->data;
+       ipset_adtfn adtfn = set->variant->adt[adt];
+       struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
+       struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
+-      u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
+-      u64 n = 0;
++      u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
+       bool with_ports = false;
+       u8 cidr;
+       int ret;
+@@ -235,14 +234,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+       } else {
+               ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
+       }
+-      ipn = ip;
+-      do {
+-              ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
+-              n++;
+-      } while (ipn++ < ip_to);
+-
+-      if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
+-              return -ERANGE;
+       if (retried) {
+               ip = ntohl(h->next.ip);
+@@ -254,8 +245,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
+               e.ip = htonl(ip);
+               ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
+               e.cidr = cidr - 1;
+-              for (; p <= port_to; p++) {
++              for (; p <= port_to; p++, i++) {
+                       e.port = htons(p);
++                      if (i > IPSET_MAX_RANGE) {
++                              hash_netport4_data_next(&h->next, &e);
++                              return -ERANGE;
++                      }
+                       ret = adtfn(set, &e, &ext, &ext, flags);
+                       if (ret && !ip_set_eexist(ret, flags))
+                               return ret;
+-- 
+2.35.1
+
diff --git a/queue-5.10/nfc-fix-potential-resource-leaks.patch b/queue-5.10/nfc-fix-potential-resource-leaks.patch
new file mode 100644 (file)
index 0000000..372e6f4
--- /dev/null
@@ -0,0 +1,127 @@
+From f2207976833000245dd02bdc302937f0fb5af43d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Dec 2022 11:37:18 +0400
+Subject: nfc: Fix potential resource leaks
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit df49908f3c52d211aea5e2a14a93bbe67a2cb3af ]
+
+nfc_get_device() take reference for the device, add missing
+nfc_put_device() to release it when not need anymore.
+Also fix the style warnning by use error EOPNOTSUPP instead of
+ENOTSUPP.
+
+Fixes: 5ce3f32b5264 ("NFC: netlink: SE API implementation")
+Fixes: 29e76924cf08 ("nfc: netlink: Add capability to reply to vendor_cmd with data")
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/nfc/netlink.c | 52 ++++++++++++++++++++++++++++++++++-------------
+ 1 file changed, 38 insertions(+), 14 deletions(-)
+
+diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
+index b8939ebaa6d3..610caea4feec 100644
+--- a/net/nfc/netlink.c
++++ b/net/nfc/netlink.c
+@@ -1497,6 +1497,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
+       u32 dev_idx, se_idx;
+       u8 *apdu;
+       size_t apdu_len;
++      int rc;
+       if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+           !info->attrs[NFC_ATTR_SE_INDEX] ||
+@@ -1510,25 +1511,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
+       if (!dev)
+               return -ENODEV;
+-      if (!dev->ops || !dev->ops->se_io)
+-              return -ENOTSUPP;
++      if (!dev->ops || !dev->ops->se_io) {
++              rc = -EOPNOTSUPP;
++              goto put_dev;
++      }
+       apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]);
+-      if (apdu_len == 0)
+-              return -EINVAL;
++      if (apdu_len == 0) {
++              rc = -EINVAL;
++              goto put_dev;
++      }
+       apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]);
+-      if (!apdu)
+-              return -EINVAL;
++      if (!apdu) {
++              rc = -EINVAL;
++              goto put_dev;
++      }
+       ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL);
+-      if (!ctx)
+-              return -ENOMEM;
++      if (!ctx) {
++              rc = -ENOMEM;
++              goto put_dev;
++      }
+       ctx->dev_idx = dev_idx;
+       ctx->se_idx = se_idx;
+-      return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
++      rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
++
++put_dev:
++      nfc_put_device(dev);
++      return rc;
+ }
+ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+@@ -1551,14 +1564,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+       subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
+       dev = nfc_get_device(dev_idx);
+-      if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
++      if (!dev)
+               return -ENODEV;
++      if (!dev->vendor_cmds || !dev->n_vendor_cmds) {
++              err = -ENODEV;
++              goto put_dev;
++      }
++
+       if (info->attrs[NFC_ATTR_VENDOR_DATA]) {
+               data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
+               data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
+-              if (data_len == 0)
+-                      return -EINVAL;
++              if (data_len == 0) {
++                      err = -EINVAL;
++                      goto put_dev;
++              }
+       } else {
+               data = NULL;
+               data_len = 0;
+@@ -1573,10 +1593,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+               dev->cur_cmd_info = info;
+               err = cmd->doit(dev, data, data_len);
+               dev->cur_cmd_info = NULL;
+-              return err;
++              goto put_dev;
+       }
+-      return -EOPNOTSUPP;
++      err = -EOPNOTSUPP;
++
++put_dev:
++      nfc_put_device(dev);
++      return err;
+ }
+ /* message building helper */
+-- 
+2.35.1
+
diff --git a/queue-5.10/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch b/queue-5.10/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch
new file mode 100644 (file)
index 0000000..98af5bc
--- /dev/null
@@ -0,0 +1,42 @@
+From 688e07516d4622a53321baaf96cc5241f987f149 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 09:51:30 -0500
+Subject: nfsd: shut down the NFSv4 state objects before the filecache
+
+From: Jeff Layton <jlayton@kernel.org>
+
+[ Upstream commit 789e1e10f214c00ca18fc6610824c5b9876ba5f2 ]
+
+Currently, we shut down the filecache before trying to clean up the
+stateids that depend on it. This leads to the kernel trying to free an
+nfsd_file twice, and a refcount overput on the nf_mark.
+
+Change the shutdown procedure to tear down all of the stateids prior
+to shutting down the filecache.
+
+Reported-and-tested-by: Wang Yugui <wangyugui@e16-tech.com>
+Signed-off-by: Jeff Layton <jlayton@kernel.org>
+Fixes: 5e113224c17e ("nfsd: nfsd_file cache entries should be per net namespace")
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/nfsd/nfssvc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
+index 9323e30a7eaf..c7fffe1453bd 100644
+--- a/fs/nfsd/nfssvc.c
++++ b/fs/nfsd/nfssvc.c
+@@ -426,8 +426,8 @@ static void nfsd_shutdown_net(struct net *net)
+ {
+       struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+-      nfsd_file_cache_shutdown_net(net);
+       nfs4_state_shutdown_net(net);
++      nfsd_file_cache_shutdown_net(net);
+       if (nn->lockd_up) {
+               lockd_down(net);
+               nn->lockd_up = false;
+-- 
+2.35.1
+
diff --git a/queue-5.10/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch b/queue-5.10/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch
new file mode 100644 (file)
index 0000000..bc27a81
--- /dev/null
@@ -0,0 +1,81 @@
+From 578e47c9239ca21d035014344b391312620c422d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 09:57:21 +0800
+Subject: nvme: fix multipath crash caused by flush request when blktrace is
+ enabled
+
+From: Yanjun Zhang <zhangyanjun@cestc.cn>
+
+[ Upstream commit 3659fb5ac29a5e6102bebe494ac789fd47fb78f4 ]
+
+The flush request initialized by blk_kick_flush has NULL bio,
+and it may be dealt with nvme_end_req during io completion.
+When blktrace is enabled, nvme_trace_bio_complete with multipath
+activated trying to access NULL pointer bio from flush request
+results in the following crash:
+
+[ 2517.831677] BUG: kernel NULL pointer dereference, address: 000000000000001a
+[ 2517.835213] #PF: supervisor read access in kernel mode
+[ 2517.838724] #PF: error_code(0x0000) - not-present page
+[ 2517.842222] PGD 7b2d51067 P4D 0
+[ 2517.845684] Oops: 0000 [#1] SMP NOPTI
+[ 2517.849125] CPU: 2 PID: 732 Comm: kworker/2:1H Kdump: loaded Tainted: G S                5.15.67-0.cl9.x86_64 #1
+[ 2517.852723] Hardware name: XFUSION 2288H V6/BC13MBSBC, BIOS 1.13 07/27/2022
+[ 2517.856358] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp]
+[ 2517.859993] RIP: 0010:blk_add_trace_bio_complete+0x6/0x30
+[ 2517.863628] Code: 1f 44 00 00 48 8b 46 08 31 c9 ba 04 00 10 00 48 8b 80 50 03 00 00 48 8b 78 50 e9 e5 fe ff ff 0f 1f 44 00 00 41 54 49 89 f4 55 <0f> b6 7a 1a 48 89 d5 e8 3e 1c 2b 00 48 89 ee 4c 89 e7 5d 89 c1 ba
+[ 2517.871269] RSP: 0018:ff7f6a008d9dbcd0 EFLAGS: 00010286
+[ 2517.875081] RAX: ff3d5b4be00b1d50 RBX: 0000000002040002 RCX: ff3d5b0a270f2000
+[ 2517.878966] RDX: 0000000000000000 RSI: ff3d5b0b021fb9f8 RDI: 0000000000000000
+[ 2517.882849] RBP: ff3d5b0b96a6fa00 R08: 0000000000000001 R09: 0000000000000000
+[ 2517.886718] R10: 000000000000000c R11: 000000000000000c R12: ff3d5b0b021fb9f8
+[ 2517.890575] R13: 0000000002000000 R14: ff3d5b0b021fb1b0 R15: 0000000000000018
+[ 2517.894434] FS:  0000000000000000(0000) GS:ff3d5b42bfc80000(0000) knlGS:0000000000000000
+[ 2517.898299] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 2517.902157] CR2: 000000000000001a CR3: 00000004f023e005 CR4: 0000000000771ee0
+[ 2517.906053] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 2517.909930] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 2517.913761] PKRU: 55555554
+[ 2517.917558] Call Trace:
+[ 2517.921294]  <TASK>
+[ 2517.924982]  nvme_complete_rq+0x1c3/0x1e0 [nvme_core]
+[ 2517.928715]  nvme_tcp_recv_pdu+0x4d7/0x540 [nvme_tcp]
+[ 2517.932442]  nvme_tcp_recv_skb+0x4f/0x240 [nvme_tcp]
+[ 2517.936137]  ? nvme_tcp_recv_pdu+0x540/0x540 [nvme_tcp]
+[ 2517.939830]  tcp_read_sock+0x9c/0x260
+[ 2517.943486]  nvme_tcp_try_recv+0x65/0xa0 [nvme_tcp]
+[ 2517.947173]  nvme_tcp_io_work+0x64/0x90 [nvme_tcp]
+[ 2517.950834]  process_one_work+0x1e8/0x390
+[ 2517.954473]  worker_thread+0x53/0x3c0
+[ 2517.958069]  ? process_one_work+0x390/0x390
+[ 2517.961655]  kthread+0x10c/0x130
+[ 2517.965211]  ? set_kthread_struct+0x40/0x40
+[ 2517.968760]  ret_from_fork+0x1f/0x30
+[ 2517.972285]  </TASK>
+
+To avoid this situation, add a NULL check for req->bio before
+calling trace_block_bio_complete.
+
+Signed-off-by: Yanjun Zhang <zhangyanjun@cestc.cn>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/nvme.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
+index 86336496c65c..c3e4d9b6f9c0 100644
+--- a/drivers/nvme/host/nvme.h
++++ b/drivers/nvme/host/nvme.h
+@@ -749,7 +749,7 @@ static inline void nvme_trace_bio_complete(struct request *req,
+ {
+       struct nvme_ns *ns = req->q->queuedata;
+-      if (req->cmd_flags & REQ_NVME_MPATH)
++      if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio)
+               trace_block_bio_complete(ns->head->disk->queue, req->bio);
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch b/queue-5.10/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch
new file mode 100644 (file)
index 0000000..18038d4
--- /dev/null
@@ -0,0 +1,92 @@
+From dc0916234e76bd77e113ff84af947a8fd888f824 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 5 Nov 2022 12:01:14 +0900
+Subject: perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as
+ unsinged data
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit a9dfc46c67b52ad43b8e335e28f4cf8002c67793 ]
+
+DWARF version 5 standard Sec 2.14 says that
+
+  Any debugging information entry representing the declaration of an object,
+  module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and
+  DW_AT_decl_column attributes, each of whose value is an unsigned integer
+  constant.
+
+So it should be an unsigned integer data. Also, even though the standard
+doesn't clearly say the DW_AT_call_file is signed or unsigned, the
+elfutils (eu-readelf) interprets it as unsigned integer data and it is
+natural to handle it as unsigned integer data as same as DW_AT_decl_file.
+This changes the DW_AT_call_file as unsigned integer data too.
+
+Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances")
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/dwarf-aux.c | 21 ++++-----------------
+ 1 file changed, 4 insertions(+), 17 deletions(-)
+
+diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
+index dc02685a1eec..f8a10d5148f6 100644
+--- a/tools/perf/util/dwarf-aux.c
++++ b/tools/perf/util/dwarf-aux.c
+@@ -315,19 +315,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+       return 0;
+ }
+-/* Get attribute and translate it as a sdata */
+-static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+-                            Dwarf_Sword *result)
+-{
+-      Dwarf_Attribute attr;
+-
+-      if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+-          dwarf_formsdata(&attr, result) != 0)
+-              return -ENOENT;
+-
+-      return 0;
+-}
+-
+ /**
+  * die_is_signed_type - Check whether a type DIE is signed or not
+  * @tp_die: a DIE of a type
+@@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
+ /* Get the call file index number in CU DIE */
+ static int die_get_call_fileno(Dwarf_Die *in_die)
+ {
+-      Dwarf_Sword idx;
++      Dwarf_Word idx;
+-      if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0)
++      if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0)
+               return (int)idx;
+       else
+               return -ENOENT;
+@@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die)
+ /* Get the declared file index number in CU DIE */
+ static int die_get_decl_fileno(Dwarf_Die *pdie)
+ {
+-      Dwarf_Sword idx;
++      Dwarf_Word idx;
+-      if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0)
++      if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0)
+               return (int)idx;
+       else
+               return -ENOENT;
+-- 
+2.35.1
+
diff --git a/queue-5.10/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch b/queue-5.10/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch
new file mode 100644 (file)
index 0000000..903c312
--- /dev/null
@@ -0,0 +1,54 @@
+From e480e680d894c61da72df39bfd72d3f251bff844 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Nov 2022 22:48:39 +0900
+Subject: perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit f828929ab7f0dc3353e4a617f94f297fa8f3dec3 ]
+
+Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute
+acccessor functions, so that it can find the specified attribute from
+abstact origin DIE etc.
+
+Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
+Acked-by: Namhyung Kim <namhyung@kernel.org>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
+Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Stable-dep-of: a9dfc46c67b5 ("perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/dwarf-aux.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
+index 4343356f3cf9..dc02685a1eec 100644
+--- a/tools/perf/util/dwarf-aux.c
++++ b/tools/perf/util/dwarf-aux.c
+@@ -308,7 +308,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
+ {
+       Dwarf_Attribute attr;
+-      if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
++      if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+           dwarf_formudata(&attr, result) != 0)
+               return -ENOENT;
+@@ -321,7 +321,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
+ {
+       Dwarf_Attribute attr;
+-      if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
++      if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
+           dwarf_formsdata(&attr, result) != 0)
+               return -ENOENT;
+-- 
+2.35.1
+
diff --git a/queue-5.10/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch b/queue-5.10/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch
new file mode 100644 (file)
index 0000000..d51ed74
--- /dev/null
@@ -0,0 +1,52 @@
+From 42bfbd41424e10fcc90207a8958b6eac3b94d26f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 13:09:00 +0400
+Subject: perf tools: Fix resources leak in perf_data__open_dir()
+
+From: Miaoqian Lin <linmq006@gmail.com>
+
+[ Upstream commit 0a6564ebd953c4590663c9a3c99a3ea9920ade6f ]
+
+In perf_data__open_dir(), opendir() opens the directory stream.  Add
+missing closedir() to release it after use.
+
+Fixes: eb6176709b235b96 ("perf data: Add perf_data__open_dir_data function")
+Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
+Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20221229090903.1402395-1-linmq006@gmail.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/util/data.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
+index 48754083791d..29d32ba046b5 100644
+--- a/tools/perf/util/data.c
++++ b/tools/perf/util/data.c
+@@ -127,6 +127,7 @@ int perf_data__open_dir(struct perf_data *data)
+               file->size = st.st_size;
+       }
++      closedir(dir);
+       if (!files)
+               return -EINVAL;
+@@ -135,6 +136,7 @@ int perf_data__open_dir(struct perf_data *data)
+       return 0;
+ out_err:
++      closedir(dir);
+       close_dir(files, nr);
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch b/queue-5.10/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch
new file mode 100644 (file)
index 0000000..a404161
--- /dev/null
@@ -0,0 +1,103 @@
+From cec73bb81bf3534b075582b5e336bf854e379150 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Dec 2022 14:52:28 +0300
+Subject: qlcnic: prevent ->dcb use-after-free on qlcnic_dcb_enable() failure
+
+From: Daniil Tatianin <d-tatianin@yandex-team.ru>
+
+[ Upstream commit 13a7c8964afcd8ca43c0b6001ebb0127baa95362 ]
+
+adapter->dcb would get silently freed inside qlcnic_dcb_enable() in
+case qlcnic_dcb_attach() would return an error, which always happens
+under OOM conditions. This would lead to use-after-free because both
+of the existing callers invoke qlcnic_dcb_get_info() on the obtained
+pointer, which is potentially freed at that point.
+
+Propagate errors from qlcnic_dcb_enable(), and instead free the dcb
+pointer at callsite using qlcnic_dcb_free(). This also removes the now
+unused qlcnic_clear_dcb_ops() helper, which was a simple wrapper around
+kfree() also causing memory leaks for partially initialized dcb.
+
+Found by Linux Verification Center (linuxtesting.org) with the SVACE
+static analysis tool.
+
+Fixes: 3c44bba1d270 ("qlcnic: Disable DCB operations from SR-IOV VFs")
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Signed-off-by: Daniil Tatianin <d-tatianin@yandex-team.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c |  8 +++++++-
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h       | 10 ++--------
+ drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c      |  8 +++++++-
+ 3 files changed, 16 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+index d2c190732d3e..beeeec8516b8 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c
+@@ -2505,7 +2505,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac)
+               goto disable_mbx_intr;
+       qlcnic_83xx_clear_function_resources(adapter);
+-      qlcnic_dcb_enable(adapter->dcb);
++
++      err = qlcnic_dcb_enable(adapter->dcb);
++      if (err) {
++              qlcnic_dcb_free(adapter->dcb);
++              goto disable_mbx_intr;
++      }
++
+       qlcnic_83xx_initialize_nic(adapter, 1);
+       qlcnic_dcb_get_info(adapter->dcb);
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
+index 7519773eaca6..22afa2be85fd 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h
+@@ -41,11 +41,6 @@ struct qlcnic_dcb {
+       unsigned long                   state;
+ };
+-static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb)
+-{
+-      kfree(dcb);
+-}
+-
+ static inline int qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb)
+ {
+       if (dcb && dcb->ops->get_hw_capability)
+@@ -112,9 +107,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb)
+               dcb->ops->init_dcbnl_ops(dcb);
+ }
+-static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
++static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb)
+ {
+-      if (dcb && qlcnic_dcb_attach(dcb))
+-              qlcnic_clear_dcb_ops(dcb);
++      return dcb ? qlcnic_dcb_attach(dcb) : 0;
+ }
+ #endif
+diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+index 27c07b2412f4..44b745293fd0 100644
+--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+@@ -2622,7 +2622,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+                        "Device does not support MSI interrupts\n");
+       if (qlcnic_82xx_check(adapter)) {
+-              qlcnic_dcb_enable(adapter->dcb);
++              err = qlcnic_dcb_enable(adapter->dcb);
++              if (err) {
++                      qlcnic_dcb_free(adapter->dcb);
++                      dev_err(&pdev->dev, "Failed to enable DCB\n");
++                      goto err_out_free_hw;
++              }
++
+               qlcnic_dcb_get_info(adapter->dcb);
+               err = qlcnic_setup_intr(adapter);
+-- 
+2.35.1
+
diff --git a/queue-5.10/ravb-fix-failed-to-switch-device-to-config-mode-mess.patch b/queue-5.10/ravb-fix-failed-to-switch-device-to-config-mode-mess.patch
new file mode 100644 (file)
index 0000000..2428a95
--- /dev/null
@@ -0,0 +1,68 @@
+From 33bf3c123913208621351b93993c0f9321a8d536 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Dec 2022 10:51:18 +0000
+Subject: ravb: Fix "failed to switch device to config mode" message during
+ unbind
+
+From: Biju Das <biju.das.jz@bp.renesas.com>
+
+[ Upstream commit c72a7e42592b2e18d862cf120876070947000d7a ]
+
+This patch fixes the error "ravb 11c20000.ethernet eth0: failed to switch
+device to config mode" during unbind.
+
+We are doing register access after pm_runtime_put_sync().
+
+We usually do cleanup in reverse order of init. Currently in
+remove(), the "pm_runtime_put_sync" is not in reverse order.
+
+Probe
+       reset_control_deassert(rstc);
+       pm_runtime_enable(&pdev->dev);
+       pm_runtime_get_sync(&pdev->dev);
+
+remove
+       pm_runtime_put_sync(&pdev->dev);
+       unregister_netdev(ndev);
+       ..
+       ravb_mdio_release(priv);
+       pm_runtime_disable(&pdev->dev);
+
+Consider the call to unregister_netdev()
+unregister_netdev->unregister_netdevice_queue->rollback_registered_many
+that calls the below functions which access the registers after
+pm_runtime_put_sync()
+ 1) ravb_get_stats
+ 2) ravb_close
+
+Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper")
+Cc: stable@vger.kernel.org
+Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20221214105118.2495313-1-biju.das.jz@bp.renesas.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/renesas/ravb_main.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
+index 9e7b85e178fd..9ec6d63691aa 100644
+--- a/drivers/net/ethernet/renesas/ravb_main.c
++++ b/drivers/net/ethernet/renesas/ravb_main.c
+@@ -2253,11 +2253,11 @@ static int ravb_remove(struct platform_device *pdev)
+                         priv->desc_bat_dma);
+       /* Set reset mode */
+       ravb_write(ndev, CCC_OPC_RESET, CCC);
+-      pm_runtime_put_sync(&pdev->dev);
+       unregister_netdev(ndev);
+       netif_napi_del(&priv->napi[RAVB_NC]);
+       netif_napi_del(&priv->napi[RAVB_BE]);
+       ravb_mdio_release(priv);
++      pm_runtime_put_sync(&pdev->dev);
+       pm_runtime_disable(&pdev->dev);
+       free_netdev(ndev);
+       platform_set_drvdata(pdev, NULL);
+-- 
+2.35.1
+
diff --git a/queue-5.10/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch b/queue-5.10/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch
new file mode 100644 (file)
index 0000000..1b37917
--- /dev/null
@@ -0,0 +1,95 @@
+From 993586699c4ab15e27a8eae08031347c7bad271b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Dec 2022 14:56:10 +0200
+Subject: RDMA/mlx5: Fix validation of max_rd_atomic caps for DC
+
+From: Maor Gottlieb <maorg@nvidia.com>
+
+[ Upstream commit 8de8482fe5732fbef4f5af82bc0c0362c804cd1f ]
+
+Currently, when modifying DC, we validate max_rd_atomic user attribute
+against the RC cap, validate against DC. RC and DC QP types have different
+device limitations.
+
+This can cause userspace created DC QPs to malfunction.
+
+Fixes: c32a4f296e1d ("IB/mlx5: Add support for DC Initiator QP")
+Link: https://lore.kernel.org/r/0c5aee72cea188c3bb770f4207cce7abc9b6fc74.1672231736.git.leonro@nvidia.com
+Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
+Signed-off-by: Leon Romanovsky <leon@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/infiniband/hw/mlx5/qp.c | 49 +++++++++++++++++++++++----------
+ 1 file changed, 35 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
+index 7a2bec0ac005..0caff276f2c1 100644
+--- a/drivers/infiniband/hw/mlx5/qp.c
++++ b/drivers/infiniband/hw/mlx5/qp.c
+@@ -4258,6 +4258,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev,
+       return false;
+ }
++static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr,
++                            int attr_mask, enum ib_qp_type qp_type)
++{
++      int log_max_ra_res;
++      int log_max_ra_req;
++
++      if (qp_type == MLX5_IB_QPT_DCI) {
++              log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
++                                                 log_max_ra_res_dc);
++              log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
++                                                 log_max_ra_req_dc);
++      } else {
++              log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev,
++                                                 log_max_ra_res_qp);
++              log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev,
++                                                 log_max_ra_req_qp);
++      }
++
++      if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
++          attr->max_rd_atomic > log_max_ra_res) {
++              mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
++                          attr->max_rd_atomic);
++              return false;
++      }
++
++      if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
++          attr->max_dest_rd_atomic > log_max_ra_req) {
++              mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
++                          attr->max_dest_rd_atomic);
++              return false;
++      }
++      return true;
++}
++
+ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+                     int attr_mask, struct ib_udata *udata)
+ {
+@@ -4352,21 +4386,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+               }
+       }
+-      if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC &&
+-          attr->max_rd_atomic >
+-          (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) {
+-              mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n",
+-                          attr->max_rd_atomic);
+-              goto out;
+-      }
+-
+-      if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC &&
+-          attr->max_dest_rd_atomic >
+-          (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) {
+-              mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n",
+-                          attr->max_dest_rd_atomic);
++      if (!validate_rd_atomic(dev, attr, attr_mask, qp_type))
+               goto out;
+-      }
+       if (cur_state == new_state && cur_state == IB_QPS_RESET) {
+               err = 0;
+-- 
+2.35.1
+
diff --git a/queue-5.10/riscv-stacktrace-fix-stack-output-without-ra-on-the-.patch b/queue-5.10/riscv-stacktrace-fix-stack-output-without-ra-on-the-.patch
new file mode 100644 (file)
index 0000000..8b1a74e
--- /dev/null
@@ -0,0 +1,72 @@
+From afa9eeebca764368dc006710606b9309b4ab596c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Jan 2021 20:40:14 +0800
+Subject: riscv/stacktrace: Fix stack output without ra on the stack top
+
+From: Chen Huang <chenhuang5@huawei.com>
+
+[ Upstream commit f766f77a74f5784d8d4d3c36b1900731f97d08d0 ]
+
+When a function doesn't have a callee, then it will not
+push ra into the stack, such as lkdtm_BUG() function,
+
+addi   sp,sp,-16
+sd     s0,8(sp)
+addi   s0,sp,16
+ebreak
+
+The struct stackframe use {fp,ra} to get information from
+stack, if walk_stackframe() with pr_regs, we will obtain
+wrong value and bad stacktrace,
+
+[<ffffffe00066c56c>] lkdtm_BUG+0x6/0x8
+---[ end trace 18da3fbdf08e25d5 ]---
+
+Correct the next fp and pc, after that, full stacktrace
+shown as expects,
+
+[<ffffffe00066c56c>] lkdtm_BUG+0x6/0x8
+[<ffffffe0008b24a4>] lkdtm_do_action+0x14/0x1c
+[<ffffffe00066c372>] direct_entry+0xc0/0x10a
+[<ffffffe000439f86>] full_proxy_write+0x42/0x6a
+[<ffffffe000309626>] vfs_write+0x7e/0x214
+[<ffffffe00030992a>] ksys_write+0x98/0xc0
+[<ffffffe000309960>] sys_write+0xe/0x16
+[<ffffffe0002014bc>] ret_from_syscall+0x0/0x2
+---[ end trace 61917f3d9a9fadcd ]---
+
+Signed-off-by: Chen Huang <chenhuang5@huawei.com>
+Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Stable-dep-of: 5c3022e4a616 ("riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/stacktrace.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
+index 595342910c3f..6cbde6b43fd2 100644
+--- a/arch/riscv/kernel/stacktrace.c
++++ b/arch/riscv/kernel/stacktrace.c
+@@ -57,9 +57,15 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
+               /* Unwind stack frame */
+               frame = (struct stackframe *)fp - 1;
+               sp = fp;
+-              fp = frame->fp;
+-              pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+-                                         (unsigned long *)(fp - 8));
++              if (regs && (regs->epc == pc) && (frame->fp & 0x7)) {
++                      fp = frame->ra;
++                      pc = regs->ra;
++              } else {
++                      fp = frame->fp;
++                      pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
++                                                 (unsigned long *)(fp - 8));
++              }
++
+       }
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.10/riscv-stacktrace-fixup-ftrace_graph_ret_addr-retp-ar.patch b/queue-5.10/riscv-stacktrace-fixup-ftrace_graph_ret_addr-retp-ar.patch
new file mode 100644 (file)
index 0000000..9217b03
--- /dev/null
@@ -0,0 +1,41 @@
+From 831306ee94a03f0106cd822d683a3d63dcd88713 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 01:49:36 -0500
+Subject: riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit 5c3022e4a616d800cf5f4c3a981d7992179e44a1 ]
+
+The 'retp' is a pointer to the return address on the stack, so we
+must pass the current return address pointer as the 'retp'
+argument to ftrace_push_return_trace(). Not parent function's
+return address on the stack.
+
+Fixes: b785ec129bd9 ("riscv/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support")
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20221109064937.3643993-2-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/stacktrace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
+index 6cbde6b43fd2..1e53fbe5eb78 100644
+--- a/arch/riscv/kernel/stacktrace.c
++++ b/arch/riscv/kernel/stacktrace.c
+@@ -63,7 +63,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
+               } else {
+                       fp = frame->fp;
+                       pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+-                                                 (unsigned long *)(fp - 8));
++                                                 &frame->ra);
+               }
+       }
+-- 
+2.35.1
+
index 497279a365f1314a200aceb4699cc183a9a401b7..21fc4452fc0629bbe41dbd64406a0fc7b539669c 100644 (file)
@@ -696,3 +696,64 @@ x86-mce-amd-clear-dfr-errors-found-in-thr-handler.patch
 media-s5p-mfc-fix-to-handle-reference-queue-during-f.patch
 media-s5p-mfc-clear-workbit-to-handle-error-conditio.patch
 media-s5p-mfc-fix-in-register-read-and-write-for-h26.patch
+perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch
+perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch
+x86-kprobes-convert-to-insn_decode.patch
+x86-kprobes-fix-optprobe-optimization-check-with-con.patch
+staging-media-tegra-video-fix-device_node-use-after-.patch
+ravb-fix-failed-to-switch-device-to-config-mode-mess.patch
+riscv-stacktrace-fix-stack-output-without-ra-on-the-.patch
+riscv-stacktrace-fixup-ftrace_graph_ret_addr-retp-ar.patch
+ext4-goto-right-label-failed_mount3a.patch
+ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch
+mm-highmem-lift-memcpy_-to-from-_page-to-core.patch
+ext4-use-memcpy_to_page-in-pagecache_write.patch
+fs-ext4-initialize-fsdata-in-pagecache_write.patch
+ext4-move-functions-in-super.c.patch
+ext4-simplify-ext4-error-translation.patch
+ext4-fix-various-seppling-typos.patch
+ext4-fix-leaking-uninitialized-memory-in-fast-commit.patch
+ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch
+mbcache-don-t-reclaim-used-entries.patch
+mbcache-add-functions-to-delete-entry-if-unused.patch
+ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch
+ext4-unindent-codeblock-in-ext4_xattr_block_set.patch
+ext4-fix-race-when-reusing-xattr-blocks.patch
+mbcache-automatically-delete-entries-from-cache-on-f.patch
+ext4-fix-deadlock-due-to-mbcache-entry-corruption.patch
+sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch
+bpf-pull-before-calling-skb_postpull_rcsum.patch
+drm-panfrost-fix-gem-handle-creation-ref-counting.patch
+vmxnet3-correctly-report-csum_level-for-encapsulated.patch
+veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch
+nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch
+net-hns3-add-interrupts-re-initialization-while-doin.patch
+net-sched-fix-memory-leak-in-tcindex_set_parms.patch
+qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch
+nfc-fix-potential-resource-leaks.patch
+vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch
+vringh-fix-range-used-in-iotlb_translate.patch
+vhost-fix-range-used-in-translate_desc.patch
+net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch
+net-mlx5-avoid-recovery-in-probe-flows.patch
+net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch
+net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch
+net-amd-xgbe-add-missed-tasklet_kill.patch
+net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch
+rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch
+drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch
+filelock-new-helper-vfs_inode_has_locks.patch
+ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch
+gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch
+net-sched-atm-dont-intepret-cls-results-when-asked-t.patch
+net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch
+netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch
+netfilter-ipset-rework-long-task-execution-when-addi.patch
+perf-tools-fix-resources-leak-in-perf_data__open_dir.patch
+drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch
+usb-rndis_host-secure-rndis_query-check-against-int-.patch
+drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch
+caif-fix-memory-leak-in-cfctrl_linkup_request.patch
+udf-fix-extension-of-the-last-extent-in-the-file.patch
+asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch
+nvme-fix-multipath-crash-caused-by-flush-request-whe.patch
diff --git a/queue-5.10/staging-media-tegra-video-fix-device_node-use-after-.patch b/queue-5.10/staging-media-tegra-video-fix-device_node-use-after-.patch
new file mode 100644 (file)
index 0000000..93bf5f1
--- /dev/null
@@ -0,0 +1,63 @@
+From 313fcdde82b935db9e65234c26bba235ac95bd6e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 12:01:02 +0100
+Subject: staging: media: tegra-video: fix device_node use after free
+
+From: Luca Ceresoli <luca.ceresoli@bootlin.com>
+
+[ Upstream commit c4d344163c3a7f90712525f931a6c016bbb35e18 ]
+
+At probe time this code path is followed:
+
+ * tegra_csi_init
+   * tegra_csi_channels_alloc
+     * for_each_child_of_node(node, channel) -- iterates over channels
+       * automatically gets 'channel'
+         * tegra_csi_channel_alloc()
+           * saves into chan->of_node a pointer to the channel OF node
+       * automatically gets and puts 'channel'
+       * now the node saved in chan->of_node has refcount 0, can disappear
+   * tegra_csi_channels_init
+     * iterates over channels
+       * tegra_csi_channel_init -- uses chan->of_node
+
+After that, chan->of_node keeps storing the node until the device is
+removed.
+
+of_node_get() the node and of_node_put() it during teardown to avoid any
+risk.
+
+Fixes: 1ebaeb09830f ("media: tegra-video: Add support for external sensor capture")
+Cc: stable@vger.kernel.org
+Cc: Sowjanya Komatineni <skomatineni@nvidia.com>
+Signed-off-by: Luca Ceresoli <luca.ceresoli@bootlin.com>
+Signed-off-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/staging/media/tegra-video/csi.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c
+index edfdf6db457d..dc5d432a09e8 100644
+--- a/drivers/staging/media/tegra-video/csi.c
++++ b/drivers/staging/media/tegra-video/csi.c
+@@ -420,7 +420,7 @@ static int tegra_csi_channel_alloc(struct tegra_csi *csi,
+       chan->csi = csi;
+       chan->csi_port_num = port_num;
+       chan->numlanes = lanes;
+-      chan->of_node = node;
++      chan->of_node = of_node_get(node);
+       chan->numpads = num_pads;
+       if (num_pads & 0x2) {
+               chan->pads[0].flags = MEDIA_PAD_FL_SINK;
+@@ -621,6 +621,7 @@ static void tegra_csi_channels_cleanup(struct tegra_csi *csi)
+                       media_entity_cleanup(&subdev->entity);
+               }
++              of_node_put(chan->of_node);
+               list_del(&chan->list);
+               kfree(chan);
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.10/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch b/queue-5.10/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch
new file mode 100644 (file)
index 0000000..7b21336
--- /dev/null
@@ -0,0 +1,133 @@
+From 60fb278dfec2a5c778924ec719059742d758f667 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Dec 2022 13:14:31 +0900
+Subject: SUNRPC: ensure the matching upcall is in-flight upon downcall
+
+From: minoura makoto <minoura@valinux.co.jp>
+
+[ Upstream commit b18cba09e374637a0a3759d856a6bca94c133952 ]
+
+Commit 9130b8dbc6ac ("SUNRPC: allow for upcalls for the same uid
+but different gss service") introduced `auth` argument to
+__gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL
+since it (and auth->service) was not (yet) determined.
+
+When multiple upcalls with the same uid and different service are
+ongoing, it could happen that __gss_find_upcall(), which returns the
+first match found in the pipe->in_downcall list, could not find the
+correct gss_msg corresponding to the downcall we are looking for.
+Moreover, it might return a msg which is not sent to rpc.gssd yet.
+
+We could see the mount.nfs process hung in D state when multiple
+mount.nfs processes were executed in parallel.  The call trace below is of CentOS 7.9
+kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/
+elrepo kernel-ml-6.0.7-1.el7.
+
+PID: 71258  TASK: ffff91ebd4be0000  CPU: 36  COMMAND: "mount.nfs"
+ #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f
+ #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9
+ #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss]
+ #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc
+[sunrpc]
+ #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss]
+ #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc]
+ #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc]
+ #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc]
+ #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc]
+ #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc]
+
+The scenario is like this. Let's say there are two upcalls for
+services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe.
+
+When rpc.gssd reads pipe to get the upcall msg corresponding to
+service B from pipe->pipe and then writes the response, in
+gss_pipe_downcall the msg corresponding to service A will be picked
+because only uid is used to find the msg and it is before the one for
+B in pipe->in_downcall.  And the process waiting for the msg
+corresponding to service A will be woken up.
+
+Actual scheduling of that process might be after rpc.gssd processes the
+next msg.  In rpc_pipe_generic_upcall it clears msg->errno (for A).
+The process is scheduled to see gss_msg->ctx == NULL and
+gss_msg->msg.errno == 0, therefore it cannot break the loop in
+gss_create_upcall and is never woken up after that.
+
+This patch adds a simple check to ensure that a msg which is not
+sent to rpc.gssd yet is not chosen as the matching upcall upon
+receiving a downcall.
+
+Signed-off-by: minoura makoto <minoura@valinux.co.jp>
+Signed-off-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
+Tested-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
+Cc: Trond Myklebust <trondmy@hammerspace.com>
+Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service")
+Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sunrpc/rpc_pipe_fs.h |  5 +++++
+ net/sunrpc/auth_gss/auth_gss.c     | 19 +++++++++++++++++--
+ 2 files changed, 22 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
+index cd188a527d16..3b35b6f6533a 100644
+--- a/include/linux/sunrpc/rpc_pipe_fs.h
++++ b/include/linux/sunrpc/rpc_pipe_fs.h
+@@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
+                                      char __user *, size_t);
+ extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *);
++/* returns true if the msg is in-flight, i.e., already eaten by the peer */
++static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) {
++      return (msg->copied != 0 && list_empty(&msg->list));
++}
++
+ struct rpc_clnt;
+ extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
+ extern int rpc_remove_client_dir(struct rpc_clnt *);
+diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
+index 5f42aa5fc612..2ff66a6a7e54 100644
+--- a/net/sunrpc/auth_gss/auth_gss.c
++++ b/net/sunrpc/auth_gss/auth_gss.c
+@@ -301,7 +301,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
+       list_for_each_entry(pos, &pipe->in_downcall, list) {
+               if (!uid_eq(pos->uid, uid))
+                       continue;
+-              if (auth && pos->auth->service != auth->service)
++              if (pos->auth->service != auth->service)
+                       continue;
+               refcount_inc(&pos->count);
+               return pos;
+@@ -685,6 +685,21 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
+       return err;
+ }
++static struct gss_upcall_msg *
++gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid)
++{
++      struct gss_upcall_msg *pos;
++      list_for_each_entry(pos, &pipe->in_downcall, list) {
++              if (!uid_eq(pos->uid, uid))
++                      continue;
++              if (!rpc_msg_is_inflight(&pos->msg))
++                      continue;
++              refcount_inc(&pos->count);
++              return pos;
++      }
++      return NULL;
++}
++
+ #define MSG_BUF_MAXSIZE 1024
+ static ssize_t
+@@ -731,7 +746,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
+       err = -ENOENT;
+       /* Find a matching upcall */
+       spin_lock(&pipe->lock);
+-      gss_msg = __gss_find_upcall(pipe, uid, NULL);
++      gss_msg = gss_find_downcall(pipe, uid);
+       if (gss_msg == NULL) {
+               spin_unlock(&pipe->lock);
+               goto err_put_ctx;
+-- 
+2.35.1
+
diff --git a/queue-5.10/udf-fix-extension-of-the-last-extent-in-the-file.patch b/queue-5.10/udf-fix-extension-of-the-last-extent-in-the-file.patch
new file mode 100644 (file)
index 0000000..dd8e186
--- /dev/null
@@ -0,0 +1,37 @@
+From a907a679427fde4f26b7adee8f228079c3052710 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Dec 2022 17:45:51 +0100
+Subject: udf: Fix extension of the last extent in the file
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 83c7423d1eb6806d13c521d1002cc1a012111719 ]
+
+When extending the last extent in the file within the last block, we
+wrongly computed the length of the last extent. This is mostly a
+cosmetical problem since the extent does not contain any data and the
+length will be fixed up by following operations but still.
+
+Fixes: 1f3868f06855 ("udf: Fix extending file within last block")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index e94a18bb7f99..2132bfab67f3 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -599,7 +599,7 @@ static void udf_do_extend_final_block(struct inode *inode,
+        */
+       if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK))
+               return;
+-      added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen;
++      added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK);
+       last_ext->extLength += added_bytes;
+       UDF_I(inode)->i_lenExtents += added_bytes;
+-- 
+2.35.1
+
diff --git a/queue-5.10/usb-rndis_host-secure-rndis_query-check-against-int-.patch b/queue-5.10/usb-rndis_host-secure-rndis_query-check-against-int-.patch
new file mode 100644 (file)
index 0000000..050173e
--- /dev/null
@@ -0,0 +1,43 @@
+From 6f1609ba6535cb41c4734901ab0b7963164c653e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 10:17:09 +0100
+Subject: usb: rndis_host: Secure rndis_query check against int overflow
+
+From: Szymon Heidrich <szymon.heidrich@gmail.com>
+
+[ Upstream commit c7dd13805f8b8fc1ce3b6d40f6aff47e66b72ad2 ]
+
+Variables off and len typed as uint32 in rndis_query function
+are controlled by incoming RNDIS response message thus their
+value may be manipulated. Setting off to an unexpectedly large
+value will cause the sum with len and 8 to overflow and pass
+the implemented validation step. Consequently the response
+pointer will be referring to a location past the expected
+buffer boundaries allowing information leakage e.g. via
+RNDIS_OID_802_3_PERMANENT_ADDRESS OID.
+
+Fixes: ddda08624013 ("USB: rndis_host, various cleanups")
+Signed-off-by: Szymon Heidrich <szymon.heidrich@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/rndis_host.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c
+index 1505fe3f87ed..1ff723e15d52 100644
+--- a/drivers/net/usb/rndis_host.c
++++ b/drivers/net/usb/rndis_host.c
+@@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf,
+       off = le32_to_cpu(u.get_c->offset);
+       len = le32_to_cpu(u.get_c->len);
+-      if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE))
++      if (unlikely((off > CONTROL_BUFFER_SIZE - 8) ||
++                   (len > CONTROL_BUFFER_SIZE - 8 - off)))
+               goto response_error;
+       if (*reply_len != -1 && len != *reply_len)
+-- 
+2.35.1
+
diff --git a/queue-5.10/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch b/queue-5.10/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch
new file mode 100644 (file)
index 0000000..8a477aa
--- /dev/null
@@ -0,0 +1,88 @@
+From 19ad77894525f87a61981b22619cca345fdc9e79 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 12:59:03 -0600
+Subject: veth: Fix race with AF_XDP exposing old or uninitialized descriptors
+
+From: Shawn Bohrer <sbohrer@cloudflare.com>
+
+[ Upstream commit fa349e396e4886d742fd6501c599ec627ef1353b ]
+
+When AF_XDP is used on a veth interface the RX ring is updated in two
+steps.  veth_xdp_rcv() removes packet descriptors from the FILL ring
+fills them and places them in the RX ring updating the cached_prod
+pointer.  Later xdp_do_flush() syncs the RX ring prod pointer with the
+cached_prod pointer allowing user-space to see the recently filled in
+descriptors.  The rings are intended to be SPSC, however the existing
+order in veth_poll allows the xdp_do_flush() to run concurrently with
+another CPU creating a race condition that allows user-space to see old
+or uninitialized descriptors in the RX ring.  This bug has been observed
+in production systems.
+
+To summarize, we are expecting this ordering:
+
+CPU 0 __xsk_rcv_zc()
+CPU 0 __xsk_map_flush()
+CPU 2 __xsk_rcv_zc()
+CPU 2 __xsk_map_flush()
+
+But we are seeing this order:
+
+CPU 0 __xsk_rcv_zc()
+CPU 2 __xsk_rcv_zc()
+CPU 0 __xsk_map_flush()
+CPU 2 __xsk_map_flush()
+
+This occurs because we rely on NAPI to ensure that only one napi_poll
+handler is running at a time for the given veth receive queue.
+napi_schedule_prep() will prevent multiple instances from getting
+scheduled. However calling napi_complete_done() signals that this
+napi_poll is complete and allows subsequent calls to
+napi_schedule_prep() and __napi_schedule() to succeed in scheduling a
+concurrent napi_poll before the xdp_do_flush() has been called.  For the
+veth driver a concurrent call to napi_schedule_prep() and
+__napi_schedule() can occur on a different CPU because the veth xmit
+path can additionally schedule a napi_poll creating the race.
+
+The fix as suggested by Magnus Karlsson, is to simply move the
+xdp_do_flush() call before napi_complete_done().  This syncs the
+producer ring pointers before another instance of napi_poll can be
+scheduled on another CPU.  It will also slightly improve performance by
+moving the flush closer to when the descriptors were placed in the
+RX ring.
+
+Fixes: d1396004dd86 ("veth: Add XDP TX and REDIRECT")
+Suggested-by: Magnus Karlsson <magnus.karlsson@gmail.com>
+Signed-off-by: Shawn Bohrer <sbohrer@cloudflare.com>
+Link: https://lore.kernel.org/r/20221220185903.1105011-1-sbohrer@cloudflare.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/veth.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/veth.c b/drivers/net/veth.c
+index 5be8ed910553..5aa23a036ed3 100644
+--- a/drivers/net/veth.c
++++ b/drivers/net/veth.c
+@@ -849,6 +849,9 @@ static int veth_poll(struct napi_struct *napi, int budget)
+       xdp_set_return_frame_no_direct();
+       done = veth_xdp_rcv(rq, budget, &bq, &stats);
++      if (stats.xdp_redirect > 0)
++              xdp_do_flush();
++
+       if (done < budget && napi_complete_done(napi, done)) {
+               /* Write rx_notify_masked before reading ptr_ring */
+               smp_store_mb(rq->rx_notify_masked, false);
+@@ -862,8 +865,6 @@ static int veth_poll(struct napi_struct *napi, int budget)
+       if (stats.xdp_tx > 0)
+               veth_xdp_flush(rq, &bq);
+-      if (stats.xdp_redirect > 0)
+-              xdp_do_flush();
+       xdp_clear_return_frame_no_direct();
+       return done;
+-- 
+2.35.1
+
diff --git a/queue-5.10/vhost-fix-range-used-in-translate_desc.patch b/queue-5.10/vhost-fix-range-used-in-translate_desc.patch
new file mode 100644 (file)
index 0000000..81667e5
--- /dev/null
@@ -0,0 +1,55 @@
+From 1d66f3722bf0492cb75c49cdaf3f6888a18ffa18 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 11:25:03 +0100
+Subject: vhost: fix range used in translate_desc()
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 98047313cdb46828093894d0ac8b1183b8b317f9 ]
+
+vhost_iotlb_itree_first() requires `start` and `last` parameters
+to search for a mapping that overlaps the range.
+
+In translate_desc() we cyclically call vhost_iotlb_itree_first(),
+incrementing `addr` by the amount already translated, so rightly
+we move the `start` parameter passed to vhost_iotlb_itree_first(),
+but we should hold the `last` parameter constant.
+
+Let's fix it by saving the `last` parameter value before incrementing
+`addr` in the loop.
+
+Fixes: a9709d6874d5 ("vhost: convert pre sorted vhost memory array to interval tree")
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-Id: <20221109102503.18816-3-sgarzare@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vhost.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
+index f41463ab4031..da00a5c57db6 100644
+--- a/drivers/vhost/vhost.c
++++ b/drivers/vhost/vhost.c
+@@ -2041,7 +2041,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
+       struct vhost_dev *dev = vq->dev;
+       struct vhost_iotlb *umem = dev->iotlb ? dev->iotlb : dev->umem;
+       struct iovec *_iov;
+-      u64 s = 0;
++      u64 s = 0, last = addr + len - 1;
+       int ret = 0;
+       while ((u64)len > s) {
+@@ -2051,7 +2051,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len,
+                       break;
+               }
+-              map = vhost_iotlb_itree_first(umem, addr, addr + len - 1);
++              map = vhost_iotlb_itree_first(umem, addr, last);
+               if (map == NULL || map->start > addr) {
+                       if (umem != dev->iotlb) {
+                               ret = -EFAULT;
+-- 
+2.35.1
+
diff --git a/queue-5.10/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch b/queue-5.10/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch
new file mode 100644 (file)
index 0000000..ac43a5c
--- /dev/null
@@ -0,0 +1,64 @@
+From c16e2f737ce8ca4b8ca7537af351af8503334f82 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Nov 2022 10:17:05 +0000
+Subject: vhost/vsock: Fix error handling in vhost_vsock_init()
+
+From: Yuan Can <yuancan@huawei.com>
+
+[ Upstream commit 7a4efe182ca61fb3e5307e69b261c57cbf434cd4 ]
+
+A problem about modprobe vhost_vsock failed is triggered with the
+following log given:
+
+modprobe: ERROR: could not insert 'vhost_vsock': Device or resource busy
+
+The reason is that vhost_vsock_init() returns misc_register() directly
+without checking its return value, if misc_register() failed, it returns
+without calling vsock_core_unregister() on vhost_transport, with the
+result that vhost_vsock can never be installed later.
+A simple call graph is shown as below:
+
+ vhost_vsock_init()
+   vsock_core_register() # register vhost_transport
+   misc_register()
+     device_create_with_groups()
+       device_create_groups_vargs()
+         dev = kzalloc(...) # OOM happened
+   # return without unregister vhost_transport
+
+Fix by calling vsock_core_unregister() when misc_register() returns error.
+
+Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko")
+Signed-off-by: Yuan Can <yuancan@huawei.com>
+Message-Id: <20221108101705.45981-1-yuancan@huawei.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vsock.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
+index b0153617fe0e..7bce5f982e58 100644
+--- a/drivers/vhost/vsock.c
++++ b/drivers/vhost/vsock.c
+@@ -854,7 +854,14 @@ static int __init vhost_vsock_init(void)
+                                 VSOCK_TRANSPORT_F_H2G);
+       if (ret < 0)
+               return ret;
+-      return misc_register(&vhost_vsock_misc);
++
++      ret = misc_register(&vhost_vsock_misc);
++      if (ret) {
++              vsock_core_unregister(&vhost_transport.transport);
++              return ret;
++      }
++
++      return 0;
+ };
+ static void __exit vhost_vsock_exit(void)
+-- 
+2.35.1
+
diff --git a/queue-5.10/vmxnet3-correctly-report-csum_level-for-encapsulated.patch b/queue-5.10/vmxnet3-correctly-report-csum_level-for-encapsulated.patch
new file mode 100644 (file)
index 0000000..35e19df
--- /dev/null
@@ -0,0 +1,55 @@
+From 9e2579a3f21959106fea0a91504686982c460d28 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Dec 2022 12:25:55 -0800
+Subject: vmxnet3: correctly report csum_level for encapsulated packet
+
+From: Ronak Doshi <doshir@vmware.com>
+
+[ Upstream commit 3d8f2c4269d08f8793e946279dbdf5e972cc4911 ]
+
+Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload
+support") added support for encapsulation offload. However, the
+patch did not correctly report the csum_level for encapsulated packets.
+
+This patch fixes this issue by reporting correct csum level for the
+encapsulated packet.
+
+Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support")
+Signed-off-by: Ronak Doshi <doshir@vmware.com>
+Acked-by: Peng Li <lpeng@vmware.com>
+Link: https://lore.kernel.org/r/20221220202556.24421-1-doshir@vmware.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/vmxnet3/vmxnet3_drv.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
+index 43a4bcdd92c1..3b889fed9882 100644
+--- a/drivers/net/vmxnet3/vmxnet3_drv.c
++++ b/drivers/net/vmxnet3/vmxnet3_drv.c
+@@ -1236,6 +1236,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
+                   (le32_to_cpu(gdesc->dword[3]) &
+                    VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
++                      if ((le32_to_cpu(gdesc->dword[0]) &
++                                   (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) {
++                              skb->csum_level = 1;
++                      }
+                       WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+                                    !(le32_to_cpu(gdesc->dword[0]) &
+                                    (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+@@ -1245,6 +1249,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
+               } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) &
+                                            (1 << VMXNET3_RCD_TUC_SHIFT))) {
+                       skb->ip_summed = CHECKSUM_UNNECESSARY;
++                      if ((le32_to_cpu(gdesc->dword[0]) &
++                                   (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) {
++                              skb->csum_level = 1;
++                      }
+                       WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+                                    !(le32_to_cpu(gdesc->dword[0]) &
+                                    (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+-- 
+2.35.1
+
diff --git a/queue-5.10/vringh-fix-range-used-in-iotlb_translate.patch b/queue-5.10/vringh-fix-range-used-in-iotlb_translate.patch
new file mode 100644 (file)
index 0000000..03ed9bf
--- /dev/null
@@ -0,0 +1,56 @@
+From dbaecb214467cc40fbbb4ca94799671c27766dab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Nov 2022 11:25:02 +0100
+Subject: vringh: fix range used in iotlb_translate()
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit f85efa9b0f5381874f727bd98f56787840313f0b ]
+
+vhost_iotlb_itree_first() requires `start` and `last` parameters
+to search for a mapping that overlaps the range.
+
+In iotlb_translate() we cyclically call vhost_iotlb_itree_first(),
+incrementing `addr` by the amount already translated, so rightly
+we move the `start` parameter passed to vhost_iotlb_itree_first(),
+but we should hold the `last` parameter constant.
+
+Let's fix it by saving the `last` parameter value before incrementing
+`addr` in the loop.
+
+Fixes: 9ad9c49cfe97 ("vringh: IOTLB support")
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Message-Id: <20221109102503.18816-2-sgarzare@redhat.com>
+Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vhost/vringh.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
+index 5a0340c85dc6..48f4ec2ba40a 100644
+--- a/drivers/vhost/vringh.c
++++ b/drivers/vhost/vringh.c
+@@ -1077,7 +1077,7 @@ static int iotlb_translate(const struct vringh *vrh,
+       struct vhost_iotlb_map *map;
+       struct vhost_iotlb *iotlb = vrh->iotlb;
+       int ret = 0;
+-      u64 s = 0;
++      u64 s = 0, last = addr + len - 1;
+       while (len > s) {
+               u64 size, pa, pfn;
+@@ -1087,8 +1087,7 @@ static int iotlb_translate(const struct vringh *vrh,
+                       break;
+               }
+-              map = vhost_iotlb_itree_first(iotlb, addr,
+-                                            addr + len - 1);
++              map = vhost_iotlb_itree_first(iotlb, addr, last);
+               if (!map || map->start > addr) {
+                       ret = -EINVAL;
+                       break;
+-- 
+2.35.1
+
diff --git a/queue-5.10/x86-kprobes-convert-to-insn_decode.patch b/queue-5.10/x86-kprobes-convert-to-insn_decode.patch
new file mode 100644 (file)
index 0000000..a5d6d2a
--- /dev/null
@@ -0,0 +1,100 @@
+From 2c70ea9179e1f1d01d70e3724cca2d74a403326e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Nov 2020 18:10:11 +0100
+Subject: x86/kprobes: Convert to insn_decode()
+
+From: Borislav Petkov <bp@suse.de>
+
+[ Upstream commit 77e768ec1391dc0d6cd89822aa60b9a1c1bd8128 ]
+
+Simplify code, improve decoding error checking.
+
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Acked-by: Masami Hiramatsu <mhiramat@kernel.org>
+Link: https://lkml.kernel.org/r/20210304174237.31945-12-bp@alien8.de
+Stable-dep-of: 63dc6325ff41 ("x86/kprobes: Fix optprobe optimization check with CONFIG_RETHUNK")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/kprobes/core.c | 17 +++++++++++------
+ arch/x86/kernel/kprobes/opt.c  |  9 +++++++--
+ 2 files changed, 18 insertions(+), 8 deletions(-)
+
+diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
+index 97e1d2a9898f..5de757099186 100644
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -293,6 +293,8 @@ static int can_probe(unsigned long paddr)
+       /* Decode instructions */
+       addr = paddr - offset;
+       while (addr < paddr) {
++              int ret;
++
+               /*
+                * Check if the instruction has been modified by another
+                * kprobe, in which case we replace the breakpoint by the
+@@ -304,8 +306,10 @@ static int can_probe(unsigned long paddr)
+               __addr = recover_probed_instruction(buf, addr);
+               if (!__addr)
+                       return 0;
+-              kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE);
+-              insn_get_length(&insn);
++
++              ret = insn_decode(&insn, (void *)__addr, MAX_INSN_SIZE, INSN_MODE_KERN);
++              if (ret < 0)
++                      return 0;
+ #ifdef CONFIG_KGDB
+               /*
+@@ -351,8 +355,8 @@ static int is_IF_modifier(kprobe_opcode_t *insn)
+ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
+ {
+       kprobe_opcode_t buf[MAX_INSN_SIZE];
+-      unsigned long recovered_insn =
+-              recover_probed_instruction(buf, (unsigned long)src);
++      unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src);
++      int ret;
+       if (!recovered_insn || !insn)
+               return 0;
+@@ -362,8 +366,9 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
+                       MAX_INSN_SIZE))
+               return 0;
+-      kernel_insn_init(insn, dest, MAX_INSN_SIZE);
+-      insn_get_length(insn);
++      ret = insn_decode(insn, dest, MAX_INSN_SIZE, INSN_MODE_KERN);
++      if (ret < 0)
++              return 0;
+       /* We can not probe force emulate prefixed instruction */
+       if (insn_has_emulate_prefix(insn))
+diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
+index 08eb23074f92..4299fc865732 100644
+--- a/arch/x86/kernel/kprobes/opt.c
++++ b/arch/x86/kernel/kprobes/opt.c
+@@ -312,6 +312,8 @@ static int can_optimize(unsigned long paddr)
+       addr = paddr - offset;
+       while (addr < paddr - offset + size) { /* Decode until function end */
+               unsigned long recovered_insn;
++              int ret;
++
+               if (search_exception_tables(addr))
+                       /*
+                        * Since some fixup code will jumps into this function,
+@@ -321,8 +323,11 @@ static int can_optimize(unsigned long paddr)
+               recovered_insn = recover_probed_instruction(buf, addr);
+               if (!recovered_insn)
+                       return 0;
+-              kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE);
+-              insn_get_length(&insn);
++
++              ret = insn_decode(&insn, (void *)recovered_insn, MAX_INSN_SIZE, INSN_MODE_KERN);
++              if (ret < 0)
++                      return 0;
++
+               /*
+                * In the case of detecting unknown breakpoint, this could be
+                * a padding INT3 between functions. Let's check that all the
+-- 
+2.35.1
+
diff --git a/queue-5.10/x86-kprobes-fix-optprobe-optimization-check-with-con.patch b/queue-5.10/x86-kprobes-fix-optprobe-optimization-check-with-con.patch
new file mode 100644 (file)
index 0000000..88141a0
--- /dev/null
@@ -0,0 +1,89 @@
+From 5e1c0e63e98ffedb57bb62dc0183a44cb126f2ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Dec 2022 23:35:19 +0900
+Subject: x86/kprobes: Fix optprobe optimization check with CONFIG_RETHUNK
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit 63dc6325ff41ee9e570bde705ac34a39c5dbeb44 ]
+
+Since the CONFIG_RETHUNK and CONFIG_SLS will use INT3 for stopping
+speculative execution after function return, kprobe jump optimization
+always fails on the functions with such INT3 inside the function body.
+(It already checks the INT3 padding between functions, but not inside
+ the function)
+
+To avoid this issue, as done for kprobes, check whether the INT3 comes
+from kgdb or not, and if so, stop decoding and make it fail. The other
+INT3 will come from CONFIG_RETHUNK/CONFIG_SLS and those can be
+treated as a one-byte instruction.
+
+Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation")
+Suggested-by: Peter Zijlstra <peterz@infradead.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/167146051929.1374301.7419382929328081706.stgit@devnote3
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/kprobes/opt.c | 28 ++++++++--------------------
+ 1 file changed, 8 insertions(+), 20 deletions(-)
+
+diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
+index 4299fc865732..3d6201492006 100644
+--- a/arch/x86/kernel/kprobes/opt.c
++++ b/arch/x86/kernel/kprobes/opt.c
+@@ -15,6 +15,7 @@
+ #include <linux/extable.h>
+ #include <linux/kdebug.h>
+ #include <linux/kallsyms.h>
++#include <linux/kgdb.h>
+ #include <linux/ftrace.h>
+ #include <linux/objtool.h>
+ #include <linux/pgtable.h>
+@@ -272,19 +273,6 @@ static int insn_is_indirect_jump(struct insn *insn)
+       return ret;
+ }
+-static bool is_padding_int3(unsigned long addr, unsigned long eaddr)
+-{
+-      unsigned char ops;
+-
+-      for (; addr < eaddr; addr++) {
+-              if (get_kernel_nofault(ops, (void *)addr) < 0 ||
+-                  ops != INT3_INSN_OPCODE)
+-                      return false;
+-      }
+-
+-      return true;
+-}
+-
+ /* Decode whole function to ensure any instructions don't jump into target */
+ static int can_optimize(unsigned long paddr)
+ {
+@@ -327,15 +315,15 @@ static int can_optimize(unsigned long paddr)
+               ret = insn_decode(&insn, (void *)recovered_insn, MAX_INSN_SIZE, INSN_MODE_KERN);
+               if (ret < 0)
+                       return 0;
+-
++#ifdef CONFIG_KGDB
+               /*
+-               * In the case of detecting unknown breakpoint, this could be
+-               * a padding INT3 between functions. Let's check that all the
+-               * rest of the bytes are also INT3.
++               * If there is a dynamically installed kgdb sw breakpoint,
++               * this function should not be probed.
+                */
+-              if (insn.opcode.bytes[0] == INT3_INSN_OPCODE)
+-                      return is_padding_int3(addr, paddr - offset + size) ? 1 : 0;
+-
++              if (insn.opcode.bytes[0] == INT3_INSN_OPCODE &&
++                  kgdb_has_hit_break(addr))
++                      return 0;
++#endif
+               /* Recover address */
+               insn.kaddr = (void *)addr;
+               insn.next_byte = (void *)(addr + insn.length);
+-- 
+2.35.1
+