From: Sasha Levin Date: Tue, 10 Jan 2023 01:55:36 +0000 (-0500) Subject: Fixes for 5.10 X-Git-Tag: v5.15.87~43 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5224f850994da00681c9295a6b8a5110480d9846;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch b/queue-5.10/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch new file mode 100644 index 00000000000..166c06ad66b --- /dev/null +++ b/queue-5.10/asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch @@ -0,0 +1,59 @@ +From d8f13e5a796c3a9e068b494702f7e0dda43da1db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Dec 2022 13:32:46 +0100 +Subject: ASoC: Intel: bytcr_rt5640: Add quirk for the Advantech MICA-071 + tablet + +From: Hans de Goede + +[ Upstream commit a1dec9d70b6ad97087b60b81d2492134a84208c6 ] + +The Advantech MICA-071 tablet deviates from the defaults for +a non CR Bay Trail based tablet in several ways: + +1. It uses an analog MIC on IN3 rather than using DMIC1 +2. It only has 1 speaker +3. It needs the OVCD current threshold to be set to 1500uA instead of + the default 2000uA to reliably differentiate between headphones vs + headsets + +Add a quirk with these settings for this tablet. 
+ +Signed-off-by: Hans de Goede +Acked-by: Pierre-Louis Bossart +Link: https://lore.kernel.org/r/20221213123246.11226-1-hdegoede@redhat.com +Signed-off-by: Mark Brown +Signed-off-by: Sasha Levin +--- + sound/soc/intel/boards/bytcr_rt5640.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/sound/soc/intel/boards/bytcr_rt5640.c b/sound/soc/intel/boards/bytcr_rt5640.c +index 3020a993f6ef..8a99cb6dfcd6 100644 +--- a/sound/soc/intel/boards/bytcr_rt5640.c ++++ b/sound/soc/intel/boards/bytcr_rt5640.c +@@ -430,6 +430,21 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = { + BYT_RT5640_SSP0_AIF1 | + BYT_RT5640_MCLK_EN), + }, ++ { ++ /* Advantech MICA-071 */ ++ .matches = { ++ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Advantech"), ++ DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "MICA-071"), ++ }, ++ /* OVCD Th = 1500uA to reliable detect head-phones vs -set */ ++ .driver_data = (void *)(BYT_RT5640_IN3_MAP | ++ BYT_RT5640_JD_SRC_JD2_IN4N | ++ BYT_RT5640_OVCD_TH_1500UA | ++ BYT_RT5640_OVCD_SF_0P75 | ++ BYT_RT5640_MONO_SPEAKER | ++ BYT_RT5640_DIFF_MIC | ++ BYT_RT5640_MCLK_EN), ++ }, + { + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ARCHOS"), +-- +2.35.1 + diff --git a/queue-5.10/bpf-pull-before-calling-skb_postpull_rcsum.patch b/queue-5.10/bpf-pull-before-calling-skb_postpull_rcsum.patch new file mode 100644 index 00000000000..fb24a930953 --- /dev/null +++ b/queue-5.10/bpf-pull-before-calling-skb_postpull_rcsum.patch @@ -0,0 +1,61 @@ +From 4f6827171291d128efd7c11c395dd9c39ebbd3ab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 16:47:00 -0800 +Subject: bpf: pull before calling skb_postpull_rcsum() + +From: Jakub Kicinski + +[ Upstream commit 54c3f1a81421f85e60ae2eaae7be3727a09916ee ] + +Anand hit a BUG() when pulling off headers on egress to a SW tunnel. +We get to skb_checksum_help() with an invalid checksum offset +(commit d7ea0d9df2a6 ("net: remove two BUG() from skb_checksum_help()") +converted those BUGs to WARN_ONs()). 
+He points out oddness in how skb_postpull_rcsum() gets used. +Indeed looks like we should pull before "postpull", otherwise +the CHECKSUM_PARTIAL fixup from skb_postpull_rcsum() will not +be able to do its job: + + if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) < 0) + skb->ip_summed = CHECKSUM_NONE; + +Reported-by: Anand Parthasarathy +Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") +Signed-off-by: Jakub Kicinski +Acked-by: Stanislav Fomichev +Link: https://lore.kernel.org/r/20221220004701.402165-1-kuba@kernel.org +Signed-off-by: Martin KaFai Lau +Signed-off-by: Sasha Levin +--- + net/core/filter.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/net/core/filter.c b/net/core/filter.c +index e3cdbd4996e0..a5df0cf46bbf 100644 +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -3201,15 +3201,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) + + static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len) + { ++ void *old_data; ++ + /* skb_ensure_writable() is not needed here, as we're + * already working on an uncloned skb. 
+ */ + if (unlikely(!pskb_may_pull(skb, off + len))) + return -ENOMEM; + +- skb_postpull_rcsum(skb, skb->data + off, len); +- memmove(skb->data + len, skb->data, off); ++ old_data = skb->data; + __skb_pull(skb, len); ++ skb_postpull_rcsum(skb, old_data + off, len); ++ memmove(skb->data, old_data, off); + + return 0; + } +-- +2.35.1 + diff --git a/queue-5.10/caif-fix-memory-leak-in-cfctrl_linkup_request.patch b/queue-5.10/caif-fix-memory-leak-in-cfctrl_linkup_request.patch new file mode 100644 index 00000000000..c4981cdb139 --- /dev/null +++ b/queue-5.10/caif-fix-memory-leak-in-cfctrl_linkup_request.patch @@ -0,0 +1,47 @@ +From 7416533f696975e399043b8bcabadb04a2fdc487 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 4 Jan 2023 14:51:46 +0800 +Subject: caif: fix memory leak in cfctrl_linkup_request() + +From: Zhengchao Shao + +[ Upstream commit fe69230f05897b3de758427b574fc98025dfc907 ] + +When linktype is unknown or kzalloc failed in cfctrl_linkup_request(), +pkt is not released. Add release process to error path. 
+ +Fixes: b482cd2053e3 ("net-caif: add CAIF core protocol stack") +Fixes: 8d545c8f958f ("caif: Disconnect without waiting for response") +Signed-off-by: Zhengchao Shao +Reviewed-by: Jiri Pirko +Link: https://lore.kernel.org/r/20230104065146.1153009-1-shaozhengchao@huawei.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/caif/cfctrl.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c +index 2809cbd6b7f7..d8cb4b2a076b 100644 +--- a/net/caif/cfctrl.c ++++ b/net/caif/cfctrl.c +@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer, + default: + pr_warn("Request setup of bad link type = %d\n", + param->linktype); ++ cfpkt_destroy(pkt); + return -EINVAL; + } + req = kzalloc(sizeof(*req), GFP_KERNEL); +- if (!req) ++ if (!req) { ++ cfpkt_destroy(pkt); + return -ENOMEM; ++ } ++ + req->client_layer = user_layer; + req->cmd = CFCTRL_CMD_LINK_SETUP; + req->param = *param; +-- +2.35.1 + diff --git a/queue-5.10/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch b/queue-5.10/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch new file mode 100644 index 00000000000..73869449373 --- /dev/null +++ b/queue-5.10/ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch @@ -0,0 +1,85 @@ +From 3346e174106072f3133daa6aa377a4779ebadadf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Nov 2022 10:43:21 +0800 +Subject: ceph: switch to vfs_inode_has_locks() to fix file lock bug + +From: Xiubo Li + +[ Upstream commit 461ab10ef7e6ea9b41a0571a7fc6a72af9549a3c ] + +For the POSIX locks they are using the same owner, which is the +thread id. And multiple POSIX locks could be merged into single one, +so when checking whether the 'file' has locks may fail. + +For a file where some openers use locking and others don't is a +really odd usage pattern though. Locks are like stoplights -- they +only work if everyone pays attention to them. 
+ +Just switch ceph_get_caps() to check whether any locks are set on +the inode. If there are POSIX/OFD/FLOCK locks on the file at the +time, we should set CHECK_FILELOCK, regardless of what fd was used +to set the lock. + +Fixes: ff5d913dfc71 ("ceph: return -EIO if read/write against filp that lost file locks") +Signed-off-by: Xiubo Li +Reviewed-by: Jeff Layton +Reviewed-by: Ilya Dryomov +Signed-off-by: Ilya Dryomov +Signed-off-by: Sasha Levin +--- + fs/ceph/caps.c | 2 +- + fs/ceph/locks.c | 4 ---- + fs/ceph/super.h | 1 - + 3 files changed, 1 insertion(+), 6 deletions(-) + +diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c +index 51562d36fa83..210496dc2fd4 100644 +--- a/fs/ceph/caps.c ++++ b/fs/ceph/caps.c +@@ -2957,7 +2957,7 @@ int ceph_get_caps(struct file *filp, int need, int want, + + while (true) { + flags &= CEPH_FILE_MODE_MASK; +- if (atomic_read(&fi->num_locks)) ++ if (vfs_inode_has_locks(inode)) + flags |= CHECK_FILELOCK; + _got = 0; + ret = try_get_cap_refs(inode, need, want, endoff, +diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c +index 048a435a29be..674d6ea89f71 100644 +--- a/fs/ceph/locks.c ++++ b/fs/ceph/locks.c +@@ -32,18 +32,14 @@ void __init ceph_flock_init(void) + + static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src) + { +- struct ceph_file_info *fi = dst->fl_file->private_data; + struct inode *inode = file_inode(dst->fl_file); + atomic_inc(&ceph_inode(inode)->i_filelock_ref); +- atomic_inc(&fi->num_locks); + } + + static void ceph_fl_release_lock(struct file_lock *fl) + { +- struct ceph_file_info *fi = fl->fl_file->private_data; + struct inode *inode = file_inode(fl->fl_file); + struct ceph_inode_info *ci = ceph_inode(inode); +- atomic_dec(&fi->num_locks); + if (atomic_dec_and_test(&ci->i_filelock_ref)) { + /* clear error when all locks are released */ + spin_lock(&ci->i_ceph_lock); +diff --git a/fs/ceph/super.h b/fs/ceph/super.h +index 4db305fd2a02..8716cb618cbb 100644 +--- a/fs/ceph/super.h ++++ b/fs/ceph/super.h +@@ -772,7 
+772,6 @@ struct ceph_file_info { + struct list_head rw_contexts; + + u32 filp_gen; +- atomic_t num_locks; + }; + + struct ceph_dir_file_info { +-- +2.35.1 + diff --git a/queue-5.10/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch b/queue-5.10/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch new file mode 100644 index 00000000000..233a783448a --- /dev/null +++ b/queue-5.10/drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch @@ -0,0 +1,39 @@ +From 79eb1ed97715af650b6d0a3366f4d85a08d94bb3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Jan 2023 12:53:35 +0300 +Subject: drivers/net/bonding/bond_3ad: return when there's no aggregator + +From: Daniil Tatianin + +[ Upstream commit 9c807965483f42df1d053b7436eedd6cf28ece6f ] + +Otherwise we would dereference a NULL aggregator pointer when calling +__set_agg_ports_ready on the line below. + +Found by Linux Verification Center (linuxtesting.org) with the SVACE +static analysis tool. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Daniil Tatianin +Reviewed-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/bonding/bond_3ad.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c +index acb6ff0be5ff..320e5461853f 100644 +--- a/drivers/net/bonding/bond_3ad.c ++++ b/drivers/net/bonding/bond_3ad.c +@@ -1520,6 +1520,7 @@ static void ad_port_selection_logic(struct port *port, bool *update_slave_arr) + slave_err(bond->dev, port->slave->dev, + "Port %d did not find a suitable aggregator\n", + port->actor_port_number); ++ return; + } + } + /* if all aggregator's ports are READY_N == TRUE, set ready=TRUE +-- +2.35.1 + diff --git a/queue-5.10/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch b/queue-5.10/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch new file mode 100644 index 00000000000..868132f22ec --- /dev/null +++ b/queue-5.10/drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch @@ -0,0 +1,36 @@ +From 94f909db59e7ffeb90145539f2849033de797840 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 15 Nov 2022 16:15:18 +0300 +Subject: drm/i915: unpin on error in intel_vgpu_shadow_mm_pin() + +From: Dan Carpenter + +[ Upstream commit 3792fc508c095abd84b10ceae12bd773e61fdc36 ] + +Call intel_vgpu_unpin_mm() on this error path. 
+ +Fixes: 418741480809 ("drm/i915/gvt: Adding ppgtt to GVT GEM context after shadow pdps settled.") +Signed-off-by: Dan Carpenter +Signed-off-by: Zhenyu Wang +Link: http://patchwork.freedesktop.org/patch/msgid/Y3OQ5tgZIVxyQ/WV@kili +Reviewed-by: Zhenyu Wang +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gvt/scheduler.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c +index aed2ef6466a2..2bb6203298bc 100644 +--- a/drivers/gpu/drm/i915/gvt/scheduler.c ++++ b/drivers/gpu/drm/i915/gvt/scheduler.c +@@ -647,6 +647,7 @@ intel_vgpu_shadow_mm_pin(struct intel_vgpu_workload *workload) + + if (workload->shadow_mm->type != INTEL_GVT_MM_PPGTT || + !workload->shadow_mm->ppgtt_mm.shadowed) { ++ intel_vgpu_unpin_mm(workload->shadow_mm); + gvt_vgpu_err("workload shadow ppgtt isn't ready\n"); + return -EINVAL; + } +-- +2.35.1 + diff --git a/queue-5.10/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch b/queue-5.10/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch new file mode 100644 index 00000000000..e12cb4ec015 --- /dev/null +++ b/queue-5.10/drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch @@ -0,0 +1,56 @@ +From d096a74e4142c8ddf8f64d6a89dbb54c2ba56376 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 09:43:05 +0100 +Subject: drm/meson: Reduce the FIFO lines held when AFBC is not used + +From: Carlo Caione + +[ Upstream commit 3b754ed6d1cd90017e66e5cc16f3923e4a952ffc ] + +Having a bigger number of FIFO lines held after vsync is only useful to +SoCs using AFBC to give time to the AFBC decoder to be reset, configured +and enabled again. + +For SoCs not using AFBC this, on the contrary, is causing on some +displays issues and a few pixels vertical offset in the displayed image. + +Conditionally increase the number of lines held after vsync only for +SoCs using AFBC, leaving the default value for all the others. 
+ +Fixes: 24e0d4058eff ("drm/meson: hold 32 lines after vsync to give time for AFBC start") +Signed-off-by: Carlo Caione +Acked-by: Martin Blumenstingl +Acked-by: Neil Armstrong +[narmstrong: added fixes tag] +Signed-off-by: Neil Armstrong +Link: https://patchwork.freedesktop.org/patch/msgid/20221216-afbc_s905x-v1-0-033bebf780d9@baylibre.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/meson/meson_viu.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c +index d4b907889a21..cd399b0b7181 100644 +--- a/drivers/gpu/drm/meson/meson_viu.c ++++ b/drivers/gpu/drm/meson/meson_viu.c +@@ -436,15 +436,14 @@ void meson_viu_init(struct meson_drm *priv) + + /* Initialize OSD1 fifo control register */ + reg = VIU_OSD_DDR_PRIORITY_URGENT | +- VIU_OSD_HOLD_FIFO_LINES(31) | + VIU_OSD_FIFO_DEPTH_VAL(32) | /* fifo_depth_val: 32*8=256 */ + VIU_OSD_WORDS_PER_BURST(4) | /* 4 words in 1 burst */ + VIU_OSD_FIFO_LIMITS(2); /* fifo_lim: 2*16=32 */ + + if (meson_vpu_is_compatible(priv, VPU_COMPATIBLE_G12A)) +- reg |= VIU_OSD_BURST_LENGTH_32; ++ reg |= (VIU_OSD_BURST_LENGTH_32 | VIU_OSD_HOLD_FIFO_LINES(31)); + else +- reg |= VIU_OSD_BURST_LENGTH_64; ++ reg |= (VIU_OSD_BURST_LENGTH_64 | VIU_OSD_HOLD_FIFO_LINES(4)); + + writel_relaxed(reg, priv->io_base + _REG(VIU_OSD1_FIFO_CTRL_STAT)); + writel_relaxed(reg, priv->io_base + _REG(VIU_OSD2_FIFO_CTRL_STAT)); +-- +2.35.1 + diff --git a/queue-5.10/drm-panfrost-fix-gem-handle-creation-ref-counting.patch b/queue-5.10/drm-panfrost-fix-gem-handle-creation-ref-counting.patch new file mode 100644 index 00000000000..91979d7f76a --- /dev/null +++ b/queue-5.10/drm-panfrost-fix-gem-handle-creation-ref-counting.patch @@ -0,0 +1,138 @@ +From 97dae042255b2074cb2a8ed47e6c9c116d6de05e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 14:01:30 +0000 +Subject: drm/panfrost: Fix GEM handle creation ref-counting + +From: Steven Price + +[ Upstream 
commit 4217c6ac817451d5116687f3cc6286220dc43d49 ] + +panfrost_gem_create_with_handle() previously returned a BO but with the +only reference being from the handle, which user space could in theory +guess and release, causing a use-after-free. Additionally if the call to +panfrost_gem_mapping_get() in panfrost_ioctl_create_bo() failed then +a(nother) reference on the BO was dropped. + +The _create_with_handle() is a problematic pattern, so ditch it and +instead create the handle in panfrost_ioctl_create_bo(). If the call to +panfrost_gem_mapping_get() fails then this means that user space has +indeed gone behind our back and freed the handle. In which case just +return an error code. + +Reported-by: Rob Clark +Fixes: f3ba91228e8e ("drm/panfrost: Add initial panfrost driver") +Signed-off-by: Steven Price +Reviewed-by: Rob Clark +Link: https://patchwork.freedesktop.org/patch/msgid/20221219140130.410578-1-steven.price@arm.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/panfrost/panfrost_drv.c | 27 ++++++++++++++++--------- + drivers/gpu/drm/panfrost/panfrost_gem.c | 16 +-------------- + drivers/gpu/drm/panfrost/panfrost_gem.h | 5 +---- + 3 files changed, 20 insertions(+), 28 deletions(-) + +diff --git a/drivers/gpu/drm/panfrost/panfrost_drv.c b/drivers/gpu/drm/panfrost/panfrost_drv.c +index 1dfc457bbefc..4af25c0b6570 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_drv.c ++++ b/drivers/gpu/drm/panfrost/panfrost_drv.c +@@ -81,6 +81,7 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, + struct panfrost_gem_object *bo; + struct drm_panfrost_create_bo *args = data; + struct panfrost_gem_mapping *mapping; ++ int ret; + + if (!args->size || args->pad || + (args->flags & ~(PANFROST_BO_NOEXEC | PANFROST_BO_HEAP))) +@@ -91,21 +92,29 @@ static int panfrost_ioctl_create_bo(struct drm_device *dev, void *data, + !(args->flags & PANFROST_BO_NOEXEC)) + return -EINVAL; + +- bo = panfrost_gem_create_with_handle(file, dev, args->size, args->flags, +- 
&args->handle); ++ bo = panfrost_gem_create(dev, args->size, args->flags); + if (IS_ERR(bo)) + return PTR_ERR(bo); + ++ ret = drm_gem_handle_create(file, &bo->base.base, &args->handle); ++ if (ret) ++ goto out; ++ + mapping = panfrost_gem_mapping_get(bo, priv); +- if (!mapping) { +- drm_gem_object_put(&bo->base.base); +- return -EINVAL; ++ if (mapping) { ++ args->offset = mapping->mmnode.start << PAGE_SHIFT; ++ panfrost_gem_mapping_put(mapping); ++ } else { ++ /* This can only happen if the handle from ++ * drm_gem_handle_create() has already been guessed and freed ++ * by user space ++ */ ++ ret = -EINVAL; + } + +- args->offset = mapping->mmnode.start << PAGE_SHIFT; +- panfrost_gem_mapping_put(mapping); +- +- return 0; ++out: ++ drm_gem_object_put(&bo->base.base); ++ return ret; + } + + /** +diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.c b/drivers/gpu/drm/panfrost/panfrost_gem.c +index 1d917cea5ceb..c843fbfdb878 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_gem.c ++++ b/drivers/gpu/drm/panfrost/panfrost_gem.c +@@ -232,12 +232,8 @@ struct drm_gem_object *panfrost_gem_create_object(struct drm_device *dev, size_t + } + + struct panfrost_gem_object * +-panfrost_gem_create_with_handle(struct drm_file *file_priv, +- struct drm_device *dev, size_t size, +- u32 flags, +- uint32_t *handle) ++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags) + { +- int ret; + struct drm_gem_shmem_object *shmem; + struct panfrost_gem_object *bo; + +@@ -253,16 +249,6 @@ panfrost_gem_create_with_handle(struct drm_file *file_priv, + bo->noexec = !!(flags & PANFROST_BO_NOEXEC); + bo->is_heap = !!(flags & PANFROST_BO_HEAP); + +- /* +- * Allocate an id of idr table where the obj is registered +- * and handle has the id what user can see. +- */ +- ret = drm_gem_handle_create(file_priv, &shmem->base, handle); +- /* drop reference from allocate - handle holds it now. 
*/ +- drm_gem_object_put(&shmem->base); +- if (ret) +- return ERR_PTR(ret); +- + return bo; + } + +diff --git a/drivers/gpu/drm/panfrost/panfrost_gem.h b/drivers/gpu/drm/panfrost/panfrost_gem.h +index 8088d5fd8480..ad2877eeeccd 100644 +--- a/drivers/gpu/drm/panfrost/panfrost_gem.h ++++ b/drivers/gpu/drm/panfrost/panfrost_gem.h +@@ -69,10 +69,7 @@ panfrost_gem_prime_import_sg_table(struct drm_device *dev, + struct sg_table *sgt); + + struct panfrost_gem_object * +-panfrost_gem_create_with_handle(struct drm_file *file_priv, +- struct drm_device *dev, size_t size, +- u32 flags, +- uint32_t *handle); ++panfrost_gem_create(struct drm_device *dev, size_t size, u32 flags); + + int panfrost_gem_open(struct drm_gem_object *obj, struct drm_file *file_priv); + void panfrost_gem_close(struct drm_gem_object *obj, +-- +2.35.1 + diff --git a/queue-5.10/ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch b/queue-5.10/ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch new file mode 100644 index 00000000000..c3047554d3e --- /dev/null +++ b/queue-5.10/ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch @@ -0,0 +1,55 @@ +From 28685ef1bf313b266ed0bed36413816327baa182 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 15:43:43 +0800 +Subject: ext4: correct inconsistent error msg in nojournal mode + +From: Baokun Li + +[ Upstream commit 89481b5fa8c0640e62ba84c6020cee895f7ac643 ] + +When we used the journal_async_commit mounting option in nojournal mode, +the kernel told me that "can't mount with journal_checksum", was very +confusing. I find that when we mount with journal_async_commit, both the +JOURNAL_ASYNC_COMMIT and EXPLICIT_JOURNAL_CHECKSUM flags are set. However, +in the error branch, CHECKSUM is checked before ASYNC_COMMIT. As a result, +the above inconsistency occurs, and the ASYNC_COMMIT branch becomes dead +code that cannot be executed. Therefore, we exchange the positions of the +two judgments to make the error msg more accurate. 
+ +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20221109074343.4184862-1-libaokun1@huawei.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Sasha Levin +--- + fs/ext4/super.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index eb82c1d4883c..43f06a71d612 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4812,14 +4812,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + goto failed_mount3a; + } else { + /* Nojournal mode, all journal mount options are illegal */ +- if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { ++ if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ext4_msg(sb, KERN_ERR, "can't mount with " +- "journal_checksum, fs mounted w/o journal"); ++ "journal_async_commit, fs mounted w/o journal"); + goto failed_mount3a; + } +- if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { ++ ++ if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "can't mount with " +- "journal_async_commit, fs mounted w/o journal"); ++ "journal_checksum, fs mounted w/o journal"); + goto failed_mount3a; + } + if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { +-- +2.35.1 + diff --git a/queue-5.10/ext4-fix-deadlock-due-to-mbcache-entry-corruption.patch b/queue-5.10/ext4-fix-deadlock-due-to-mbcache-entry-corruption.patch new file mode 100644 index 00000000000..bd4009c6d6f --- /dev/null +++ b/queue-5.10/ext4-fix-deadlock-due-to-mbcache-entry-corruption.patch @@ -0,0 +1,143 @@ +From b6f8806be68152490927a71716b0f89e04c18313 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 23 Nov 2022 20:39:50 +0100 +Subject: ext4: fix deadlock due to mbcache entry corruption + +From: Jan Kara + +[ Upstream commit a44e84a9b7764c72896f7241a0ec9ac7e7ef38dd ] + +When manipulating xattr blocks, we can deadlock infinitely looping +inside ext4_xattr_block_set() where we constantly keep finding xattr +block for reuse in mbcache 
but we are unable to reuse it because its +reference count is too big. This happens because cache entry for the +xattr block is marked as reusable (e_reusable set) although its +reference count is too big. When this inconsistency happens, this +inconsistent state is kept indefinitely and so ext4_xattr_block_set() +keeps retrying indefinitely. + +The inconsistent state is caused by non-atomic update of e_reusable bit. +e_reusable is part of a bitfield and e_reusable update can race with +update of e_referenced bit in the same bitfield resulting in loss of one +of the updates. Fix the problem by using atomic bitops instead. + +This bug has been around for many years, but it became *much* easier +to hit after commit 65f8b80053a1 ("ext4: fix race when reusing xattr +blocks"). + +Cc: stable@vger.kernel.org +Fixes: 6048c64b2609 ("mbcache: add reusable flag to cache entries") +Fixes: 65f8b80053a1 ("ext4: fix race when reusing xattr blocks") +Reported-and-tested-by: Jeremi Piotrowski +Reported-by: Thilo Fromm +Link: https://lore.kernel.org/r/c77bf00f-4618-7149-56f1-b8d1664b9d07@linux.microsoft.com/ +Signed-off-by: Jan Kara +Reviewed-by: Andreas Dilger +Link: https://lore.kernel.org/r/20221123193950.16758-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/xattr.c | 4 ++-- + fs/mbcache.c | 14 ++++++++------ + include/linux/mbcache.h | 9 +++++++-- + 3 files changed, 17 insertions(+), 10 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 35251afdf770..6bf1c62eff04 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1275,7 +1275,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, + ce = mb_cache_entry_get(ea_block_cache, hash, + bh->b_blocknr); + if (ce) { +- ce->e_reusable = 1; ++ set_bit(MBE_REUSABLE_B, &ce->e_flags); + mb_cache_entry_put(ea_block_cache, ce); + } + } +@@ -2037,7 +2037,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + } + BHDR(new_bh)->h_refcount = cpu_to_le32(ref); 
+ if (ref == EXT4_XATTR_REFCOUNT_MAX) +- ce->e_reusable = 0; ++ clear_bit(MBE_REUSABLE_B, &ce->e_flags); + ea_bdebug(new_bh, "reusing; refcount now=%d", + ref); + ext4_xattr_block_csum_set(inode, new_bh); +diff --git a/fs/mbcache.c b/fs/mbcache.c +index 950f1829a7fd..7a12ae87c806 100644 +--- a/fs/mbcache.c ++++ b/fs/mbcache.c +@@ -94,8 +94,9 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + atomic_set(&entry->e_refcnt, 1); + entry->e_key = key; + entry->e_value = value; +- entry->e_reusable = reusable; +- entry->e_referenced = 0; ++ entry->e_flags = 0; ++ if (reusable) ++ set_bit(MBE_REUSABLE_B, &entry->e_flags); + head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) { +@@ -162,7 +163,8 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, + while (node) { + entry = hlist_bl_entry(node, struct mb_cache_entry, + e_hash_list); +- if (entry->e_key == key && entry->e_reusable && ++ if (entry->e_key == key && ++ test_bit(MBE_REUSABLE_B, &entry->e_flags) && + atomic_inc_not_zero(&entry->e_refcnt)) + goto out; + node = node->next; +@@ -318,7 +320,7 @@ EXPORT_SYMBOL(mb_cache_entry_delete_or_get); + void mb_cache_entry_touch(struct mb_cache *cache, + struct mb_cache_entry *entry) + { +- entry->e_referenced = 1; ++ set_bit(MBE_REFERENCED_B, &entry->e_flags); + } + EXPORT_SYMBOL(mb_cache_entry_touch); + +@@ -343,9 +345,9 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, + entry = list_first_entry(&cache->c_list, + struct mb_cache_entry, e_list); + /* Drop initial hash reference if there is no user */ +- if (entry->e_referenced || ++ if (test_bit(MBE_REFERENCED_B, &entry->e_flags) || + atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) { +- entry->e_referenced = 0; ++ clear_bit(MBE_REFERENCED_B, &entry->e_flags); + list_move_tail(&entry->e_list, &cache->c_list); + continue; + } +diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h +index 
e9d5ece87794..591bc4cefe1d 100644 +--- a/include/linux/mbcache.h ++++ b/include/linux/mbcache.h +@@ -10,6 +10,12 @@ + + struct mb_cache; + ++/* Cache entry flags */ ++enum { ++ MBE_REFERENCED_B = 0, ++ MBE_REUSABLE_B ++}; ++ + struct mb_cache_entry { + /* List of entries in cache - protected by cache->c_list_lock */ + struct list_head e_list; +@@ -26,8 +32,7 @@ struct mb_cache_entry { + atomic_t e_refcnt; + /* Key in hash - stable during lifetime of the entry */ + u32 e_key; +- u32 e_referenced:1; +- u32 e_reusable:1; ++ unsigned long e_flags; + /* User provided value - stable during lifetime of the entry */ + u64 e_value; + }; +-- +2.35.1 + diff --git a/queue-5.10/ext4-fix-leaking-uninitialized-memory-in-fast-commit.patch b/queue-5.10/ext4-fix-leaking-uninitialized-memory-in-fast-commit.patch new file mode 100644 index 00000000000..ff6d70af6d0 --- /dev/null +++ b/queue-5.10/ext4-fix-leaking-uninitialized-memory-in-fast-commit.patch @@ -0,0 +1,48 @@ +From f5248d98f78ebaa8c7a3f5f96c9a8f75988b8b9c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 6 Nov 2022 14:48:37 -0800 +Subject: ext4: fix leaking uninitialized memory in fast-commit journal + +From: Eric Biggers + +[ Upstream commit 594bc43b410316d70bb42aeff168837888d96810 ] + +When space at the end of fast-commit journal blocks is unused, make sure +to zero it out so that uninitialized memory is not leaked to disk. 
+ +Fixes: aa75f4d3daae ("ext4: main fast-commit commit path") +Cc: # v5.10+ +Signed-off-by: Eric Biggers +Link: https://lore.kernel.org/r/20221106224841.279231-4-ebiggers@kernel.org +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/fast_commit.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c +index 3b2d6106a703..eaa26477bceb 100644 +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -628,6 +628,9 @@ static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc) + *crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl)); + if (pad_len > 0) + ext4_fc_memzero(sb, tl + 1, pad_len, crc); ++ /* Don't leak uninitialized memory in the unused last byte. */ ++ *((u8 *)(tl + 1) + pad_len) = 0; ++ + ext4_fc_submit_bh(sb); + + ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh); +@@ -684,6 +687,8 @@ static int ext4_fc_write_tail(struct super_block *sb, u32 crc) + dst += sizeof(tail.fc_tid); + tail.fc_crc = cpu_to_le32(crc); + ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL); ++ dst += sizeof(tail.fc_crc); ++ memset(dst, 0, bsize - off); /* Don't leak uninitialized memory. 
*/ + + ext4_fc_submit_bh(sb); + +-- +2.35.1 + diff --git a/queue-5.10/ext4-fix-race-when-reusing-xattr-blocks.patch b/queue-5.10/ext4-fix-race-when-reusing-xattr-blocks.patch new file mode 100644 index 00000000000..1ee5673150a --- /dev/null +++ b/queue-5.10/ext4-fix-race-when-reusing-xattr-blocks.patch @@ -0,0 +1,180 @@ +From 4cae8ae405bffc57437d22fe54d6abbd9ff15725 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jul 2022 12:54:24 +0200 +Subject: ext4: fix race when reusing xattr blocks + +From: Jan Kara + +[ Upstream commit 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b ] + +When ext4_xattr_block_set() decides to remove xattr block the following +race can happen: + +CPU1 CPU2 +ext4_xattr_block_set() ext4_xattr_release_block() + new_bh = ext4_xattr_block_cache_find() + + lock_buffer(bh); + ref = le32_to_cpu(BHDR(bh)->h_refcount); + if (ref == 1) { + ... + mb_cache_entry_delete(); + unlock_buffer(bh); + ext4_free_blocks(); + ... + ext4_forget(..., bh, ...); + jbd2_journal_revoke(..., bh); + + ext4_journal_get_write_access(..., new_bh, ...) + do_get_write_access() + jbd2_journal_cancel_revoke(..., new_bh); + +Later the code in ext4_xattr_block_set() finds out the block got freed +and cancels reusal of the block but the revoke stays canceled and so in +case of block reuse and journal replay the filesystem can get corrupted. +If the race works out slightly differently, we can also hit assertions +in the jbd2 code. + +Fix the problem by making sure that once matching mbcache entry is +found, code dropping the last xattr block reference (or trying to modify +xattr block in place) waits until the mbcache entry reference is +dropped. This way code trying to reuse xattr block is protected from +someone trying to drop the last reference to xattr block. 
+ +Reported-and-tested-by: Ritesh Harjani +CC: stable@vger.kernel.org +Fixes: 82939d7999df ("ext4: convert to mbcache2") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz +Signed-off-by: Theodore Ts'o +Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption") +Signed-off-by: Sasha Levin +--- + fs/ext4/xattr.c | 67 +++++++++++++++++++++++++++++++++---------------- + 1 file changed, 45 insertions(+), 22 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 9d5ccc90eb63..35251afdf770 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, + /* Remove entry from mbcache when EA inode is getting evicted */ + void ext4_evict_ea_inode(struct inode *inode) + { +- if (EA_INODE_CACHE(inode)) +- mb_cache_entry_delete(EA_INODE_CACHE(inode), +- ext4_xattr_inode_get_hash(inode), inode->i_ino); ++ struct mb_cache_entry *oe; ++ ++ if (!EA_INODE_CACHE(inode)) ++ return; ++ /* Wait for entry to get unused so that we can remove it */ ++ while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode), ++ ext4_xattr_inode_get_hash(inode), inode->i_ino))) { ++ mb_cache_entry_wait_unused(oe); ++ mb_cache_entry_put(EA_INODE_CACHE(inode), oe); ++ } + } + + static int +@@ -1223,6 +1230,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, + if (error) + goto out; + ++retry_ref: + lock_buffer(bh); + hash = le32_to_cpu(BHDR(bh)->h_hash); + ref = le32_to_cpu(BHDR(bh)->h_refcount); +@@ -1232,9 +1240,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, + * This must happen under buffer lock for + * ext4_xattr_block_set() to reliably detect freed block + */ +- if (ea_block_cache) +- mb_cache_entry_delete(ea_block_cache, hash, +- bh->b_blocknr); ++ if (ea_block_cache) { ++ struct mb_cache_entry *oe; ++ ++ oe = mb_cache_entry_delete_or_get(ea_block_cache, hash, ++ bh->b_blocknr); ++ if 
(oe) { ++ unlock_buffer(bh); ++ mb_cache_entry_wait_unused(oe); ++ mb_cache_entry_put(ea_block_cache, oe); ++ goto retry_ref; ++ } ++ } + get_bh(bh); + unlock_buffer(bh); + +@@ -1862,9 +1879,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + * ext4_xattr_block_set() to reliably detect modified + * block + */ +- if (ea_block_cache) +- mb_cache_entry_delete(ea_block_cache, hash, +- bs->bh->b_blocknr); ++ if (ea_block_cache) { ++ struct mb_cache_entry *oe; ++ ++ oe = mb_cache_entry_delete_or_get(ea_block_cache, ++ hash, bs->bh->b_blocknr); ++ if (oe) { ++ /* ++ * Xattr block is getting reused. Leave ++ * it alone. ++ */ ++ mb_cache_entry_put(ea_block_cache, oe); ++ goto clone_block; ++ } ++ } + ea_bdebug(bs->bh, "modifying in-place"); + error = ext4_xattr_set_entry(i, s, handle, inode, + true /* is_block */); +@@ -1880,6 +1908,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + goto cleanup; + goto inserted; + } ++clone_block: + unlock_buffer(bs->bh); + ea_bdebug(bs->bh, "cloning"); + s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS); +@@ -1985,18 +2014,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + lock_buffer(new_bh); + /* + * We have to be careful about races with +- * freeing, rehashing or adding references to +- * xattr block. Once we hold buffer lock xattr +- * block's state is stable so we can check +- * whether the block got freed / rehashed or +- * not. Since we unhash mbcache entry under +- * buffer lock when freeing / rehashing xattr +- * block, checking whether entry is still +- * hashed is reliable. Same rules hold for +- * e_reusable handling. ++ * adding references to xattr block. Once we ++ * hold buffer lock xattr block's state is ++ * stable so we can check the additional ++ * reference fits. 
+ */ +- if (hlist_bl_unhashed(&ce->e_hash_list) || +- !ce->e_reusable) { ++ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1; ++ if (ref > EXT4_XATTR_REFCOUNT_MAX) { + /* + * Undo everything and check mbcache + * again. +@@ -2011,9 +2035,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + new_bh = NULL; + goto inserted; + } +- ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1; + BHDR(new_bh)->h_refcount = cpu_to_le32(ref); +- if (ref >= EXT4_XATTR_REFCOUNT_MAX) ++ if (ref == EXT4_XATTR_REFCOUNT_MAX) + ce->e_reusable = 0; + ea_bdebug(new_bh, "reusing; refcount now=%d", + ref); +-- +2.35.1 + diff --git a/queue-5.10/ext4-fix-various-seppling-typos.patch b/queue-5.10/ext4-fix-various-seppling-typos.patch new file mode 100644 index 00000000000..d8c452cde1f --- /dev/null +++ b/queue-5.10/ext4-fix-various-seppling-typos.patch @@ -0,0 +1,142 @@ +From 72772ad0d3651502c14a5046229bd2a11a94dbbf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 27 Mar 2021 16:00:05 +0530 +Subject: ext4: fix various seppling typos + +From: Bhaskar Chowdhury + +[ Upstream commit 3088e5a5153cda27ec26461e5edf2821e15e802c ] + +Signed-off-by: Bhaskar Chowdhury +Link: https://lore.kernel.org/r/cover.1616840203.git.unixbhaskar@gmail.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/fast_commit.c | 2 +- + fs/ext4/indirect.c | 2 +- + fs/ext4/inline.c | 2 +- + fs/ext4/inode.c | 2 +- + fs/ext4/mballoc.h | 2 +- + fs/ext4/migrate.c | 6 +++--- + fs/ext4/namei.c | 2 +- + fs/ext4/xattr.c | 2 +- + 8 files changed, 10 insertions(+), 10 deletions(-) + +diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c +index 41dcf21558c4..3b2d6106a703 100644 +--- a/fs/ext4/fast_commit.c ++++ b/fs/ext4/fast_commit.c +@@ -66,7 +66,7 @@ + * Fast Commit Ineligibility + * ------------------------- + * Not all operations are supported by fast commits today (e.g extended +- * attributes). Fast commit ineligiblity is marked by calling one of the ++ * attributes). 
Fast commit ineligibility is marked by calling one of the + * two following functions: + * + * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall +diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c +index b7d130f4b5e4..237983cd8cdc 100644 +--- a/fs/ext4/indirect.c ++++ b/fs/ext4/indirect.c +@@ -712,7 +712,7 @@ static int ext4_ind_trunc_restart_fn(handle_t *handle, struct inode *inode, + + /* + * Truncate transactions can be complex and absolutely huge. So we need to +- * be able to restart the transaction at a conventient checkpoint to make ++ * be able to restart the transaction at a convenient checkpoint to make + * sure we don't overflow the journal. + * + * Try to extend this transaction for the purposes of truncation. If +diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c +index 88bd1d1cca23..77377befbb1c 100644 +--- a/fs/ext4/inline.c ++++ b/fs/ext4/inline.c +@@ -799,7 +799,7 @@ ext4_journalled_write_inline_data(struct inode *inode, + * clear the inode state safely. + * 2. The inode has inline data, then we need to read the data, make it + * update and dirty so that ext4_da_writepages can handle it. We don't +- * need to start the journal since the file's metatdata isn't changed now. ++ * need to start the journal since the file's metadata isn't changed now. + */ + static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, + struct inode *inode, +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index d7dbe1eb9da0..2d3004b3fc56 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -3885,7 +3885,7 @@ static int __ext4_block_zero_page_range(handle_t *handle, + * starting from file offset 'from'. The range to be zero'd must + * be contained with in one block. 
If the specified range exceeds + * the end of the block it will be shortened to end of the block +- * that cooresponds to 'from' ++ * that corresponds to 'from' + */ + static int ext4_block_zero_page_range(handle_t *handle, + struct address_space *mapping, loff_t from, loff_t length) +diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h +index e75b4749aa1c..7be6288e48ec 100644 +--- a/fs/ext4/mballoc.h ++++ b/fs/ext4/mballoc.h +@@ -59,7 +59,7 @@ + * by the stream allocator, which purpose is to pack requests + * as close each to other as possible to produce smooth I/O traffic + * We use locality group prealloc space for stream request. +- * We can tune the same via /proc/fs/ext4//stream_req ++ * We can tune the same via /proc/fs/ext4//stream_req + */ + #define MB_DEFAULT_STREAM_THRESHOLD 16 /* 64K */ + +diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c +index 4bfe2252d9a4..b0ea646454ac 100644 +--- a/fs/ext4/migrate.c ++++ b/fs/ext4/migrate.c +@@ -32,7 +32,7 @@ static int finish_range(handle_t *handle, struct inode *inode, + newext.ee_block = cpu_to_le32(lb->first_block); + newext.ee_len = cpu_to_le16(lb->last_block - lb->first_block + 1); + ext4_ext_store_pblock(&newext, lb->first_pblock); +- /* Locking only for convinience since we are operating on temp inode */ ++ /* Locking only for convenience since we are operating on temp inode */ + down_write(&EXT4_I(inode)->i_data_sem); + path = ext4_find_extent(inode, lb->first_block, NULL, 0); + if (IS_ERR(path)) { +@@ -43,8 +43,8 @@ static int finish_range(handle_t *handle, struct inode *inode, + + /* + * Calculate the credit needed to inserting this extent +- * Since we are doing this in loop we may accumalate extra +- * credit. But below we try to not accumalate too much ++ * Since we are doing this in loop we may accumulate extra ++ * credit. But below we try to not accumulate too much + * of them by restarting the journal. 
+ */ + needed = ext4_ext_calc_credits_for_single_extent(inode, +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index c17d5f399f9e..ce4962bb62bc 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -995,7 +995,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, + * If the hash is 1, then continue only if the next page has a + * continuation hash of any value. This is used for readdir + * handling. Otherwise, check to see if the hash matches the +- * desired contiuation hash. If it doesn't, return since ++ * desired continuation hash. If it doesn't, return since + * there's no point to read in the successive index pages. + */ + bhash = dx_get_hash(p->at); +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 795ef72f0d3c..74d045b426dd 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1617,7 +1617,7 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, + * If storing the value in an external inode is an option, + * reserve space for xattr entries/names in the external + * attribute block so that a long value does not occupy the +- * whole space and prevent futher entries being added. ++ * whole space and prevent further entries being added. + */ + if (ext4_has_feature_ea_inode(inode->i_sb) && + new_size && is_block && +-- +2.35.1 + diff --git a/queue-5.10/ext4-goto-right-label-failed_mount3a.patch b/queue-5.10/ext4-goto-right-label-failed_mount3a.patch new file mode 100644 index 00000000000..c58ba3a4e62 --- /dev/null +++ b/queue-5.10/ext4-goto-right-label-failed_mount3a.patch @@ -0,0 +1,69 @@ +From c37c890467d3573aa820e8ca32e3686da528a79c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 16 Sep 2022 22:15:12 +0800 +Subject: ext4: goto right label 'failed_mount3a' + +From: Jason Yan + +[ Upstream commit 43bd6f1b49b61f43de4d4e33661b8dbe8c911f14 ] + +Before these two branches neither loaded the journal nor created the +xattr cache. So the right label to goto is 'failed_mount3a'. 
Although +this did not cause any issues because the error handler validated if the +pointer is null. However this still made me confused when reading +the code. So it's still worth to modify to goto the right label. + +Signed-off-by: Jason Yan +Reviewed-by: Jan Kara +Reviewed-by: Ritesh Harjani (IBM) +Link: https://lore.kernel.org/r/20220916141527.1012715-2-yanaijie@huawei.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 89481b5fa8c0 ("ext4: correct inconsistent error msg in nojournal mode") +Signed-off-by: Sasha Levin +--- + fs/ext4/super.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index aa7bcc856de9..eb82c1d4883c 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -4809,30 +4809,30 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + ext4_has_feature_journal_needs_recovery(sb)) { + ext4_msg(sb, KERN_ERR, "required journal recovery " + "suppressed and not mounted read-only"); +- goto failed_mount_wq; ++ goto failed_mount3a; + } else { + /* Nojournal mode, all journal mount options are illegal */ + if (test_opt2(sb, EXPLICIT_JOURNAL_CHECKSUM)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_checksum, fs mounted w/o journal"); +- goto failed_mount_wq; ++ goto failed_mount3a; + } + if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "journal_async_commit, fs mounted w/o journal"); +- goto failed_mount_wq; ++ goto failed_mount3a; + } + if (sbi->s_commit_interval != JBD2_DEFAULT_MAX_COMMIT_AGE*HZ) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "commit=%lu, fs mounted w/o journal", + sbi->s_commit_interval / HZ); +- goto failed_mount_wq; ++ goto failed_mount3a; + } + if (EXT4_MOUNT_DATA_FLAGS & + (sbi->s_mount_opt ^ sbi->s_def_mount_opt)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "data=, fs mounted w/o journal"); +- goto failed_mount_wq; ++ goto failed_mount3a; + } + sbi->s_def_mount_opt &= 
~EXT4_MOUNT_JOURNAL_CHECKSUM; + clear_opt(sb, JOURNAL_CHECKSUM); +-- +2.35.1 + diff --git a/queue-5.10/ext4-move-functions-in-super.c.patch b/queue-5.10/ext4-move-functions-in-super.c.patch new file mode 100644 index 00000000000..f84594b7076 --- /dev/null +++ b/queue-5.10/ext4-move-functions-in-super.c.patch @@ -0,0 +1,240 @@ +From 8e493fa7d24ac097d8be5f683f18de1c0b5d8f8c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Nov 2020 12:33:58 +0100 +Subject: ext4: move functions in super.c + +From: Jan Kara + +[ Upstream commit 4067662388f97d0f360e568820d9d5bac6a3c9fa ] + +Just move error info related functions in super.c close to +ext4_handle_error(). We'll want to combine save_error_info() with +ext4_handle_error() and this makes change more obvious and saves a +forward declaration as well. No functional change. + +Signed-off-by: Jan Kara +Reviewed-by: Andreas Dilger +Link: https://lore.kernel.org/r/20201127113405.26867-6-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/super.c | 196 ++++++++++++++++++++++++------------------------ + 1 file changed, 98 insertions(+), 98 deletions(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 43f06a71d612..982341939a27 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -417,104 +417,6 @@ static time64_t __ext4_get_tstamp(__le32 *lo, __u8 *hi) + #define ext4_get_tstamp(es, tstamp) \ + __ext4_get_tstamp(&(es)->tstamp, &(es)->tstamp ## _hi) + +-static void __save_error_info(struct super_block *sb, int error, +- __u32 ino, __u64 block, +- const char *func, unsigned int line) +-{ +- struct ext4_super_block *es = EXT4_SB(sb)->s_es; +- int err; +- +- EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; +- if (bdev_read_only(sb->s_bdev)) +- return; +- es->s_state |= cpu_to_le16(EXT4_ERROR_FS); +- ext4_update_tstamp(es, s_last_error_time); +- strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); +- es->s_last_error_line = cpu_to_le32(line); +- es->s_last_error_ino = 
cpu_to_le32(ino); +- es->s_last_error_block = cpu_to_le64(block); +- switch (error) { +- case EIO: +- err = EXT4_ERR_EIO; +- break; +- case ENOMEM: +- err = EXT4_ERR_ENOMEM; +- break; +- case EFSBADCRC: +- err = EXT4_ERR_EFSBADCRC; +- break; +- case 0: +- case EFSCORRUPTED: +- err = EXT4_ERR_EFSCORRUPTED; +- break; +- case ENOSPC: +- err = EXT4_ERR_ENOSPC; +- break; +- case ENOKEY: +- err = EXT4_ERR_ENOKEY; +- break; +- case EROFS: +- err = EXT4_ERR_EROFS; +- break; +- case EFBIG: +- err = EXT4_ERR_EFBIG; +- break; +- case EEXIST: +- err = EXT4_ERR_EEXIST; +- break; +- case ERANGE: +- err = EXT4_ERR_ERANGE; +- break; +- case EOVERFLOW: +- err = EXT4_ERR_EOVERFLOW; +- break; +- case EBUSY: +- err = EXT4_ERR_EBUSY; +- break; +- case ENOTDIR: +- err = EXT4_ERR_ENOTDIR; +- break; +- case ENOTEMPTY: +- err = EXT4_ERR_ENOTEMPTY; +- break; +- case ESHUTDOWN: +- err = EXT4_ERR_ESHUTDOWN; +- break; +- case EFAULT: +- err = EXT4_ERR_EFAULT; +- break; +- default: +- err = EXT4_ERR_UNKNOWN; +- } +- es->s_last_error_errcode = err; +- if (!es->s_first_error_time) { +- es->s_first_error_time = es->s_last_error_time; +- es->s_first_error_time_hi = es->s_last_error_time_hi; +- strncpy(es->s_first_error_func, func, +- sizeof(es->s_first_error_func)); +- es->s_first_error_line = cpu_to_le32(line); +- es->s_first_error_ino = es->s_last_error_ino; +- es->s_first_error_block = es->s_last_error_block; +- es->s_first_error_errcode = es->s_last_error_errcode; +- } +- /* +- * Start the daily error reporting function if it hasn't been +- * started already +- */ +- if (!es->s_error_count) +- mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); +- le32_add_cpu(&es->s_error_count, 1); +-} +- +-static void save_error_info(struct super_block *sb, int error, +- __u32 ino, __u64 block, +- const char *func, unsigned int line) +-{ +- __save_error_info(sb, error, ino, block, func, line); +- if (!bdev_read_only(sb->s_bdev)) +- ext4_commit_super(sb, 1); +-} +- + /* + * The del_gendisk() 
function uninitializes the disk-specific data + * structures, including the bdi structure, without telling anyone +@@ -643,6 +545,104 @@ static bool system_going_down(void) + || system_state == SYSTEM_RESTART; + } + ++static void __save_error_info(struct super_block *sb, int error, ++ __u32 ino, __u64 block, ++ const char *func, unsigned int line) ++{ ++ struct ext4_super_block *es = EXT4_SB(sb)->s_es; ++ int err; ++ ++ EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; ++ if (bdev_read_only(sb->s_bdev)) ++ return; ++ es->s_state |= cpu_to_le16(EXT4_ERROR_FS); ++ ext4_update_tstamp(es, s_last_error_time); ++ strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); ++ es->s_last_error_line = cpu_to_le32(line); ++ es->s_last_error_ino = cpu_to_le32(ino); ++ es->s_last_error_block = cpu_to_le64(block); ++ switch (error) { ++ case EIO: ++ err = EXT4_ERR_EIO; ++ break; ++ case ENOMEM: ++ err = EXT4_ERR_ENOMEM; ++ break; ++ case EFSBADCRC: ++ err = EXT4_ERR_EFSBADCRC; ++ break; ++ case 0: ++ case EFSCORRUPTED: ++ err = EXT4_ERR_EFSCORRUPTED; ++ break; ++ case ENOSPC: ++ err = EXT4_ERR_ENOSPC; ++ break; ++ case ENOKEY: ++ err = EXT4_ERR_ENOKEY; ++ break; ++ case EROFS: ++ err = EXT4_ERR_EROFS; ++ break; ++ case EFBIG: ++ err = EXT4_ERR_EFBIG; ++ break; ++ case EEXIST: ++ err = EXT4_ERR_EEXIST; ++ break; ++ case ERANGE: ++ err = EXT4_ERR_ERANGE; ++ break; ++ case EOVERFLOW: ++ err = EXT4_ERR_EOVERFLOW; ++ break; ++ case EBUSY: ++ err = EXT4_ERR_EBUSY; ++ break; ++ case ENOTDIR: ++ err = EXT4_ERR_ENOTDIR; ++ break; ++ case ENOTEMPTY: ++ err = EXT4_ERR_ENOTEMPTY; ++ break; ++ case ESHUTDOWN: ++ err = EXT4_ERR_ESHUTDOWN; ++ break; ++ case EFAULT: ++ err = EXT4_ERR_EFAULT; ++ break; ++ default: ++ err = EXT4_ERR_UNKNOWN; ++ } ++ es->s_last_error_errcode = err; ++ if (!es->s_first_error_time) { ++ es->s_first_error_time = es->s_last_error_time; ++ es->s_first_error_time_hi = es->s_last_error_time_hi; ++ strncpy(es->s_first_error_func, func, ++ 
sizeof(es->s_first_error_func)); ++ es->s_first_error_line = cpu_to_le32(line); ++ es->s_first_error_ino = es->s_last_error_ino; ++ es->s_first_error_block = es->s_last_error_block; ++ es->s_first_error_errcode = es->s_last_error_errcode; ++ } ++ /* ++ * Start the daily error reporting function if it hasn't been ++ * started already ++ */ ++ if (!es->s_error_count) ++ mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ); ++ le32_add_cpu(&es->s_error_count, 1); ++} ++ ++static void save_error_info(struct super_block *sb, int error, ++ __u32 ino, __u64 block, ++ const char *func, unsigned int line) ++{ ++ __save_error_info(sb, error, ino, block, func, line); ++ if (!bdev_read_only(sb->s_bdev)) ++ ext4_commit_super(sb, 1); ++} ++ + /* Deal with the reporting of failure conditions on a filesystem such as + * inconsistencies detected or read IO failures. + * +-- +2.35.1 + diff --git a/queue-5.10/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch b/queue-5.10/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch new file mode 100644 index 00000000000..ed914bbd7db --- /dev/null +++ b/queue-5.10/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch @@ -0,0 +1,117 @@ +From 6e7fa1c6130b48ca06f96e5a6de00aabf64c231b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jul 2022 12:54:22 +0200 +Subject: ext4: remove EA inode entry from mbcache on inode eviction + +From: Jan Kara + +[ Upstream commit 6bc0d63dad7f9f54d381925ee855b402f652fa39 ] + +Currently we remove EA inode from mbcache as soon as its xattr refcount +drops to zero. However there can be pending attempts to reuse the inode +and thus refcount handling code has to handle the situation when +refcount increases from zero anyway. So save some work and just keep EA +inode in mbcache until it is getting evicted. 
At that moment we are sure +following iget() of EA inode will fail anyway (or wait for eviction to +finish and load things from the disk again) and so removing mbcache +entry at that moment is fine and simplifies the code a bit. + +CC: stable@vger.kernel.org +Fixes: 82939d7999df ("ext4: convert to mbcache2") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220712105436.32204-3-jack@suse.cz +Signed-off-by: Theodore Ts'o +Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption") +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 2 ++ + fs/ext4/xattr.c | 24 ++++++++---------------- + fs/ext4/xattr.h | 1 + + 3 files changed, 11 insertions(+), 16 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 2d3004b3fc56..355343cf4609 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -179,6 +179,8 @@ void ext4_evict_inode(struct inode *inode) + + trace_ext4_evict_inode(inode); + ++ if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL) ++ ext4_evict_ea_inode(inode); + if (inode->i_nlink) { + /* + * When journalling data dirty buffers are tracked only in the +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 0b682c92bfe9..0555f32f0fd4 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -436,6 +436,14 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino, + return err; + } + ++/* Remove entry from mbcache when EA inode is getting evicted */ ++void ext4_evict_ea_inode(struct inode *inode) ++{ ++ if (EA_INODE_CACHE(inode)) ++ mb_cache_entry_delete(EA_INODE_CACHE(inode), ++ ext4_xattr_inode_get_hash(inode), inode->i_ino); ++} ++ + static int + ext4_xattr_inode_verify_hashes(struct inode *ea_inode, + struct ext4_xattr_entry *entry, void *buffer, +@@ -972,10 +980,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode, + static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, + int ref_change) + { +- struct mb_cache *ea_inode_cache = 
EA_INODE_CACHE(ea_inode); + struct ext4_iloc iloc; + s64 ref_count; +- u32 hash; + int ret; + + inode_lock(ea_inode); +@@ -998,14 +1004,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, + + set_nlink(ea_inode, 1); + ext4_orphan_del(handle, ea_inode); +- +- if (ea_inode_cache) { +- hash = ext4_xattr_inode_get_hash(ea_inode); +- mb_cache_entry_create(ea_inode_cache, +- GFP_NOFS, hash, +- ea_inode->i_ino, +- true /* reusable */); +- } + } + } else { + WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld", +@@ -1018,12 +1016,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode, + + clear_nlink(ea_inode); + ext4_orphan_add(handle, ea_inode); +- +- if (ea_inode_cache) { +- hash = ext4_xattr_inode_get_hash(ea_inode); +- mb_cache_entry_delete(ea_inode_cache, hash, +- ea_inode->i_ino); +- } + } + } + +diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h +index 87e5863bb493..b357872ab83b 100644 +--- a/fs/ext4/xattr.h ++++ b/fs/ext4/xattr.h +@@ -191,6 +191,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array); + + extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, + struct ext4_inode *raw_inode, handle_t *handle); ++extern void ext4_evict_ea_inode(struct inode *inode); + + extern const struct xattr_handler *ext4_xattr_handlers[]; + +-- +2.35.1 + diff --git a/queue-5.10/ext4-simplify-ext4-error-translation.patch b/queue-5.10/ext4-simplify-ext4-error-translation.patch new file mode 100644 index 00000000000..f145aaa9486 --- /dev/null +++ b/queue-5.10/ext4-simplify-ext4-error-translation.patch @@ -0,0 +1,146 @@ +From 99fadec5f5b52abc5ea4fd166e99dba4a1719a42 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 27 Nov 2020 12:33:59 +0100 +Subject: ext4: simplify ext4 error translation + +From: Jan Kara + +[ Upstream commit 02a7780e4d2fcf438ac6773bc469e7ada2af56be ] + +We convert errno's to ext4 on-disk format error codes in +save_error_info(). 
Add a function and a bit of macro magic to make this +simpler. + +Signed-off-by: Jan Kara +Reviewed-by: Andreas Dilger +Link: https://lore.kernel.org/r/20201127113405.26867-7-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/super.c | 95 +++++++++++++++++++++---------------------------- + 1 file changed, 40 insertions(+), 55 deletions(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 982341939a27..ced84ed4e592 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -545,76 +545,61 @@ static bool system_going_down(void) + || system_state == SYSTEM_RESTART; + } + ++struct ext4_err_translation { ++ int code; ++ int errno; ++}; ++ ++#define EXT4_ERR_TRANSLATE(err) { .code = EXT4_ERR_##err, .errno = err } ++ ++static struct ext4_err_translation err_translation[] = { ++ EXT4_ERR_TRANSLATE(EIO), ++ EXT4_ERR_TRANSLATE(ENOMEM), ++ EXT4_ERR_TRANSLATE(EFSBADCRC), ++ EXT4_ERR_TRANSLATE(EFSCORRUPTED), ++ EXT4_ERR_TRANSLATE(ENOSPC), ++ EXT4_ERR_TRANSLATE(ENOKEY), ++ EXT4_ERR_TRANSLATE(EROFS), ++ EXT4_ERR_TRANSLATE(EFBIG), ++ EXT4_ERR_TRANSLATE(EEXIST), ++ EXT4_ERR_TRANSLATE(ERANGE), ++ EXT4_ERR_TRANSLATE(EOVERFLOW), ++ EXT4_ERR_TRANSLATE(EBUSY), ++ EXT4_ERR_TRANSLATE(ENOTDIR), ++ EXT4_ERR_TRANSLATE(ENOTEMPTY), ++ EXT4_ERR_TRANSLATE(ESHUTDOWN), ++ EXT4_ERR_TRANSLATE(EFAULT), ++}; ++ ++static int ext4_errno_to_code(int errno) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(err_translation); i++) ++ if (err_translation[i].errno == errno) ++ return err_translation[i].code; ++ return EXT4_ERR_UNKNOWN; ++} ++ + static void __save_error_info(struct super_block *sb, int error, + __u32 ino, __u64 block, + const char *func, unsigned int line) + { + struct ext4_super_block *es = EXT4_SB(sb)->s_es; +- int err; + + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + if (bdev_read_only(sb->s_bdev)) + return; ++ /* We default to EFSCORRUPTED error... 
*/ ++ if (error == 0) ++ error = EFSCORRUPTED; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + ext4_update_tstamp(es, s_last_error_time); + strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); + es->s_last_error_line = cpu_to_le32(line); + es->s_last_error_ino = cpu_to_le32(ino); + es->s_last_error_block = cpu_to_le64(block); +- switch (error) { +- case EIO: +- err = EXT4_ERR_EIO; +- break; +- case ENOMEM: +- err = EXT4_ERR_ENOMEM; +- break; +- case EFSBADCRC: +- err = EXT4_ERR_EFSBADCRC; +- break; +- case 0: +- case EFSCORRUPTED: +- err = EXT4_ERR_EFSCORRUPTED; +- break; +- case ENOSPC: +- err = EXT4_ERR_ENOSPC; +- break; +- case ENOKEY: +- err = EXT4_ERR_ENOKEY; +- break; +- case EROFS: +- err = EXT4_ERR_EROFS; +- break; +- case EFBIG: +- err = EXT4_ERR_EFBIG; +- break; +- case EEXIST: +- err = EXT4_ERR_EEXIST; +- break; +- case ERANGE: +- err = EXT4_ERR_ERANGE; +- break; +- case EOVERFLOW: +- err = EXT4_ERR_EOVERFLOW; +- break; +- case EBUSY: +- err = EXT4_ERR_EBUSY; +- break; +- case ENOTDIR: +- err = EXT4_ERR_ENOTDIR; +- break; +- case ENOTEMPTY: +- err = EXT4_ERR_ENOTEMPTY; +- break; +- case ESHUTDOWN: +- err = EXT4_ERR_ESHUTDOWN; +- break; +- case EFAULT: +- err = EXT4_ERR_EFAULT; +- break; +- default: +- err = EXT4_ERR_UNKNOWN; +- } +- es->s_last_error_errcode = err; ++ es->s_last_error_errcode = ext4_errno_to_code(error); + if (!es->s_first_error_time) { + es->s_first_error_time = es->s_last_error_time; + es->s_first_error_time_hi = es->s_last_error_time_hi; +-- +2.35.1 + diff --git a/queue-5.10/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch b/queue-5.10/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch new file mode 100644 index 00000000000..7ba43472a43 --- /dev/null +++ b/queue-5.10/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch @@ -0,0 +1,126 @@ +From 1ca65c5d79d24e53bcc991f57037853316bcaf3a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jul 2022 12:54:23 +0200 +Subject: ext4: unindent codeblock in 
ext4_xattr_block_set() + +From: Jan Kara + +[ Upstream commit fd48e9acdf26d0cbd80051de07d4a735d05d29b2 ] + +Remove unnecessary else (and thus indentation level) from a code block +in ext4_xattr_block_set(). It will also make following code changes +easier. No functional changes. + +CC: stable@vger.kernel.org +Fixes: 82939d7999df ("ext4: convert to mbcache2") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220712105436.32204-4-jack@suse.cz +Signed-off-by: Theodore Ts'o +Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption") +Signed-off-by: Sasha Levin +--- + fs/ext4/xattr.c | 77 ++++++++++++++++++++++++------------------------- + 1 file changed, 38 insertions(+), 39 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 0555f32f0fd4..9d5ccc90eb63 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1846,6 +1846,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + #define header(x) ((struct ext4_xattr_header *)(x)) + + if (s->base) { ++ int offset = (char *)s->here - bs->bh->b_data; ++ + BUFFER_TRACE(bs->bh, "get_write_access"); + error = ext4_journal_get_write_access(handle, bs->bh); + if (error) +@@ -1877,49 +1879,46 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + if (error) + goto cleanup; + goto inserted; +- } else { +- int offset = (char *)s->here - bs->bh->b_data; ++ } ++ unlock_buffer(bs->bh); ++ ea_bdebug(bs->bh, "cloning"); ++ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS); ++ error = -ENOMEM; ++ if (s->base == NULL) ++ goto cleanup; ++ s->first = ENTRY(header(s->base)+1); ++ header(s->base)->h_refcount = cpu_to_le32(1); ++ s->here = ENTRY(s->base + offset); ++ s->end = s->base + bs->bh->b_size; + +- unlock_buffer(bs->bh); +- ea_bdebug(bs->bh, "cloning"); +- s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS); +- error = -ENOMEM; +- if (s->base == NULL) ++ /* ++ * If existing entry points to an xattr inode, we need ++ * to prevent 
ext4_xattr_set_entry() from decrementing ++ * ref count on it because the reference belongs to the ++ * original block. In this case, make the entry look ++ * like it has an empty value. ++ */ ++ if (!s->not_found && s->here->e_value_inum) { ++ ea_ino = le32_to_cpu(s->here->e_value_inum); ++ error = ext4_xattr_inode_iget(inode, ea_ino, ++ le32_to_cpu(s->here->e_hash), ++ &tmp_inode); ++ if (error) + goto cleanup; +- s->first = ENTRY(header(s->base)+1); +- header(s->base)->h_refcount = cpu_to_le32(1); +- s->here = ENTRY(s->base + offset); +- s->end = s->base + bs->bh->b_size; + +- /* +- * If existing entry points to an xattr inode, we need +- * to prevent ext4_xattr_set_entry() from decrementing +- * ref count on it because the reference belongs to the +- * original block. In this case, make the entry look +- * like it has an empty value. +- */ +- if (!s->not_found && s->here->e_value_inum) { +- ea_ino = le32_to_cpu(s->here->e_value_inum); +- error = ext4_xattr_inode_iget(inode, ea_ino, +- le32_to_cpu(s->here->e_hash), +- &tmp_inode); +- if (error) +- goto cleanup; +- +- if (!ext4_test_inode_state(tmp_inode, +- EXT4_STATE_LUSTRE_EA_INODE)) { +- /* +- * Defer quota free call for previous +- * inode until success is guaranteed. +- */ +- old_ea_inode_quota = le32_to_cpu( +- s->here->e_value_size); +- } +- iput(tmp_inode); +- +- s->here->e_value_inum = 0; +- s->here->e_value_size = 0; ++ if (!ext4_test_inode_state(tmp_inode, ++ EXT4_STATE_LUSTRE_EA_INODE)) { ++ /* ++ * Defer quota free call for previous ++ * inode until success is guaranteed. ++ */ ++ old_ea_inode_quota = le32_to_cpu( ++ s->here->e_value_size); + } ++ iput(tmp_inode); ++ ++ s->here->e_value_inum = 0; ++ s->here->e_value_size = 0; + } + } else { + /* Allocate a buffer where we construct the new block. 
*/ +-- +2.35.1 + diff --git a/queue-5.10/ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch b/queue-5.10/ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch new file mode 100644 index 00000000000..79272b8d114 --- /dev/null +++ b/queue-5.10/ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch @@ -0,0 +1,41 @@ +From d243769f1e1200239d0af80c9781b409717499c3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 May 2022 11:01:20 +0800 +Subject: ext4: use kmemdup() to replace kmalloc + memcpy + +From: Shuqi Zhang + +[ Upstream commit 4efd9f0d120c55b08852ee5605dbb02a77089a5d ] + +Replace kmalloc + memcpy with kmemdup() + +Signed-off-by: Shuqi Zhang +Reviewed-by: Ritesh Harjani +Link: https://lore.kernel.org/r/20220525030120.803330-1-zhangshuqi3@huawei.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption") +Signed-off-by: Sasha Levin +--- + fs/ext4/xattr.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c +index 74d045b426dd..0b682c92bfe9 100644 +--- a/fs/ext4/xattr.c ++++ b/fs/ext4/xattr.c +@@ -1890,11 +1890,10 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, + + unlock_buffer(bs->bh); + ea_bdebug(bs->bh, "cloning"); +- s->base = kmalloc(bs->bh->b_size, GFP_NOFS); ++ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS); + error = -ENOMEM; + if (s->base == NULL) + goto cleanup; +- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size); + s->first = ENTRY(header(s->base)+1); + header(s->base)->h_refcount = cpu_to_le32(1); + s->here = ENTRY(s->base + offset); +-- +2.35.1 + diff --git a/queue-5.10/ext4-use-memcpy_to_page-in-pagecache_write.patch b/queue-5.10/ext4-use-memcpy_to_page-in-pagecache_write.patch new file mode 100644 index 00000000000..ac55ad8402d --- /dev/null +++ b/queue-5.10/ext4-use-memcpy_to_page-in-pagecache_write.patch @@ -0,0 +1,44 @@ +From 32a2a70a21a968ad8206fd7374f76f59530d42ed Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: 
Sun, 7 Feb 2021 11:04:23 -0800 +Subject: ext4: use memcpy_to_page() in pagecache_write() + +From: Chaitanya Kulkarni + +[ Upstream commit bd256fda92efe97b692dc72e246d35fa724d42d8 ] + +Signed-off-by: Chaitanya Kulkarni +Link: https://lore.kernel.org/r/20210207190425.38107-7-chaitanya.kulkarni@wdc.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 956510c0c743 ("fs: ext4: initialize fsdata in pagecache_write()") +Signed-off-by: Sasha Levin +--- + fs/ext4/verity.c | 5 +---- + 1 file changed, 1 insertion(+), 4 deletions(-) + +diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c +index 35be8e7ec2a0..130070ec491b 100644 +--- a/fs/ext4/verity.c ++++ b/fs/ext4/verity.c +@@ -80,7 +80,6 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; + void *fsdata; +- void *addr; + int res; + + res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, +@@ -88,9 +87,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, + if (res) + return res; + +- addr = kmap_atomic(page); +- memcpy(addr + offset_in_page(pos), buf, n); +- kunmap_atomic(addr); ++ memcpy_to_page(page, offset_in_page(pos), buf, n); + + res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, + page, fsdata); +-- +2.35.1 + diff --git a/queue-5.10/filelock-new-helper-vfs_inode_has_locks.patch b/queue-5.10/filelock-new-helper-vfs_inode_has_locks.patch new file mode 100644 index 00000000000..ef429f059d5 --- /dev/null +++ b/queue-5.10/filelock-new-helper-vfs_inode_has_locks.patch @@ -0,0 +1,89 @@ +From 08714768923658ec3062696c56e0abc88ee7105f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Nov 2022 08:33:09 -0500 +Subject: filelock: new helper: vfs_inode_has_locks + +From: Jeff Layton + +[ Upstream commit ab1ddef98a715eddb65309ffa83267e4e84a571e ] + +Ceph has a need to know whether a particular inode has any locks set on +it. 
It's currently tracking that by a num_locks field in its +filp->private_data, but that's problematic as it tries to decrement this +field when releasing locks and that can race with the file being torn +down. + +Add a new vfs_inode_has_locks helper that just returns whether any locks +are currently held on the inode. + +Reviewed-by: Xiubo Li +Reviewed-by: Christoph Hellwig +Signed-off-by: Jeff Layton +Stable-dep-of: 461ab10ef7e6 ("ceph: switch to vfs_inode_has_locks() to fix file lock bug") +Signed-off-by: Sasha Levin +--- + fs/locks.c | 23 +++++++++++++++++++++++ + include/linux/fs.h | 6 ++++++ + 2 files changed, 29 insertions(+) + +diff --git a/fs/locks.c b/fs/locks.c +index 32c948fe2944..12d72c3d8756 100644 +--- a/fs/locks.c ++++ b/fs/locks.c +@@ -2813,6 +2813,29 @@ int vfs_cancel_lock(struct file *filp, struct file_lock *fl) + } + EXPORT_SYMBOL_GPL(vfs_cancel_lock); + ++/** ++ * vfs_inode_has_locks - are any file locks held on @inode? ++ * @inode: inode to check for locks ++ * ++ * Return true if there are any FL_POSIX or FL_FLOCK locks currently ++ * set on @inode. 
++ */ ++bool vfs_inode_has_locks(struct inode *inode) ++{ ++ struct file_lock_context *ctx; ++ bool ret; ++ ++ ctx = smp_load_acquire(&inode->i_flctx); ++ if (!ctx) ++ return false; ++ ++ spin_lock(&ctx->flc_lock); ++ ret = !list_empty(&ctx->flc_posix) || !list_empty(&ctx->flc_flock); ++ spin_unlock(&ctx->flc_lock); ++ return ret; ++} ++EXPORT_SYMBOL_GPL(vfs_inode_has_locks); ++ + #ifdef CONFIG_PROC_FS + #include + #include +diff --git a/include/linux/fs.h b/include/linux/fs.h +index 9a477e537361..74e19bccbf73 100644 +--- a/include/linux/fs.h ++++ b/include/linux/fs.h +@@ -1145,6 +1145,7 @@ extern int locks_delete_block(struct file_lock *); + extern int vfs_test_lock(struct file *, struct file_lock *); + extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); + extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); ++bool vfs_inode_has_locks(struct inode *inode); + extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); + extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); + extern void lease_get_mtime(struct inode *, struct timespec64 *time); +@@ -1257,6 +1258,11 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) + return 0; + } + ++static inline bool vfs_inode_has_locks(struct inode *inode) ++{ ++ return false; ++} ++ + static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) + { + return -ENOLCK; +-- +2.35.1 + diff --git a/queue-5.10/fs-ext4-initialize-fsdata-in-pagecache_write.patch b/queue-5.10/fs-ext4-initialize-fsdata-in-pagecache_write.patch new file mode 100644 index 00000000000..05b97fca00f --- /dev/null +++ b/queue-5.10/fs-ext4-initialize-fsdata-in-pagecache_write.patch @@ -0,0 +1,43 @@ +From 117930a41edd8f7ee8aa352912bceb45a74ff912 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 21 Nov 2022 12:21:30 +0100 +Subject: fs: ext4: initialize fsdata in pagecache_write() + +From: Alexander 
Potapenko + +[ Upstream commit 956510c0c7439e90b8103aaeaf4da92878c622f0 ] + +When aops->write_begin() does not initialize fsdata, KMSAN reports +an error passing the latter to aops->write_end(). + +Fix this by unconditionally initializing fsdata. + +Cc: Eric Biggers +Fixes: c93d8f885809 ("ext4: add basic fs-verity support") +Reported-by: syzbot+9767be679ef5016b6082@syzkaller.appspotmail.com +Signed-off-by: Alexander Potapenko +Reviewed-by: Eric Biggers +Link: https://lore.kernel.org/r/20221121112134.407362-1-glider@google.com +Signed-off-by: Theodore Ts'o +Cc: stable@kernel.org +Signed-off-by: Sasha Levin +--- + fs/ext4/verity.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c +index 130070ec491b..e3019f920222 100644 +--- a/fs/ext4/verity.c ++++ b/fs/ext4/verity.c +@@ -79,7 +79,7 @@ static int pagecache_write(struct inode *inode, const void *buf, size_t count, + size_t n = min_t(size_t, count, + PAGE_SIZE - offset_in_page(pos)); + struct page *page; +- void *fsdata; ++ void *fsdata = NULL; + int res; + + res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, +-- +2.35.1 + diff --git a/queue-5.10/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch b/queue-5.10/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch new file mode 100644 index 00000000000..21ecc30e2fe --- /dev/null +++ b/queue-5.10/gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch @@ -0,0 +1,36 @@ +From 2c9bd3bc094387ab4afb5e0f65b2e2f4c820137a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Jan 2023 12:20:39 +0400 +Subject: gpio: sifive: Fix refcount leak in sifive_gpio_probe + +From: Miaoqian Lin + +[ Upstream commit 694175cd8a1643cde3acb45c9294bca44a8e08e9 ] + +of_irq_find_parent() returns a node pointer with refcount incremented, +We should use of_node_put() on it when not needed anymore. +Add missing of_node_put() to avoid refcount leak. 
+ +Fixes: 96868dce644d ("gpio/sifive: Add GPIO driver for SiFive SoCs") +Signed-off-by: Miaoqian Lin +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpio-sifive.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/gpio/gpio-sifive.c b/drivers/gpio/gpio-sifive.c +index 4f28fa73450c..a42ffb9f3057 100644 +--- a/drivers/gpio/gpio-sifive.c ++++ b/drivers/gpio/gpio-sifive.c +@@ -195,6 +195,7 @@ static int sifive_gpio_probe(struct platform_device *pdev) + return -ENODEV; + } + parent = irq_find_host(irq_parent); ++ of_node_put(irq_parent); + if (!parent) { + dev_err(dev, "no IRQ parent domain\n"); + return -ENODEV; +-- +2.35.1 + diff --git a/queue-5.10/mbcache-add-functions-to-delete-entry-if-unused.patch b/queue-5.10/mbcache-add-functions-to-delete-entry-if-unused.patch new file mode 100644 index 00000000000..a6a366b53cb --- /dev/null +++ b/queue-5.10/mbcache-add-functions-to-delete-entry-if-unused.patch @@ -0,0 +1,156 @@ +From ff21a13cc7c1061e54a6d3402303a5f33559ff7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jul 2022 12:54:21 +0200 +Subject: mbcache: add functions to delete entry if unused + +From: Jan Kara + +[ Upstream commit 3dc96bba65f53daa217f0a8f43edad145286a8f5 ] + +Add function mb_cache_entry_delete_or_get() to delete mbcache entry if +it is unused and also add a function to wait for entry to become unused +- mb_cache_entry_wait_unused(). We do not share code between the two +deleting functions as one of them will go away soon.
+ +CC: stable@vger.kernel.org +Fixes: 82939d7999df ("ext4: convert to mbcache2") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220712105436.32204-2-jack@suse.cz +Signed-off-by: Theodore Ts'o +Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption") +Signed-off-by: Sasha Levin +--- + fs/mbcache.c | 66 +++++++++++++++++++++++++++++++++++++++-- + include/linux/mbcache.h | 10 ++++++- + 2 files changed, 73 insertions(+), 3 deletions(-) + +diff --git a/fs/mbcache.c b/fs/mbcache.c +index cfc28129fb6f..2010bc80a3f2 100644 +--- a/fs/mbcache.c ++++ b/fs/mbcache.c +@@ -11,7 +11,7 @@ + /* + * Mbcache is a simple key-value store. Keys need not be unique, however + * key-value pairs are expected to be unique (we use this fact in +- * mb_cache_entry_delete()). ++ * mb_cache_entry_delete_or_get()). + * + * Ext2 and ext4 use this cache for deduplication of extended attribute blocks. + * Ext4 also uses it for deduplication of xattr values stored in inodes. +@@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry) + } + EXPORT_SYMBOL(__mb_cache_entry_free); + ++/* ++ * mb_cache_entry_wait_unused - wait to be the last user of the entry ++ * ++ * @entry - entry to work on ++ * ++ * Wait to be the last user of the entry. 
++ */ ++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry) ++{ ++ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3); ++} ++EXPORT_SYMBOL(mb_cache_entry_wait_unused); ++ + static struct mb_cache_entry *__entry_find(struct mb_cache *cache, + struct mb_cache_entry *entry, + u32 key) +@@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, + } + EXPORT_SYMBOL(mb_cache_entry_get); + +-/* mb_cache_entry_delete - remove a cache entry ++/* mb_cache_entry_delete - try to remove a cache entry + * @cache - cache we work with + * @key - key + * @value - value +@@ -254,6 +267,55 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value) + } + EXPORT_SYMBOL(mb_cache_entry_delete); + ++/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users ++ * @cache - cache we work with ++ * @key - key ++ * @value - value ++ * ++ * Remove entry from cache @cache with key @key and value @value. The removal ++ * happens only if the entry is unused. The function returns NULL in case the ++ * entry was successfully removed or there's no entry in cache. Otherwise the ++ * function grabs reference of the entry that we failed to delete because it ++ * still has users and return it. 
++ */ ++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, ++ u32 key, u64 value) ++{ ++ struct hlist_bl_node *node; ++ struct hlist_bl_head *head; ++ struct mb_cache_entry *entry; ++ ++ head = mb_cache_entry_head(cache, key); ++ hlist_bl_lock(head); ++ hlist_bl_for_each_entry(entry, node, head, e_hash_list) { ++ if (entry->e_key == key && entry->e_value == value) { ++ if (atomic_read(&entry->e_refcnt) > 2) { ++ atomic_inc(&entry->e_refcnt); ++ hlist_bl_unlock(head); ++ return entry; ++ } ++ /* We keep hash list reference to keep entry alive */ ++ hlist_bl_del_init(&entry->e_hash_list); ++ hlist_bl_unlock(head); ++ spin_lock(&cache->c_list_lock); ++ if (!list_empty(&entry->e_list)) { ++ list_del_init(&entry->e_list); ++ if (!WARN_ONCE(cache->c_entry_count == 0, ++ "mbcache: attempt to decrement c_entry_count past zero")) ++ cache->c_entry_count--; ++ atomic_dec(&entry->e_refcnt); ++ } ++ spin_unlock(&cache->c_list_lock); ++ mb_cache_entry_put(cache, entry); ++ return NULL; ++ } ++ } ++ hlist_bl_unlock(head); ++ ++ return NULL; ++} ++EXPORT_SYMBOL(mb_cache_entry_delete_or_get); ++ + /* mb_cache_entry_touch - cache entry got used + * @cache - cache the entry belongs to + * @entry - entry that got used +diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h +index 20f1e3ff6013..8eca7f25c432 100644 +--- a/include/linux/mbcache.h ++++ b/include/linux/mbcache.h +@@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache); + int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + u64 value, bool reusable); + void __mb_cache_entry_free(struct mb_cache_entry *entry); ++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry); + static inline int mb_cache_entry_put(struct mb_cache *cache, + struct mb_cache_entry *entry) + { +- if (!atomic_dec_and_test(&entry->e_refcnt)) ++ unsigned int cnt = atomic_dec_return(&entry->e_refcnt); ++ ++ if (cnt > 0) { ++ if (cnt <= 3) ++ wake_up_var(&entry->e_refcnt); + return 0; ++ } 
+ __mb_cache_entry_free(entry); + return 1; + } + ++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, ++ u32 key, u64 value); + void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value); + struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, + u64 value); +-- +2.35.1 + diff --git a/queue-5.10/mbcache-automatically-delete-entries-from-cache-on-f.patch b/queue-5.10/mbcache-automatically-delete-entries-from-cache-on-f.patch new file mode 100644 index 00000000000..df8052120ef --- /dev/null +++ b/queue-5.10/mbcache-automatically-delete-entries-from-cache-on-f.patch @@ -0,0 +1,274 @@ +From b2f2aa434be535ae2779b09d45284f35b60e6f9b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jul 2022 12:54:29 +0200 +Subject: mbcache: automatically delete entries from cache on freeing + +From: Jan Kara + +[ Upstream commit 307af6c879377c1c63e71cbdd978201f9c7ee8df ] + +Use the fact that entries with elevated refcount are not removed from +the hash and just move removal of the entry from the hash to the entry +freeing time. When doing this we also change the generic code to hold +one reference to the cache entry, not two of them, which makes code +somewhat more obvious. 
+ +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220712105436.32204-10-jack@suse.cz +Signed-off-by: Theodore Ts'o +Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption") +Signed-off-by: Sasha Levin +--- + fs/mbcache.c | 108 +++++++++++++++------------------------- + include/linux/mbcache.h | 24 ++++++--- + 2 files changed, 55 insertions(+), 77 deletions(-) + +diff --git a/fs/mbcache.c b/fs/mbcache.c +index 2010bc80a3f2..950f1829a7fd 100644 +--- a/fs/mbcache.c ++++ b/fs/mbcache.c +@@ -90,7 +90,7 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + return -ENOMEM; + + INIT_LIST_HEAD(&entry->e_list); +- /* One ref for hash, one ref returned */ ++ /* Initial hash reference */ + atomic_set(&entry->e_refcnt, 1); + entry->e_key = key; + entry->e_value = value; +@@ -106,21 +106,28 @@ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + } + } + hlist_bl_add_head(&entry->e_hash_list, head); +- hlist_bl_unlock(head); +- ++ /* ++ * Add entry to LRU list before it can be found by ++ * mb_cache_entry_delete() to avoid races ++ */ + spin_lock(&cache->c_list_lock); + list_add_tail(&entry->e_list, &cache->c_list); +- /* Grab ref for LRU list */ +- atomic_inc(&entry->e_refcnt); + cache->c_entry_count++; + spin_unlock(&cache->c_list_lock); ++ hlist_bl_unlock(head); + + return 0; + } + EXPORT_SYMBOL(mb_cache_entry_create); + +-void __mb_cache_entry_free(struct mb_cache_entry *entry) ++void __mb_cache_entry_free(struct mb_cache *cache, struct mb_cache_entry *entry) + { ++ struct hlist_bl_head *head; ++ ++ head = mb_cache_entry_head(cache, entry->e_key); ++ hlist_bl_lock(head); ++ hlist_bl_del(&entry->e_hash_list); ++ hlist_bl_unlock(head); + kmem_cache_free(mb_entry_cache, entry); + } + EXPORT_SYMBOL(__mb_cache_entry_free); +@@ -134,7 +141,7 @@ EXPORT_SYMBOL(__mb_cache_entry_free); + */ + void mb_cache_entry_wait_unused(struct mb_cache_entry *entry) + { +- wait_var_event(&entry->e_refcnt, 
atomic_read(&entry->e_refcnt) <= 3); ++ wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 2); + } + EXPORT_SYMBOL(mb_cache_entry_wait_unused); + +@@ -155,10 +162,9 @@ static struct mb_cache_entry *__entry_find(struct mb_cache *cache, + while (node) { + entry = hlist_bl_entry(node, struct mb_cache_entry, + e_hash_list); +- if (entry->e_key == key && entry->e_reusable) { +- atomic_inc(&entry->e_refcnt); ++ if (entry->e_key == key && entry->e_reusable && ++ atomic_inc_not_zero(&entry->e_refcnt)) + goto out; +- } + node = node->next; + } + entry = NULL; +@@ -218,10 +224,9 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key, + head = mb_cache_entry_head(cache, key); + hlist_bl_lock(head); + hlist_bl_for_each_entry(entry, node, head, e_hash_list) { +- if (entry->e_key == key && entry->e_value == value) { +- atomic_inc(&entry->e_refcnt); ++ if (entry->e_key == key && entry->e_value == value && ++ atomic_inc_not_zero(&entry->e_refcnt)) + goto out; +- } + } + entry = NULL; + out: +@@ -281,37 +286,25 @@ EXPORT_SYMBOL(mb_cache_entry_delete); + struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, + u32 key, u64 value) + { +- struct hlist_bl_node *node; +- struct hlist_bl_head *head; + struct mb_cache_entry *entry; + +- head = mb_cache_entry_head(cache, key); +- hlist_bl_lock(head); +- hlist_bl_for_each_entry(entry, node, head, e_hash_list) { +- if (entry->e_key == key && entry->e_value == value) { +- if (atomic_read(&entry->e_refcnt) > 2) { +- atomic_inc(&entry->e_refcnt); +- hlist_bl_unlock(head); +- return entry; +- } +- /* We keep hash list reference to keep entry alive */ +- hlist_bl_del_init(&entry->e_hash_list); +- hlist_bl_unlock(head); +- spin_lock(&cache->c_list_lock); +- if (!list_empty(&entry->e_list)) { +- list_del_init(&entry->e_list); +- if (!WARN_ONCE(cache->c_entry_count == 0, +- "mbcache: attempt to decrement c_entry_count past zero")) +- cache->c_entry_count--; +- atomic_dec(&entry->e_refcnt); +- 
} +- spin_unlock(&cache->c_list_lock); +- mb_cache_entry_put(cache, entry); +- return NULL; +- } +- } +- hlist_bl_unlock(head); ++ entry = mb_cache_entry_get(cache, key, value); ++ if (!entry) ++ return NULL; + ++ /* ++ * Drop the ref we got from mb_cache_entry_get() and the initial hash ++ * ref if we are the last user ++ */ ++ if (atomic_cmpxchg(&entry->e_refcnt, 2, 0) != 2) ++ return entry; ++ ++ spin_lock(&cache->c_list_lock); ++ if (!list_empty(&entry->e_list)) ++ list_del_init(&entry->e_list); ++ cache->c_entry_count--; ++ spin_unlock(&cache->c_list_lock); ++ __mb_cache_entry_free(cache, entry); + return NULL; + } + EXPORT_SYMBOL(mb_cache_entry_delete_or_get); +@@ -343,42 +336,24 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, + unsigned long nr_to_scan) + { + struct mb_cache_entry *entry; +- struct hlist_bl_head *head; + unsigned long shrunk = 0; + + spin_lock(&cache->c_list_lock); + while (nr_to_scan-- && !list_empty(&cache->c_list)) { + entry = list_first_entry(&cache->c_list, + struct mb_cache_entry, e_list); +- if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) { ++ /* Drop initial hash reference if there is no user */ ++ if (entry->e_referenced || ++ atomic_cmpxchg(&entry->e_refcnt, 1, 0) != 1) { + entry->e_referenced = 0; + list_move_tail(&entry->e_list, &cache->c_list); + continue; + } + list_del_init(&entry->e_list); + cache->c_entry_count--; +- /* +- * We keep LRU list reference so that entry doesn't go away +- * from under us. +- */ + spin_unlock(&cache->c_list_lock); +- head = mb_cache_entry_head(cache, entry->e_key); +- hlist_bl_lock(head); +- /* Now a reliable check if the entry didn't get used... 
*/ +- if (atomic_read(&entry->e_refcnt) > 2) { +- hlist_bl_unlock(head); +- spin_lock(&cache->c_list_lock); +- list_add_tail(&entry->e_list, &cache->c_list); +- cache->c_entry_count++; +- continue; +- } +- if (!hlist_bl_unhashed(&entry->e_hash_list)) { +- hlist_bl_del_init(&entry->e_hash_list); +- atomic_dec(&entry->e_refcnt); +- } +- hlist_bl_unlock(head); +- if (mb_cache_entry_put(cache, entry)) +- shrunk++; ++ __mb_cache_entry_free(cache, entry); ++ shrunk++; + cond_resched(); + spin_lock(&cache->c_list_lock); + } +@@ -470,11 +445,6 @@ void mb_cache_destroy(struct mb_cache *cache) + * point. + */ + list_for_each_entry_safe(entry, next, &cache->c_list, e_list) { +- if (!hlist_bl_unhashed(&entry->e_hash_list)) { +- hlist_bl_del_init(&entry->e_hash_list); +- atomic_dec(&entry->e_refcnt); +- } else +- WARN_ON(1); + list_del(&entry->e_list); + WARN_ON(atomic_read(&entry->e_refcnt) != 1); + mb_cache_entry_put(cache, entry); +diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h +index 8eca7f25c432..e9d5ece87794 100644 +--- a/include/linux/mbcache.h ++++ b/include/linux/mbcache.h +@@ -13,8 +13,16 @@ struct mb_cache; + struct mb_cache_entry { + /* List of entries in cache - protected by cache->c_list_lock */ + struct list_head e_list; +- /* Hash table list - protected by hash chain bitlock */ ++ /* ++ * Hash table list - protected by hash chain bitlock. The entry is ++ * guaranteed to be hashed while e_refcnt > 0. ++ */ + struct hlist_bl_node e_hash_list; ++ /* ++ * Entry refcount. Once it reaches zero, entry is unhashed and freed. ++ * While refcount > 0, the entry is guaranteed to stay in the hash and ++ * e.g. mb_cache_entry_try_delete() will fail. 
++ */ + atomic_t e_refcnt; + /* Key in hash - stable during lifetime of the entry */ + u32 e_key; +@@ -29,20 +37,20 @@ void mb_cache_destroy(struct mb_cache *cache); + + int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key, + u64 value, bool reusable); +-void __mb_cache_entry_free(struct mb_cache_entry *entry); ++void __mb_cache_entry_free(struct mb_cache *cache, ++ struct mb_cache_entry *entry); + void mb_cache_entry_wait_unused(struct mb_cache_entry *entry); +-static inline int mb_cache_entry_put(struct mb_cache *cache, +- struct mb_cache_entry *entry) ++static inline void mb_cache_entry_put(struct mb_cache *cache, ++ struct mb_cache_entry *entry) + { + unsigned int cnt = atomic_dec_return(&entry->e_refcnt); + + if (cnt > 0) { +- if (cnt <= 3) ++ if (cnt <= 2) + wake_up_var(&entry->e_refcnt); +- return 0; ++ return; + } +- __mb_cache_entry_free(entry); +- return 1; ++ __mb_cache_entry_free(cache, entry); + } + + struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache, +-- +2.35.1 + diff --git a/queue-5.10/mbcache-don-t-reclaim-used-entries.patch b/queue-5.10/mbcache-don-t-reclaim-used-entries.patch new file mode 100644 index 00000000000..f222fe5c263 --- /dev/null +++ b/queue-5.10/mbcache-don-t-reclaim-used-entries.patch @@ -0,0 +1,56 @@ +From f0057e035b042df0fbd7e425bad663ecc584e266 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Jul 2022 12:54:20 +0200 +Subject: mbcache: don't reclaim used entries + +From: Jan Kara + +[ Upstream commit 58318914186c157477b978b1739dfe2f1b9dc0fe ] + +Do not reclaim entries that are currently used by somebody from a +shrinker. Firstly, these entries are likely useful. Secondly, we will +need to keep such entries to protect pending increment of xattr block +refcount. 
+ +CC: stable@vger.kernel.org +Fixes: 82939d7999df ("ext4: convert to mbcache2") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20220712105436.32204-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Stable-dep-of: a44e84a9b776 ("ext4: fix deadlock due to mbcache entry corruption") +Signed-off-by: Sasha Levin +--- + fs/mbcache.c | 10 +++++++++- + 1 file changed, 9 insertions(+), 1 deletion(-) + +diff --git a/fs/mbcache.c b/fs/mbcache.c +index 97c54d3a2227..cfc28129fb6f 100644 +--- a/fs/mbcache.c ++++ b/fs/mbcache.c +@@ -288,7 +288,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, + while (nr_to_scan-- && !list_empty(&cache->c_list)) { + entry = list_first_entry(&cache->c_list, + struct mb_cache_entry, e_list); +- if (entry->e_referenced) { ++ if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) { + entry->e_referenced = 0; + list_move_tail(&entry->e_list, &cache->c_list); + continue; +@@ -302,6 +302,14 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache, + spin_unlock(&cache->c_list_lock); + head = mb_cache_entry_head(cache, entry->e_key); + hlist_bl_lock(head); ++ /* Now a reliable check if the entry didn't get used... 
*/ ++ if (atomic_read(&entry->e_refcnt) > 2) { ++ hlist_bl_unlock(head); ++ spin_lock(&cache->c_list_lock); ++ list_add_tail(&entry->e_list, &cache->c_list); ++ cache->c_entry_count++; ++ continue; ++ } + if (!hlist_bl_unhashed(&entry->e_hash_list)) { + hlist_bl_del_init(&entry->e_hash_list); + atomic_dec(&entry->e_refcnt); +-- +2.35.1 + diff --git a/queue-5.10/mm-highmem-lift-memcpy_-to-from-_page-to-core.patch b/queue-5.10/mm-highmem-lift-memcpy_-to-from-_page-to-core.patch new file mode 100644 index 00000000000..e48e0c903b6 --- /dev/null +++ b/queue-5.10/mm-highmem-lift-memcpy_-to-from-_page-to-core.patch @@ -0,0 +1,123 @@ +From d098fa4a5cb3f52e1b1f1c0ab93a98fcafb714de Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 9 Feb 2021 22:22:14 -0800 +Subject: mm/highmem: Lift memcpy_[to|from]_page to core + +From: Ira Weiny + +[ Upstream commit bb90d4bc7b6a536b2e4db45f4763e467c2008251 ] + +Working through a conversion to a call kmap_local_page() instead of +kmap() revealed many places where the pattern kmap/memcpy/kunmap +occurred. + +Eric Biggers, Matthew Wilcox, Christoph Hellwig, Dan Williams, and Al +Viro all suggested putting this code into helper functions. Al Viro +further pointed out that these functions already existed in the iov_iter +code.[1] + +Various locations for the lifted functions were considered. + +Headers like mm.h or string.h seem ok but don't really portray the +functionality well. pagemap.h made some sense but is for page cache +functionality.[2] + +Another alternative would be to create a new header for the promoted +memcpy functions, but it masks the fact that these are designed to copy +to/from pages using the kernel direct mappings and complicates matters +with a new header. + +Placing these functions in 'highmem.h' is suboptimal especially with the +changes being proposed in the functionality of kmap. 
From a caller +perspective including/using 'highmem.h' implies that the functions +defined in that header are only required when highmem is in use which is +increasingly not the case with modern processors. However, highmem.h is +where all the current functions like this reside (zero_user(), +clear_highpage(), clear_user_highpage(), copy_user_highpage(), and +copy_highpage()). So it makes the most sense even though it is +distasteful for some.[3] + +Lift memcpy_to_page() and memcpy_from_page() to highmem.h. + +[1] https://lore.kernel.org/lkml/20201013200149.GI3576660@ZenIV.linux.org.uk/ + https://lore.kernel.org/lkml/20201013112544.GA5249@infradead.org/ + +[2] https://lore.kernel.org/lkml/20201208122316.GH7338@casper.infradead.org/ + +[3] https://lore.kernel.org/lkml/20201013200149.GI3576660@ZenIV.linux.org.uk/#t + https://lore.kernel.org/lkml/20201208163814.GN1563847@iweiny-DESK2.sc.intel.com/ + +Cc: Boris Pismenny +Cc: Or Gerlitz +Cc: Dave Hansen +Suggested-by: Matthew Wilcox +Suggested-by: Christoph Hellwig +Suggested-by: Dan Williams +Suggested-by: Al Viro +Suggested-by: Eric Biggers +Reviewed-by: Chaitanya Kulkarni +Reviewed-by: Christoph Hellwig +Signed-off-by: Ira Weiny +Signed-off-by: David Sterba +Stable-dep-of: 956510c0c743 ("fs: ext4: initialize fsdata in pagecache_write()") +Signed-off-by: Sasha Levin +--- + include/linux/highmem.h | 18 ++++++++++++++++++ + lib/iov_iter.c | 14 -------------- + 2 files changed, 18 insertions(+), 14 deletions(-) + +diff --git a/include/linux/highmem.h b/include/linux/highmem.h +index 14e6202ce47f..b25df1f8d48d 100644 +--- a/include/linux/highmem.h ++++ b/include/linux/highmem.h +@@ -345,4 +345,22 @@ static inline void copy_highpage(struct page *to, struct page *from) + + #endif + ++static inline void memcpy_from_page(char *to, struct page *page, ++ size_t offset, size_t len) ++{ ++ char *from = kmap_atomic(page); ++ ++ memcpy(to, from + offset, len); ++ kunmap_atomic(from); ++} ++ ++static inline void
memcpy_to_page(struct page *page, size_t offset, ++ const char *from, size_t len) ++{ ++ char *to = kmap_atomic(page); ++ ++ memcpy(to + offset, from, len); ++ kunmap_atomic(to); ++} ++ + #endif /* _LINUX_HIGHMEM_H */ +diff --git a/lib/iov_iter.c b/lib/iov_iter.c +index 650554964f18..6e30113303ba 100644 +--- a/lib/iov_iter.c ++++ b/lib/iov_iter.c +@@ -467,20 +467,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction, + } + EXPORT_SYMBOL(iov_iter_init); + +-static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) +-{ +- char *from = kmap_atomic(page); +- memcpy(to, from + offset, len); +- kunmap_atomic(from); +-} +- +-static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len) +-{ +- char *to = kmap_atomic(page); +- memcpy(to + offset, from, len); +- kunmap_atomic(to); +-} +- + static void memzero_page(struct page *page, size_t offset, size_t len) + { + char *addr = kmap_atomic(page); +-- +2.35.1 + diff --git a/queue-5.10/net-amd-xgbe-add-missed-tasklet_kill.patch b/queue-5.10/net-amd-xgbe-add-missed-tasklet_kill.patch new file mode 100644 index 00000000000..9a4ce4096eb --- /dev/null +++ b/queue-5.10/net-amd-xgbe-add-missed-tasklet_kill.patch @@ -0,0 +1,71 @@ +From a3f1e166ebaff9e6042cd8bfcf30de070292c5b9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Dec 2022 16:14:47 +0800 +Subject: net: amd-xgbe: add missed tasklet_kill + +From: Jiguang Xiao + +[ Upstream commit d530ece70f16f912e1d1bfeea694246ab78b0a4b ] + +The driver does not call tasklet_kill in several places. +Add the calls to fix it. + +Fixes: 85b85c853401 ("amd-xgbe: Re-issue interrupt if interrupt status not cleared") +Signed-off-by: Jiguang Xiao +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 3 +++ + drivers/net/ethernet/amd/xgbe/xgbe-i2c.c | 4 +++- + drivers/net/ethernet/amd/xgbe/xgbe-mdio.c | 4 +++- + 3 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +index a816b30bca04..a5d6faf7b89e 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +@@ -1064,6 +1064,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata) + + devm_free_irq(pdata->dev, pdata->dev_irq, pdata); + ++ tasklet_kill(&pdata->tasklet_dev); ++ tasklet_kill(&pdata->tasklet_ecc); ++ + if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) + devm_free_irq(pdata->dev, pdata->ecc_irq, pdata); + +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +index 22d4fc547a0a..a9ccc4258ee5 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +@@ -447,8 +447,10 @@ static void xgbe_i2c_stop(struct xgbe_prv_data *pdata) + xgbe_i2c_disable(pdata); + xgbe_i2c_clear_all_interrupts(pdata); + +- if (pdata->dev_irq != pdata->i2c_irq) ++ if (pdata->dev_irq != pdata->i2c_irq) { + devm_free_irq(pdata->dev, pdata->i2c_irq, pdata); ++ tasklet_kill(&pdata->tasklet_i2c); ++ } + } + + static int xgbe_i2c_start(struct xgbe_prv_data *pdata) +diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +index 4e97b4869522..0c5c1b155683 100644 +--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c ++++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c +@@ -1390,8 +1390,10 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata) + /* Disable auto-negotiation */ + xgbe_an_disable_all(pdata); + +- if (pdata->dev_irq != pdata->an_irq) ++ if (pdata->dev_irq != pdata->an_irq) { + devm_free_irq(pdata->dev, pdata->an_irq, pdata); ++ tasklet_kill(&pdata->tasklet_an); 
++ } + + pdata->phy_if.phy_impl.stop(pdata); + +-- +2.35.1 + diff --git a/queue-5.10/net-hns3-add-interrupts-re-initialization-while-doin.patch b/queue-5.10/net-hns3-add-interrupts-re-initialization-while-doin.patch new file mode 100644 index 00000000000..54466bde295 --- /dev/null +++ b/queue-5.10/net-hns3-add-interrupts-re-initialization-while-doin.patch @@ -0,0 +1,43 @@ +From 33656c78a47654a45874e2b6c7ab0e2eaba9e8aa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 14:43:41 +0800 +Subject: net: hns3: add interrupts re-initialization while doing VF FLR + +From: Jie Wang + +[ Upstream commit 09e6b30eeb254f1818a008cace3547159e908dfd ] + +Currently keep alive message between PF and VF may be lost and the VF is +unalive in PF. So the VF will not do reset during PF FLR reset process. +This would make the allocated interrupt resources of VF invalid and VF +would't receive or respond to PF any more. + +So this patch adds VF interrupts re-initialization during VF FLR for VF +recovery in above cases. 
+ +Fixes: 862d969a3a4d ("net: hns3: do VF's pci re-initialization while PF doing FLR") +Signed-off-by: Jie Wang +Signed-off-by: Hao Lan +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +index d6580e942724..f7f3e4bbc477 100644 +--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c ++++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +@@ -3089,7 +3089,8 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev) + struct pci_dev *pdev = hdev->pdev; + int ret = 0; + +- if (hdev->reset_type == HNAE3_VF_FULL_RESET && ++ if ((hdev->reset_type == HNAE3_VF_FULL_RESET || ++ hdev->reset_type == HNAE3_FLR_RESET) && + test_bit(HCLGEVF_STATE_IRQ_INITED, &hdev->state)) { + hclgevf_misc_irq_uninit(hdev); + hclgevf_uninit_msi(hdev); +-- +2.35.1 + diff --git a/queue-5.10/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch b/queue-5.10/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch new file mode 100644 index 00000000000..b4466205027 --- /dev/null +++ b/queue-5.10/net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch @@ -0,0 +1,39 @@ +From 4259afd7d9308433854c19ba2e7bd444d9d8758d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Oct 2022 12:51:52 +0200 +Subject: net/mlx5: Add forgotten cleanup calls into mlx5_init_once() error + path + +From: Jiri Pirko + +[ Upstream commit 2a35b2c2e6a252eda2134aae6a756861d9299531 ] + +There are two cleanup calls missing in mlx5_init_once() error path. +Add them making the error path flow to be the same as +mlx5_cleanup_once(). 
+ +Fixes: 52ec462eca9b ("net/mlx5: Add reserved-gids support") +Fixes: 7c39afb394c7 ("net/mlx5: PTP code migration to driver core section") +Signed-off-by: Jiri Pirko +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c +index 8246b6285d5a..29bc1df28aeb 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c +@@ -906,6 +906,8 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) + err_tables_cleanup: + mlx5_geneve_destroy(dev->geneve); + mlx5_vxlan_destroy(dev->vxlan); ++ mlx5_cleanup_clock(dev); ++ mlx5_cleanup_reserved_gids(dev); + mlx5_cq_debugfs_cleanup(dev); + mlx5_fw_reset_cleanup(dev); + err_events_cleanup: +-- +2.35.1 + diff --git a/queue-5.10/net-mlx5-avoid-recovery-in-probe-flows.patch b/queue-5.10/net-mlx5-avoid-recovery-in-probe-flows.patch new file mode 100644 index 00000000000..e8b04e5d17d --- /dev/null +++ b/queue-5.10/net-mlx5-avoid-recovery-in-probe-flows.patch @@ -0,0 +1,49 @@ +From 87650d1c366cdc9c687b5fa60b044fa660402cd5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Nov 2022 13:34:12 +0200 +Subject: net/mlx5: Avoid recovery in probe flows + +From: Shay Drory + +[ Upstream commit 9078e843efec530f279a155f262793c58b0746bd ] + +Currently, recovery is done without considering whether the device is +still in probe flow. +This may lead to recovery before device have finished probed +successfully. e.g.: while mlx5_init_one() is running. Recovery flow is +using functionality that is loaded only by mlx5_init_one(), and there +is no point in running recovery without mlx5_init_one() finished +successfully. + +Fix it by waiting for probe flow to finish and checking whether the +device is probed before trying to perform recovery. 
+ +Fixes: 51d138c2610a ("net/mlx5: Fix health error state handling") +Signed-off-by: Shay Drory +Reviewed-by: Moshe Shemesh +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/health.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c +index 0c32c485eb58..b21054514736 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c +@@ -618,6 +618,12 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) + priv = container_of(health, struct mlx5_priv, health); + dev = container_of(priv, struct mlx5_core_dev, priv); + ++ mutex_lock(&dev->intf_state_mutex); ++ if (test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags)) { ++ mlx5_core_err(dev, "health works are not permitted at this stage\n"); ++ return; ++ } ++ mutex_unlock(&dev->intf_state_mutex); + enter_error_state(dev, false); + if (IS_ERR_OR_NULL(health->fw_fatal_reporter)) { + if (mlx5_health_try_recover(dev)) +-- +2.35.1 + diff --git a/queue-5.10/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch b/queue-5.10/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch new file mode 100644 index 00000000000..b5f2f31e104 --- /dev/null +++ b/queue-5.10/net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch @@ -0,0 +1,48 @@ +From cc96b29be9be075c5be4df53492e3f617c586b0b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Dec 2022 16:02:57 +0200 +Subject: net/mlx5e: Fix hw mtu initializing at XDP SQ allocation + +From: Adham Faris + +[ Upstream commit 1e267ab88dc44c48f556218f7b7f14c76f7aa066 ] + +Current xdp xmit functions logic (mlx5e_xmit_xdp_frame_mpwqe or +mlx5e_xmit_xdp_frame), validates xdp packet length by comparing it to +hw mtu (configured at xdp sq allocation) before xmiting it. 
This check +does not account for ethernet fcs length (calculated and filled by the +nic). Hence, when we try sending packets with length > (hw-mtu - +ethernet-fcs-size), the device port drops it and tx_errors_phy is +incremented. Desired behavior is to catch these packets and drop them +by the driver. + +Fix this behavior in XDP SQ allocation function (mlx5e_alloc_xdpsq) by +subtracting ethernet FCS header size (4 Bytes) from current hw mtu +value, since ethernet FCS is calculated and written to ethernet frames +by the nic. + +Fixes: d8bec2b29a82 ("net/mlx5e: Support bpf_xdp_adjust_head()") +Signed-off-by: Adham Faris +Reviewed-by: Tariq Toukan +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +index cfc3bfcb04a2..5673a4113253 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +@@ -992,7 +992,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, + sq->channel = c; + sq->uar_map = mdev->mlx5e_res.bfreg.map; + sq->min_inline_mode = params->tx_min_inline_mode; +- sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); ++ sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; + sq->xsk_pool = xsk_pool; + + sq->stats = sq->xsk_pool ? 
+-- +2.35.1 + diff --git a/queue-5.10/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch b/queue-5.10/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch new file mode 100644 index 00000000000..8e94b1d115a --- /dev/null +++ b/queue-5.10/net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch @@ -0,0 +1,45 @@ +From 1d33b28a01796b6437dcccdcc70f4cc7e3f3b8a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 28 Nov 2022 15:24:21 +0200 +Subject: net/mlx5e: IPoIB, Don't allow CQE compression to be turned on by + default + +From: Dragos Tatulea + +[ Upstream commit b12d581e83e3ae1080c32ab83f123005bd89a840 ] + +mlx5e_build_nic_params will turn CQE compression on if the hardware +capability is enabled and the slow_pci_heuristic condition is detected. +As IPoIB doesn't support CQE compression, make sure to disable the +feature in the IPoIB profile init. + +Please note that the feature is not exposed to the user for IPoIB +interfaces, so it can't be subsequently turned on. + +Fixes: b797a684b0dd ("net/mlx5e: Enable CQE compression when PCI is slower than link") +Signed-off-by: Dragos Tatulea +Reviewed-by: Gal Pressman +Signed-off-by: Saeed Mahameed +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +index 5c6a376aa62e..0e7fd200b426 100644 +--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c ++++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +@@ -69,6 +69,10 @@ static void mlx5i_build_nic_params(struct mlx5_core_dev *mdev, + params->lro_en = false; + params->hard_mtu = MLX5_IB_GRH_BYTES + MLX5_IPOIB_HARD_LEN; + params->tunneled_offload_en = false; ++ ++ /* CQE compression is not supported for IPoIB */ ++ params->rx_cqe_compress_def = false; ++ MLX5E_SET_PFLAG(params, MLX5E_PFLAG_RX_CQE_COMPRESS, params->rx_cqe_compress_def); + } + + 
/* Called directly after IPoIB netdevice was created to initialize SW structs */ +-- +2.35.1 + diff --git a/queue-5.10/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch b/queue-5.10/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch new file mode 100644 index 00000000000..20087870052 --- /dev/null +++ b/queue-5.10/net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch @@ -0,0 +1,35 @@ +From 1ba6e7c0947e2d8d46aefaac005f49342d83faf9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 10:29:25 +0400 +Subject: net: phy: xgmiitorgmii: Fix refcount leak in xgmiitorgmii_probe + +From: Miaoqian Lin + +[ Upstream commit d039535850ee47079d59527e96be18d8e0daa84b ] + +of_phy_find_device() return device node with refcount incremented. +Call put_device() to relese it when not needed anymore. + +Fixes: ab4e6ee578e8 ("net: phy: xgmiitorgmii: Check phy_driver ready before accessing") +Signed-off-by: Miaoqian Lin +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/phy/xilinx_gmii2rgmii.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c +index 151c2a3f0b3a..7a78dfdfa5bd 100644 +--- a/drivers/net/phy/xilinx_gmii2rgmii.c ++++ b/drivers/net/phy/xilinx_gmii2rgmii.c +@@ -82,6 +82,7 @@ static int xgmiitorgmii_probe(struct mdio_device *mdiodev) + + if (!priv->phy_dev->drv) { + dev_info(dev, "Attached phy not ready\n"); ++ put_device(&priv->phy_dev->mdio.dev); + return -EPROBE_DEFER; + } + +-- +2.35.1 + diff --git a/queue-5.10/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch b/queue-5.10/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch new file mode 100644 index 00000000000..ccbaa77e5e1 --- /dev/null +++ b/queue-5.10/net-sched-atm-dont-intepret-cls-results-when-asked-t.patch @@ -0,0 +1,42 @@ +From f32f7d5cdc2515b7a4d53857e223ad17c1c59c2f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 Jan 2023 16:57:43 -0500 +Subject: 
net: sched: atm: dont intepret cls results when asked to drop + +From: Jamal Hadi Salim + +[ Upstream commit a2965c7be0522eaa18808684b7b82b248515511b ] + +If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume +res.class contains a valid pointer +Fixes: b0188d4dbe5f ("[NET_SCHED]: sch_atm: Lindent") + +Signed-off-by: Jamal Hadi Salim +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/sched/sch_atm.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c +index 794c7377cd7e..95967ce1f370 100644 +--- a/net/sched/sch_atm.c ++++ b/net/sched/sch_atm.c +@@ -396,10 +396,13 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, + result = tcf_classify(skb, fl, &res, true); + if (result < 0) + continue; ++ if (result == TC_ACT_SHOT) ++ goto done; ++ + flow = (struct atm_flow_data *)res.class; + if (!flow) + flow = lookup_flow(sch, res.classid); +- goto done; ++ goto drop; + } + } + flow = NULL; +-- +2.35.1 + diff --git a/queue-5.10/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch b/queue-5.10/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch new file mode 100644 index 00000000000..e12db458a70 --- /dev/null +++ b/queue-5.10/net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch @@ -0,0 +1,147 @@ +From a4e6f6cce1acb5309ca50e4ca601d69f1514f525 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 1 Jan 2023 16:57:44 -0500 +Subject: net: sched: cbq: dont intepret cls results when asked to drop + +From: Jamal Hadi Salim + +[ Upstream commit caa4b35b4317d5147b3ab0fbdc9c075c7d2e9c12 ] + +If asked to drop a packet via TC_ACT_SHOT it is unsafe to assume that +res.class contains a valid pointer + +Sample splat reported by Kyle Zeng + +[ 5.405624] 0: reclassify loop, rule prio 0, protocol 800 +[ 5.406326] ================================================================== +[ 5.407240] BUG: KASAN: slab-out-of-bounds in cbq_enqueue+0x54b/0xea0 +[ 5.407987] 
Read of size 1 at addr ffff88800e3122aa by task poc/299 +[ 5.408731] +[ 5.408897] CPU: 0 PID: 299 Comm: poc Not tainted 5.10.155+ #15 +[ 5.409516] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), +BIOS 1.15.0-1 04/01/2014 +[ 5.410439] Call Trace: +[ 5.410764] dump_stack+0x87/0xcd +[ 5.411153] print_address_description+0x7a/0x6b0 +[ 5.411687] ? vprintk_func+0xb9/0xc0 +[ 5.411905] ? printk+0x76/0x96 +[ 5.412110] ? cbq_enqueue+0x54b/0xea0 +[ 5.412323] kasan_report+0x17d/0x220 +[ 5.412591] ? cbq_enqueue+0x54b/0xea0 +[ 5.412803] __asan_report_load1_noabort+0x10/0x20 +[ 5.413119] cbq_enqueue+0x54b/0xea0 +[ 5.413400] ? __kasan_check_write+0x10/0x20 +[ 5.413679] __dev_queue_xmit+0x9c0/0x1db0 +[ 5.413922] dev_queue_xmit+0xc/0x10 +[ 5.414136] ip_finish_output2+0x8bc/0xcd0 +[ 5.414436] __ip_finish_output+0x472/0x7a0 +[ 5.414692] ip_finish_output+0x5c/0x190 +[ 5.414940] ip_output+0x2d8/0x3c0 +[ 5.415150] ? ip_mc_finish_output+0x320/0x320 +[ 5.415429] __ip_queue_xmit+0x753/0x1760 +[ 5.415664] ip_queue_xmit+0x47/0x60 +[ 5.415874] __tcp_transmit_skb+0x1ef9/0x34c0 +[ 5.416129] tcp_connect+0x1f5e/0x4cb0 +[ 5.416347] tcp_v4_connect+0xc8d/0x18c0 +[ 5.416577] __inet_stream_connect+0x1ae/0xb40 +[ 5.416836] ? local_bh_enable+0x11/0x20 +[ 5.417066] ? lock_sock_nested+0x175/0x1d0 +[ 5.417309] inet_stream_connect+0x5d/0x90 +[ 5.417548] ? 
__inet_stream_connect+0xb40/0xb40 +[ 5.417817] __sys_connect+0x260/0x2b0 +[ 5.418037] __x64_sys_connect+0x76/0x80 +[ 5.418267] do_syscall_64+0x31/0x50 +[ 5.418477] entry_SYSCALL_64_after_hwframe+0x61/0xc6 +[ 5.418770] RIP: 0033:0x473bb7 +[ 5.418952] Code: 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 +00 00 90 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2a 00 00 +00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 18 89 54 24 0c 48 89 34 +24 89 +[ 5.420046] RSP: 002b:00007fffd20eb0f8 EFLAGS: 00000246 ORIG_RAX: +000000000000002a +[ 5.420472] RAX: ffffffffffffffda RBX: 00007fffd20eb578 RCX: 0000000000473bb7 +[ 5.420872] RDX: 0000000000000010 RSI: 00007fffd20eb110 RDI: 0000000000000007 +[ 5.421271] RBP: 00007fffd20eb150 R08: 0000000000000001 R09: 0000000000000004 +[ 5.421671] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000001 +[ 5.422071] R13: 00007fffd20eb568 R14: 00000000004fc740 R15: 0000000000000002 +[ 5.422471] +[ 5.422562] Allocated by task 299: +[ 5.422782] __kasan_kmalloc+0x12d/0x160 +[ 5.423007] kasan_kmalloc+0x5/0x10 +[ 5.423208] kmem_cache_alloc_trace+0x201/0x2e0 +[ 5.423492] tcf_proto_create+0x65/0x290 +[ 5.423721] tc_new_tfilter+0x137e/0x1830 +[ 5.423957] rtnetlink_rcv_msg+0x730/0x9f0 +[ 5.424197] netlink_rcv_skb+0x166/0x300 +[ 5.424428] rtnetlink_rcv+0x11/0x20 +[ 5.424639] netlink_unicast+0x673/0x860 +[ 5.424870] netlink_sendmsg+0x6af/0x9f0 +[ 5.425100] __sys_sendto+0x58d/0x5a0 +[ 5.425315] __x64_sys_sendto+0xda/0xf0 +[ 5.425539] do_syscall_64+0x31/0x50 +[ 5.425764] entry_SYSCALL_64_after_hwframe+0x61/0xc6 +[ 5.426065] +[ 5.426157] The buggy address belongs to the object at ffff88800e312200 +[ 5.426157] which belongs to the cache kmalloc-128 of size 128 +[ 5.426955] The buggy address is located 42 bytes to the right of +[ 5.426955] 128-byte region [ffff88800e312200, ffff88800e312280) +[ 5.427688] The buggy address belongs to the page: +[ 5.427992] page:000000009875fabc refcount:1 mapcount:0 +mapping:0000000000000000 index:0x0 
pfn:0xe312 +[ 5.428562] flags: 0x100000000000200(slab) +[ 5.428812] raw: 0100000000000200 dead000000000100 dead000000000122 +ffff888007843680 +[ 5.429325] raw: 0000000000000000 0000000000100010 00000001ffffffff +ffff88800e312401 +[ 5.429875] page dumped because: kasan: bad access detected +[ 5.430214] page->mem_cgroup:ffff88800e312401 +[ 5.430471] +[ 5.430564] Memory state around the buggy address: +[ 5.430846] ffff88800e312180: fc fc fc fc fc fc fc fc fc fc fc fc +fc fc fc fc +[ 5.431267] ffff88800e312200: 00 00 00 00 00 00 00 00 00 00 00 00 +00 00 00 fc +[ 5.431705] >ffff88800e312280: fc fc fc fc fc fc fc fc fc fc fc fc +fc fc fc fc +[ 5.432123] ^ +[ 5.432391] ffff88800e312300: 00 00 00 00 00 00 00 00 00 00 00 00 +00 00 00 fc +[ 5.432810] ffff88800e312380: fc fc fc fc fc fc fc fc fc fc fc fc +fc fc fc fc +[ 5.433229] ================================================================== +[ 5.433648] Disabling lock debugging due to kernel taint + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Kyle Zeng +Signed-off-by: Jamal Hadi Salim +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/sched/sch_cbq.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c +index 9a3dff02b7a2..3da5eb313c24 100644 +--- a/net/sched/sch_cbq.c ++++ b/net/sched/sch_cbq.c +@@ -231,6 +231,8 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) + result = tcf_classify(skb, fl, &res, true); + if (!fl || result < 0) + goto fallback; ++ if (result == TC_ACT_SHOT) ++ return NULL; + + cl = (void *)res.class; + if (!cl) { +@@ -251,8 +253,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) + case TC_ACT_TRAP: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + fallthrough; +- case TC_ACT_SHOT: +- return NULL; + case TC_ACT_RECLASSIFY: + return cbq_reclassify(skb, cl); + } +-- +2.35.1 + diff --git a/queue-5.10/net-sched-fix-memory-leak-in-tcindex_set_parms.patch b/queue-5.10/net-sched-fix-memory-leak-in-tcindex_set_parms.patch new file mode 100644 index 00000000000..0a6108d5317 --- /dev/null +++ b/queue-5.10/net-sched-fix-memory-leak-in-tcindex_set_parms.patch @@ -0,0 +1,150 @@ +From 0e40e1546d4f862f22270418d33681756dec9507 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 11:51:19 +0800 +Subject: net: sched: fix memory leak in tcindex_set_parms + +From: Hawkins Jiawei + +[ Upstream commit 399ab7fe0fa0d846881685fd4e57e9a8ef7559f7 ] + +Syzkaller reports a memory leak as follows: +==================================== +BUG: memory leak +unreferenced object 0xffff88810c287f00 (size 256): + comm "syz-executor105", pid 3600, jiffies 4294943292 (age 12.990s) + hex dump (first 32 bytes): + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ + 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ 
+ backtrace: + [] kmalloc_trace+0x20/0x90 mm/slab_common.c:1046 + [] kmalloc include/linux/slab.h:576 [inline] + [] kmalloc_array include/linux/slab.h:627 [inline] + [] kcalloc include/linux/slab.h:659 [inline] + [] tcf_exts_init include/net/pkt_cls.h:250 [inline] + [] tcindex_set_parms+0xa7/0xbe0 net/sched/cls_tcindex.c:342 + [] tcindex_change+0xdf/0x120 net/sched/cls_tcindex.c:553 + [] tc_new_tfilter+0x4f2/0x1100 net/sched/cls_api.c:2147 + [] rtnetlink_rcv_msg+0x4dc/0x5d0 net/core/rtnetlink.c:6082 + [] netlink_rcv_skb+0x87/0x1d0 net/netlink/af_netlink.c:2540 + [] netlink_unicast_kernel net/netlink/af_netlink.c:1319 [inline] + [] netlink_unicast+0x397/0x4c0 net/netlink/af_netlink.c:1345 + [] netlink_sendmsg+0x396/0x710 net/netlink/af_netlink.c:1921 + [] sock_sendmsg_nosec net/socket.c:714 [inline] + [] sock_sendmsg+0x56/0x80 net/socket.c:734 + [] ____sys_sendmsg+0x178/0x410 net/socket.c:2482 + [] ___sys_sendmsg+0xa8/0x110 net/socket.c:2536 + [] __sys_sendmmsg+0x105/0x330 net/socket.c:2622 + [] __do_sys_sendmmsg net/socket.c:2651 [inline] + [] __se_sys_sendmmsg net/socket.c:2648 [inline] + [] __x64_sys_sendmmsg+0x24/0x30 net/socket.c:2648 + [] do_syscall_x64 arch/x86/entry/common.c:50 [inline] + [] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + [] entry_SYSCALL_64_after_hwframe+0x63/0xcd +==================================== + +Kernel uses tcindex_change() to change an existing +filter properties. + +Yet the problem is that, during the process of changing, +if `old_r` is retrieved from `p->perfect`, then +kernel uses tcindex_alloc_perfect_hash() to newly +allocate filter results, uses tcindex_filter_result_init() +to clear the old filter result, without destroying +its tcf_exts structure, which triggers the above memory leak. + +To be more specific, there are only two source for the `old_r`, +according to the tcindex_lookup(). `old_r` is retrieved from +`p->perfect`, or `old_r` is retrieved from `p->h`. 
+ + * If `old_r` is retrieved from `p->perfect`, kernel uses +tcindex_alloc_perfect_hash() to newly allocate the +filter results. Then `r` is assigned with `cp->perfect + handle`, +which is newly allocated. So condition `old_r && old_r != r` is +true in this situation, and kernel uses tcindex_filter_result_init() +to clear the old filter result, without destroying +its tcf_exts structure + + * If `old_r` is retrieved from `p->h`, then `p->perfect` is NULL +according to the tcindex_lookup(). Considering that `cp->h` +is directly copied from `p->h` and `p->perfect` is NULL, +`r` is assigned with `tcindex_lookup(cp, handle)`, whose value +should be the same as `old_r`, so condition `old_r && old_r != r` +is false in this situation, kernel ignores using +tcindex_filter_result_init() to clear the old filter result. + +So only when `old_r` is retrieved from `p->perfect` does kernel use +tcindex_filter_result_init() to clear the old filter result, which +triggers the above memory leak. + +Considering that there already exists a tc_filter_wq workqueue +to destroy the old tcindex_data by tcindex_partial_destroy_work() +at the end of tcindex_set_parms(), this patch solves +this memory leak bug by removing this old filter result +clearing part and delegating it to the tc_filter_wq workqueue. + +Note that this patch doesn't introduce any other issues. If +`old_r` is retrieved from `p->perfect`, this patch just +delegates old filter result clearing part to the +tc_filter_wq workqueue; If `old_r` is retrieved from `p->h`, +kernel doesn't reach the old filter result clearing part, so +removing this part has no effect. 
+ +[Thanks to the suggestion from Jakub Kicinski, Cong Wang, Paolo Abeni +and Dmitry Vyukov] + +Fixes: b9a24bb76bf6 ("net_sched: properly handle failure case of tcf_exts_init()") +Link: https://lore.kernel.org/all/0000000000001de5c505ebc9ec59@google.com/ +Reported-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com +Tested-by: syzbot+232ebdbd36706c965ebf@syzkaller.appspotmail.com +Cc: Cong Wang +Cc: Jakub Kicinski +Cc: Paolo Abeni +Cc: Dmitry Vyukov +Acked-by: Paolo Abeni +Signed-off-by: Hawkins Jiawei +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/sched/cls_tcindex.c | 12 ++---------- + 1 file changed, 2 insertions(+), 10 deletions(-) + +diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c +index e9a8a2c86bbd..86250221d08d 100644 +--- a/net/sched/cls_tcindex.c ++++ b/net/sched/cls_tcindex.c +@@ -332,7 +332,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + struct tcindex_filter_result *r, struct nlattr **tb, + struct nlattr *est, bool ovr, struct netlink_ext_ack *extack) + { +- struct tcindex_filter_result new_filter_result, *old_r = r; ++ struct tcindex_filter_result new_filter_result; + struct tcindex_data *cp = NULL, *oldp; + struct tcindex_filter *f = NULL; /* make gcc behave */ + struct tcf_result cr = {}; +@@ -401,7 +401,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + err = tcindex_filter_result_init(&new_filter_result, cp, net); + if (err < 0) + goto errout_alloc; +- if (old_r) ++ if (r) + cr = r->res; + + err = -EBUSY; +@@ -478,14 +478,6 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, + tcf_bind_filter(tp, &cr, base); + } + +- if (old_r && old_r != r) { +- err = tcindex_filter_result_init(old_r, cp, net); +- if (err < 0) { +- kfree(f); +- goto errout_alloc; +- } +- } +- + oldp = p; + r->res = cr; + tcf_exts_change(&r->exts, &e); +-- +2.35.1 + diff --git 
a/queue-5.10/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch b/queue-5.10/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch new file mode 100644 index 00000000000..4184a1bb616 --- /dev/null +++ b/queue-5.10/netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch @@ -0,0 +1,109 @@ +From 3d7418d64c90ca3d25c6d73f80137f6518e4f490 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Dec 2022 13:24:37 +0100 +Subject: netfilter: ipset: fix hash:net,port,net hang with /0 subnet +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jozsef Kadlecsik + +[ Upstream commit a31d47be64b9b74f8cfedffe03e0a8a1f9e51f23 ] + +The hash:net,port,net set type supports /0 subnets. However, the patch +commit 5f7b51bf09baca8e titled "netfilter: ipset: Limit the maximal range +of consecutive elements to add/delete" did not take into account it and +resulted in an endless loop. The bug is actually older but the patch +5f7b51bf09baca8e brings it out earlier. + +Handle /0 subnets properly in hash:net,port,net set types. 
+ +Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") +Reported-by: Марк Коренберг +Signed-off-by: Jozsef Kadlecsik +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/ipset/ip_set_hash_netportnet.c | 40 ++++++++++---------- + 1 file changed, 21 insertions(+), 19 deletions(-) + +diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c +index 6446f4fccc72..144346faffc1 100644 +--- a/net/netfilter/ipset/ip_set_hash_netportnet.c ++++ b/net/netfilter/ipset/ip_set_hash_netportnet.c +@@ -172,17 +172,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb, + return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags); + } + ++static u32 ++hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr) ++{ ++ if (from == 0 && to == UINT_MAX) { ++ *cidr = 0; ++ return to; ++ } ++ return ip_set_range_to_cidr(from, to, cidr); ++} ++ + static int + hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_netportnet4 *h = set->data; ++ struct hash_netportnet4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netportnet4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, p = 0, port, port_to; +- u32 ip2_from = 0, ip2_to = 0, ip2, ipn; +- u64 n = 0, m = 0; ++ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; + bool with_ports = false; + int ret; + +@@ -284,19 +293,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); +- n++; +- } while (ipn++ < ip_to); +- ipn = ip2_from; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); +- m++; +- } while (ipn++ < ip2_to); +- +- if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; + + 
if (retried) { + ip = ntohl(h->next.ip[0]); +@@ -309,13 +305,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + + do { + e.ip[0] = htonl(ip); +- ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); ++ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]); + for (; p <= port_to; p++) { + e.port = htons(p); + do { ++ i++; + e.ip[1] = htonl(ip2); +- ip2 = ip_set_range_to_cidr(ip2, ip2_to, +- &e.cidr[1]); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netportnet4_data_next(&h->next, ++ &e); ++ return -ERANGE; ++ } ++ ip2 = hash_netportnet4_range_to_cidr(ip2, ++ ip2_to, &e.cidr[1]); + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; +-- +2.35.1 + diff --git a/queue-5.10/netfilter-ipset-rework-long-task-execution-when-addi.patch b/queue-5.10/netfilter-ipset-rework-long-task-execution-when-addi.patch new file mode 100644 index 00000000000..3f489010d7d --- /dev/null +++ b/queue-5.10/netfilter-ipset-rework-long-task-execution-when-addi.patch @@ -0,0 +1,462 @@ +From 8cdd02e4865a438fda6662920436bad63b35d1d1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Dec 2022 13:24:38 +0100 +Subject: netfilter: ipset: Rework long task execution when adding/deleting + entries + +From: Jozsef Kadlecsik + +[ Upstream commit 5e29dc36bd5e2166b834ceb19990d9e68a734d7d ] + +When adding/deleting large number of elements in one step in ipset, it can +take a reasonable amount of time and can result in soft lockup errors. The +patch 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of +consecutive elements to add/delete") tried to fix it by limiting the max +elements to process at all. However it was not enough, it is still possible +that we get hung tasks. Lowering the limit is not reasonable, so the +approach in this patch is as follows: rely on the method used at resizing +sets and save the state when we reach a smaller internal batch limit, +unlock/lock and proceed from the saved state. 
Thus we can avoid long +continuous tasks and at the same time removed the limit to add/delete large +number of elements in one step. + +The nfnl mutex is held during the whole operation which prevents one to +issue other ipset commands in parallel. + +Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete") +Reported-by: syzbot+9204e7399656300bf271@syzkaller.appspotmail.com +Signed-off-by: Jozsef Kadlecsik +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/linux/netfilter/ipset/ip_set.h | 2 +- + net/netfilter/ipset/ip_set_core.c | 7 ++++--- + net/netfilter/ipset/ip_set_hash_ip.c | 14 ++++++------- + net/netfilter/ipset/ip_set_hash_ipmark.c | 13 ++++++------ + net/netfilter/ipset/ip_set_hash_ipport.c | 13 ++++++------ + net/netfilter/ipset/ip_set_hash_ipportip.c | 13 ++++++------ + net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 +++++++----- + net/netfilter/ipset/ip_set_hash_net.c | 17 +++++++-------- + net/netfilter/ipset/ip_set_hash_netiface.c | 15 ++++++-------- + net/netfilter/ipset/ip_set_hash_netnet.c | 23 +++++++-------------- + net/netfilter/ipset/ip_set_hash_netport.c | 19 +++++++---------- + 11 files changed, 68 insertions(+), 81 deletions(-) + +diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h +index 53c9a17ecb3e..62f7e7e257c1 100644 +--- a/include/linux/netfilter/ipset/ip_set.h ++++ b/include/linux/netfilter/ipset/ip_set.h +@@ -199,7 +199,7 @@ struct ip_set_region { + }; + + /* Max range where every element is added/deleted in one step */ +-#define IPSET_MAX_RANGE (1<<20) ++#define IPSET_MAX_RANGE (1<<14) + + /* The core set type structure */ + struct ip_set_type { +diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c +index c17a7dda0163..1bf6ab83644b 100644 +--- a/net/netfilter/ipset/ip_set_core.c ++++ b/net/netfilter/ipset/ip_set_core.c +@@ -1708,9 +1708,10 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, 
struct ip_set *set, + ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried); + ip_set_unlock(set); + retried = true; +- } while (ret == -EAGAIN && +- set->variant->resize && +- (ret = set->variant->resize(set, retried)) == 0); ++ } while (ret == -ERANGE || ++ (ret == -EAGAIN && ++ set->variant->resize && ++ (ret = set->variant->resize(set, retried)) == 0)); + + if (!ret || (ret == -IPSET_ERR_EXIST && eexist)) + return 0; +diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c +index d7a81b2250e7..8720dc3bb689 100644 +--- a/net/netfilter/ipset/ip_set_hash_ip.c ++++ b/net/netfilter/ipset/ip_set_hash_ip.c +@@ -97,11 +97,11 @@ static int + hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ip4 *h = set->data; ++ struct hash_ip4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ip4_elem e = { 0 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip = 0, ip_to = 0, hosts; ++ u32 ip = 0, ip_to = 0, hosts, i = 0; + int ret = 0; + + if (tb[IPSET_ATTR_LINENO]) +@@ -146,14 +146,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], + + hosts = h->netmask == 32 ? 
1 : 2 << (32 - h->netmask - 1); + +- /* 64bit division is not allowed on 32bit */ +- if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); +- for (; ip <= ip_to;) { ++ for (; ip <= ip_to; i++) { + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ip4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; +diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c +index eefce34a34f0..cbb05cb188f2 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipmark.c ++++ b/net/netfilter/ipset/ip_set_hash_ipmark.c +@@ -96,11 +96,11 @@ static int + hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipmark4 *h = set->data; ++ struct hash_ipmark4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipmark4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip, ip_to = 0; ++ u32 ip, ip_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -147,13 +147,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], + ip_set_mask_from_to(ip, ip_to, cidr); + } + +- if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); +- for (; ip <= ip_to; ip++) { ++ for (; ip <= ip_to; ip++, i++) { + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipmark4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c +index 4a54e9e8ae59..c560f7873eca 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipport.c ++++ b/net/netfilter/ipset/ip_set_hash_ipport.c +@@ -104,11 +104,11 @@ static int + hash_ipport4_uadt(struct ip_set *set, 
struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipport4 *h = set->data; ++ struct hash_ipport4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipport4_elem e = { .ip = 0 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip, ip_to = 0, p = 0, port, port_to; ++ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; + bool with_ports = false; + int ret; + +@@ -172,17 +172,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], + swap(port, port_to); + } + +- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); + for (; ip <= ip_to; ip++) { + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; +- for (; p <= port_to; p++) { ++ for (; p <= port_to; p++, i++) { + e.ip = htonl(ip); + e.port = htons(p); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipport4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c +index 09737de5ecc3..b7eb8d1e77d9 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipportip.c ++++ b/net/netfilter/ipset/ip_set_hash_ipportip.c +@@ -107,11 +107,11 @@ static int + hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipportip4 *h = set->data; ++ struct hash_ipportip4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportip4_elem e = { .ip = 0 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip, ip_to = 0, p = 0, port, port_to; ++ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0; + bool with_ports = false; + int ret; + +@@ -179,17 +179,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], + swap(port, port_to); + } + +- if (((u64)ip_to - ip + 1)*(port_to - 
port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + if (retried) + ip = ntohl(h->next.ip); + for (; ip <= ip_to; ip++) { + p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) + : port; +- for (; p <= port_to; p++) { ++ for (; p <= port_to; p++, i++) { + e.ip = htonl(ip); + e.port = htons(p); ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipportip4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c +index 02685371a682..16c5641ced53 100644 +--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c ++++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c +@@ -159,12 +159,12 @@ static int + hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_ipportnet4 *h = set->data; ++ struct hash_ipportnet4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0, p = 0, port, port_to; +- u32 ip2_from = 0, ip2_to = 0, ip2; ++ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0; + bool with_ports = false; + u8 cidr; + int ret; +@@ -252,9 +252,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + swap(port, port_to); + } + +- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; +- + ip2_to = ip2_from; + if (tb[IPSET_ATTR_IP2_TO]) { + ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to); +@@ -281,9 +278,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], + for (; p <= port_to; p++) { + e.port = htons(p); + do { ++ i++; + e.ip2 = htonl(ip2); + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr); + e.cidr = cidr - 1; ++ if (i > IPSET_MAX_RANGE) { ++ hash_ipportnet4_data_next(&h->next, ++ &e); ++ return -ERANGE; ++ } + ret = 
adtfn(set, &e, &ext, &ext, flags); + + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c +index 9d1beaacb973..5ab5873d1d16 100644 +--- a/net/netfilter/ipset/ip_set_hash_net.c ++++ b/net/netfilter/ipset/ip_set_hash_net.c +@@ -135,11 +135,11 @@ static int + hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_net4 *h = set->data; ++ struct hash_net4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_net4_elem e = { .cidr = HOST_MASK }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip = 0, ip_to = 0, ipn, n = 0; ++ u32 ip = 0, ip_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -187,19 +187,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], + if (ip + UINT_MAX == ip_to) + return -IPSET_ERR_HASH_RANGE; + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); +- n++; +- } while (ipn++ < ip_to); +- +- if (n > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) + ip = ntohl(h->next.ip); + do { ++ i++; + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_net4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c +index c3ada9c63fa3..7ef240380a45 100644 +--- a/net/netfilter/ipset/ip_set_hash_netiface.c ++++ b/net/netfilter/ipset/ip_set_hash_netiface.c +@@ -201,7 +201,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 ip = 0, ip_to = 0, ipn, n = 0; ++ u32 ip = 0, ip_to = 0, i = 0; + int ret; + + if 
(tb[IPSET_ATTR_LINENO]) +@@ -255,19 +255,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip, ip_to, e.cidr); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr); +- n++; +- } while (ipn++ < ip_to); +- +- if (n > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) + ip = ntohl(h->next.ip); + do { ++ i++; + e.ip = htonl(ip); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netiface4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr); + ret = adtfn(set, &e, &ext, &ext, flags); + +diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c +index b1411bc91a40..15f4b0292f0d 100644 +--- a/net/netfilter/ipset/ip_set_hash_netnet.c ++++ b/net/netfilter/ipset/ip_set_hash_netnet.c +@@ -162,13 +162,12 @@ static int + hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_netnet4 *h = set->data; ++ struct hash_netnet4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netnet4_elem e = { }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); + u32 ip = 0, ip_to = 0; +- u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn; +- u64 n = 0, m = 0; ++ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0; + int ret; + + if (tb[IPSET_ATTR_LINENO]) +@@ -244,19 +243,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]); +- n++; +- } while (ipn++ < ip_to); +- ipn = ip2_from; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]); +- m++; +- } while (ipn++ < ip2_to); +- +- if (n*m > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) { + ip = ntohl(h->next.ip[0]); +@@ -269,7 +255,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], + e.ip[0] = htonl(ip); + ip = 
ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); + do { ++ i++; + e.ip[1] = htonl(ip2); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netnet4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) +diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c +index d26d13528fe8..e73ba50afe96 100644 +--- a/net/netfilter/ipset/ip_set_hash_netport.c ++++ b/net/netfilter/ipset/ip_set_hash_netport.c +@@ -153,12 +153,11 @@ static int + hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + enum ipset_adt adt, u32 *lineno, u32 flags, bool retried) + { +- const struct hash_netport4 *h = set->data; ++ struct hash_netport4 *h = set->data; + ipset_adtfn adtfn = set->variant->adt[adt]; + struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 }; + struct ip_set_ext ext = IP_SET_INIT_UEXT(set); +- u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn; +- u64 n = 0; ++ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0; + bool with_ports = false; + u8 cidr; + int ret; +@@ -235,14 +234,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + } else { + ip_set_mask_from_to(ip, ip_to, e.cidr + 1); + } +- ipn = ip; +- do { +- ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr); +- n++; +- } while (ipn++ < ip_to); +- +- if (n*(port_to - port + 1) > IPSET_MAX_RANGE) +- return -ERANGE; + + if (retried) { + ip = ntohl(h->next.ip); +@@ -254,8 +245,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], + e.ip = htonl(ip); + ip = ip_set_range_to_cidr(ip, ip_to, &cidr); + e.cidr = cidr - 1; +- for (; p <= port_to; p++) { ++ for (; p <= port_to; p++, i++) { + e.port = htons(p); ++ if (i > IPSET_MAX_RANGE) { ++ hash_netport4_data_next(&h->next, &e); ++ return -ERANGE; ++ } + ret = adtfn(set, &e, &ext, &ext, flags); + if (ret && !ip_set_eexist(ret, flags)) + return ret; +-- +2.35.1 + diff --git 
a/queue-5.10/nfc-fix-potential-resource-leaks.patch b/queue-5.10/nfc-fix-potential-resource-leaks.patch new file mode 100644 index 00000000000..372e6f42a82 --- /dev/null +++ b/queue-5.10/nfc-fix-potential-resource-leaks.patch @@ -0,0 +1,127 @@ +From f2207976833000245dd02bdc302937f0fb5af43d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Dec 2022 11:37:18 +0400 +Subject: nfc: Fix potential resource leaks + +From: Miaoqian Lin + +[ Upstream commit df49908f3c52d211aea5e2a14a93bbe67a2cb3af ] + +nfc_get_device() takes a reference for the device, add missing +nfc_put_device() to release it when not needed anymore. +Also fix the style warning by using error EOPNOTSUPP instead of +ENOTSUPP. + +Fixes: 5ce3f32b5264 ("NFC: netlink: SE API implementation") +Fixes: 29e76924cf08 ("nfc: netlink: Add capability to reply to vendor_cmd with data") +Signed-off-by: Miaoqian Lin +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/nfc/netlink.c | 52 ++++++++++++++++++++++++++++++++++------------- + 1 file changed, 38 insertions(+), 14 deletions(-) + +diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c +index b8939ebaa6d3..610caea4feec 100644 +--- a/net/nfc/netlink.c ++++ b/net/nfc/netlink.c +@@ -1497,6 +1497,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) + u32 dev_idx, se_idx; + u8 *apdu; + size_t apdu_len; ++ int rc; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + !info->attrs[NFC_ATTR_SE_INDEX] || +@@ -1510,25 +1511,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info) + if (!dev) + return -ENODEV; + +- if (!dev->ops || !dev->ops->se_io) +- return -ENOTSUPP; ++ if (!dev->ops || !dev->ops->se_io) { ++ rc = -EOPNOTSUPP; ++ goto put_dev; ++ } + + apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]); +- if (apdu_len == 0) +- return -EINVAL; ++ if (apdu_len == 0) { ++ rc = -EINVAL; ++ goto put_dev; ++ } + + apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]); +- if (!apdu) +- return -EINVAL; ++ if (!apdu) { ++ rc =
-EINVAL; ++ goto put_dev; ++ } + + ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL); +- if (!ctx) +- return -ENOMEM; ++ if (!ctx) { ++ rc = -ENOMEM; ++ goto put_dev; ++ } + + ctx->dev_idx = dev_idx; + ctx->se_idx = se_idx; + +- return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); ++ rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx); ++ ++put_dev: ++ nfc_put_device(dev); ++ return rc; + } + + static int nfc_genl_vendor_cmd(struct sk_buff *skb, +@@ -1551,14 +1564,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, + subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]); + + dev = nfc_get_device(dev_idx); +- if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds) ++ if (!dev) + return -ENODEV; + ++ if (!dev->vendor_cmds || !dev->n_vendor_cmds) { ++ err = -ENODEV; ++ goto put_dev; ++ } ++ + if (info->attrs[NFC_ATTR_VENDOR_DATA]) { + data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]); + data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]); +- if (data_len == 0) +- return -EINVAL; ++ if (data_len == 0) { ++ err = -EINVAL; ++ goto put_dev; ++ } + } else { + data = NULL; + data_len = 0; +@@ -1573,10 +1593,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb, + dev->cur_cmd_info = info; + err = cmd->doit(dev, data, data_len); + dev->cur_cmd_info = NULL; +- return err; ++ goto put_dev; + } + +- return -EOPNOTSUPP; ++ err = -EOPNOTSUPP; ++ ++put_dev: ++ nfc_put_device(dev); ++ return err; + } + + /* message building helper */ +-- +2.35.1 + diff --git a/queue-5.10/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch b/queue-5.10/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch new file mode 100644 index 00000000000..98af5bc68ac --- /dev/null +++ b/queue-5.10/nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch @@ -0,0 +1,42 @@ +From 688e07516d4622a53321baaf96cc5241f987f149 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 09:51:30 -0500 +Subject: nfsd: shut down the NFSv4 state objects before the 
filecache + +From: Jeff Layton + +[ Upstream commit 789e1e10f214c00ca18fc6610824c5b9876ba5f2 ] + +Currently, we shut down the filecache before trying to clean up the +stateids that depend on it. This leads to the kernel trying to free an +nfsd_file twice, and a refcount overput on the nf_mark. + +Change the shutdown procedure to tear down all of the stateids prior +to shutting down the filecache. + +Reported-and-tested-by: Wang Yugui +Signed-off-by: Jeff Layton +Fixes: 5e113224c17e ("nfsd: nfsd_file cache entries should be per net namespace") +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + fs/nfsd/nfssvc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c +index 9323e30a7eaf..c7fffe1453bd 100644 +--- a/fs/nfsd/nfssvc.c ++++ b/fs/nfsd/nfssvc.c +@@ -426,8 +426,8 @@ static void nfsd_shutdown_net(struct net *net) + { + struct nfsd_net *nn = net_generic(net, nfsd_net_id); + +- nfsd_file_cache_shutdown_net(net); + nfs4_state_shutdown_net(net); ++ nfsd_file_cache_shutdown_net(net); + if (nn->lockd_up) { + lockd_down(net); + nn->lockd_up = false; +-- +2.35.1 + diff --git a/queue-5.10/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch b/queue-5.10/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch new file mode 100644 index 00000000000..bc27a812181 --- /dev/null +++ b/queue-5.10/nvme-fix-multipath-crash-caused-by-flush-request-whe.patch @@ -0,0 +1,81 @@ +From 578e47c9239ca21d035014344b391312620c422d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 09:57:21 +0800 +Subject: nvme: fix multipath crash caused by flush request when blktrace is + enabled + +From: Yanjun Zhang + +[ Upstream commit 3659fb5ac29a5e6102bebe494ac789fd47fb78f4 ] + +The flush request initialized by blk_kick_flush has NULL bio, +and it may be dealt with nvme_end_req during io completion. 
+When blktrace is enabled, nvme_trace_bio_complete with multipath +activated trying to access NULL pointer bio from flush request +results in the following crash: + +[ 2517.831677] BUG: kernel NULL pointer dereference, address: 000000000000001a +[ 2517.835213] #PF: supervisor read access in kernel mode +[ 2517.838724] #PF: error_code(0x0000) - not-present page +[ 2517.842222] PGD 7b2d51067 P4D 0 +[ 2517.845684] Oops: 0000 [#1] SMP NOPTI +[ 2517.849125] CPU: 2 PID: 732 Comm: kworker/2:1H Kdump: loaded Tainted: G S 5.15.67-0.cl9.x86_64 #1 +[ 2517.852723] Hardware name: XFUSION 2288H V6/BC13MBSBC, BIOS 1.13 07/27/2022 +[ 2517.856358] Workqueue: nvme_tcp_wq nvme_tcp_io_work [nvme_tcp] +[ 2517.859993] RIP: 0010:blk_add_trace_bio_complete+0x6/0x30 +[ 2517.863628] Code: 1f 44 00 00 48 8b 46 08 31 c9 ba 04 00 10 00 48 8b 80 50 03 00 00 48 8b 78 50 e9 e5 fe ff ff 0f 1f 44 00 00 41 54 49 89 f4 55 <0f> b6 7a 1a 48 89 d5 e8 3e 1c 2b 00 48 89 ee 4c 89 e7 5d 89 c1 ba +[ 2517.871269] RSP: 0018:ff7f6a008d9dbcd0 EFLAGS: 00010286 +[ 2517.875081] RAX: ff3d5b4be00b1d50 RBX: 0000000002040002 RCX: ff3d5b0a270f2000 +[ 2517.878966] RDX: 0000000000000000 RSI: ff3d5b0b021fb9f8 RDI: 0000000000000000 +[ 2517.882849] RBP: ff3d5b0b96a6fa00 R08: 0000000000000001 R09: 0000000000000000 +[ 2517.886718] R10: 000000000000000c R11: 000000000000000c R12: ff3d5b0b021fb9f8 +[ 2517.890575] R13: 0000000002000000 R14: ff3d5b0b021fb1b0 R15: 0000000000000018 +[ 2517.894434] FS: 0000000000000000(0000) GS:ff3d5b42bfc80000(0000) knlGS:0000000000000000 +[ 2517.898299] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[ 2517.902157] CR2: 000000000000001a CR3: 00000004f023e005 CR4: 0000000000771ee0 +[ 2517.906053] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +[ 2517.909930] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +[ 2517.913761] PKRU: 55555554 +[ 2517.917558] Call Trace: +[ 2517.921294] +[ 2517.924982] nvme_complete_rq+0x1c3/0x1e0 [nvme_core] +[ 2517.928715] 
nvme_tcp_recv_pdu+0x4d7/0x540 [nvme_tcp] +[ 2517.932442] nvme_tcp_recv_skb+0x4f/0x240 [nvme_tcp] +[ 2517.936137] ? nvme_tcp_recv_pdu+0x540/0x540 [nvme_tcp] +[ 2517.939830] tcp_read_sock+0x9c/0x260 +[ 2517.943486] nvme_tcp_try_recv+0x65/0xa0 [nvme_tcp] +[ 2517.947173] nvme_tcp_io_work+0x64/0x90 [nvme_tcp] +[ 2517.950834] process_one_work+0x1e8/0x390 +[ 2517.954473] worker_thread+0x53/0x3c0 +[ 2517.958069] ? process_one_work+0x390/0x390 +[ 2517.961655] kthread+0x10c/0x130 +[ 2517.965211] ? set_kthread_struct+0x40/0x40 +[ 2517.968760] ret_from_fork+0x1f/0x30 +[ 2517.972285] + +To avoid this situation, add a NULL check for req->bio before +calling trace_block_bio_complete. + +Signed-off-by: Yanjun Zhang +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/nvme.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h +index 86336496c65c..c3e4d9b6f9c0 100644 +--- a/drivers/nvme/host/nvme.h ++++ b/drivers/nvme/host/nvme.h +@@ -749,7 +749,7 @@ static inline void nvme_trace_bio_complete(struct request *req, + { + struct nvme_ns *ns = req->q->queuedata; + +- if (req->cmd_flags & REQ_NVME_MPATH) ++ if ((req->cmd_flags & REQ_NVME_MPATH) && req->bio) + trace_block_bio_complete(ns->head->disk->queue, req->bio); + } + +-- +2.35.1 + diff --git a/queue-5.10/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch b/queue-5.10/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch new file mode 100644 index 00000000000..18038d4fb3d --- /dev/null +++ b/queue-5.10/perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch @@ -0,0 +1,92 @@ +From dc0916234e76bd77e113ff84af947a8fd888f824 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 5 Nov 2022 12:01:14 +0900 +Subject: perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as + unsinged data + +From: Masami Hiramatsu (Google) + +[ Upstream commit a9dfc46c67b52ad43b8e335e28f4cf8002c67793 ] + +DWARF version 5 
standard Sec 2.14 says that + + Any debugging information entry representing the declaration of an object, + module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and + DW_AT_decl_column attributes, each of whose value is an unsigned integer + constant. + +So it should be an unsigned integer data. Also, even though the standard +doesn't clearly say the DW_AT_call_file is signed or unsigned, the +elfutils (eu-readelf) interprets it as unsigned integer data and it is +natural to handle it as unsigned integer data as same as DW_AT_decl_file. +This changes the DW_AT_call_file as unsigned integer data too. + +Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances") +Signed-off-by: Masami Hiramatsu +Acked-by: Namhyung Kim +Cc: Alexander Shishkin +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Masami Hiramatsu +Cc: Peter Zijlstra +Cc: stable@vger.kernel.org +Cc: Steven Rostedt (VMware) +Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3 +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/dwarf-aux.c | 21 ++++----------------- + 1 file changed, 4 insertions(+), 17 deletions(-) + +diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c +index dc02685a1eec..f8a10d5148f6 100644 +--- a/tools/perf/util/dwarf-aux.c ++++ b/tools/perf/util/dwarf-aux.c +@@ -315,19 +315,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, + return 0; + } + +-/* Get attribute and translate it as a sdata */ +-static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, +- Dwarf_Sword *result) +-{ +- Dwarf_Attribute attr; +- +- if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || +- dwarf_formsdata(&attr, result) != 0) +- return -ENOENT; +- +- return 0; +-} +- + /** + * die_is_signed_type - Check whether a type DIE is signed or not + * @tp_die: a DIE of a type +@@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die 
*mb_die, Dwarf_Word *offs) + /* Get the call file index number in CU DIE */ + static int die_get_call_fileno(Dwarf_Die *in_die) + { +- Dwarf_Sword idx; ++ Dwarf_Word idx; + +- if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) ++ if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0) + return (int)idx; + else + return -ENOENT; +@@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die) + /* Get the declared file index number in CU DIE */ + static int die_get_decl_fileno(Dwarf_Die *pdie) + { +- Dwarf_Sword idx; ++ Dwarf_Word idx; + +- if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) ++ if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0) + return (int)idx; + else + return -ENOENT; +-- +2.35.1 + diff --git a/queue-5.10/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch b/queue-5.10/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch new file mode 100644 index 00000000000..903c312f6bf --- /dev/null +++ b/queue-5.10/perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch @@ -0,0 +1,54 @@ +From e480e680d894c61da72df39bfd72d3f251bff844 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Nov 2022 22:48:39 +0900 +Subject: perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor + +From: Masami Hiramatsu (Google) + +[ Upstream commit f828929ab7f0dc3353e4a617f94f297fa8f3dec3 ] + +Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute +accessor functions, so that it can find the specified attribute from +abstract origin DIE etc.
+ +Signed-off-by: Masami Hiramatsu +Acked-by: Namhyung Kim +Cc: Alexander Shishkin +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Peter Zijlstra +Cc: Steven Rostedt (VMware) +Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3 +Signed-off-by: Arnaldo Carvalho de Melo +Stable-dep-of: a9dfc46c67b5 ("perf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data") +Signed-off-by: Sasha Levin +--- + tools/perf/util/dwarf-aux.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c +index 4343356f3cf9..dc02685a1eec 100644 +--- a/tools/perf/util/dwarf-aux.c ++++ b/tools/perf/util/dwarf-aux.c +@@ -308,7 +308,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, + { + Dwarf_Attribute attr; + +- if (dwarf_attr(tp_die, attr_name, &attr) == NULL || ++ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || + dwarf_formudata(&attr, result) != 0) + return -ENOENT; + +@@ -321,7 +321,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, + { + Dwarf_Attribute attr; + +- if (dwarf_attr(tp_die, attr_name, &attr) == NULL || ++ if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || + dwarf_formsdata(&attr, result) != 0) + return -ENOENT; + +-- +2.35.1 + diff --git a/queue-5.10/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch b/queue-5.10/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch new file mode 100644 index 00000000000..d51ed74696f --- /dev/null +++ b/queue-5.10/perf-tools-fix-resources-leak-in-perf_data__open_dir.patch @@ -0,0 +1,52 @@ +From 42bfbd41424e10fcc90207a8958b6eac3b94d26f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Dec 2022 13:09:00 +0400 +Subject: perf tools: Fix resources leak in perf_data__open_dir() + +From: Miaoqian Lin + +[ Upstream commit 0a6564ebd953c4590663c9a3c99a3ea9920ade6f ] + +In perf_data__open_dir(), opendir() opens the 
directory stream. Add +missing closedir() to release it after use. + +Fixes: eb6176709b235b96 ("perf data: Add perf_data__open_dir_data function") +Reviewed-by: Adrian Hunter +Signed-off-by: Miaoqian Lin +Cc: Alexander Shishkin +Cc: Alexey Bayduraev +Cc: Ingo Molnar +Cc: Jiri Olsa +Cc: Mark Rutland +Cc: Namhyung Kim +Cc: Peter Zijlstra +Link: https://lore.kernel.org/r/20221229090903.1402395-1-linmq006@gmail.com +Signed-off-by: Arnaldo Carvalho de Melo +Signed-off-by: Sasha Levin +--- + tools/perf/util/data.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c +index 48754083791d..29d32ba046b5 100644 +--- a/tools/perf/util/data.c ++++ b/tools/perf/util/data.c +@@ -127,6 +127,7 @@ int perf_data__open_dir(struct perf_data *data) + file->size = st.st_size; + } + ++ closedir(dir); + if (!files) + return -EINVAL; + +@@ -135,6 +136,7 @@ int perf_data__open_dir(struct perf_data *data) + return 0; + + out_err: ++ closedir(dir); + close_dir(files, nr); + return ret; + } +-- +2.35.1 + diff --git a/queue-5.10/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch b/queue-5.10/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch new file mode 100644 index 00000000000..a404161ebe8 --- /dev/null +++ b/queue-5.10/qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch @@ -0,0 +1,103 @@ +From cec73bb81bf3534b075582b5e336bf854e379150 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Dec 2022 14:52:28 +0300 +Subject: qlcnic: prevent ->dcb use-after-free on qlcnic_dcb_enable() failure + +From: Daniil Tatianin + +[ Upstream commit 13a7c8964afcd8ca43c0b6001ebb0127baa95362 ] + +adapter->dcb would get silently freed inside qlcnic_dcb_enable() in +case qlcnic_dcb_attach() would return an error, which always happens +under OOM conditions. This would lead to use-after-free because both +of the existing callers invoke qlcnic_dcb_get_info() on the obtained +pointer, which is potentially freed at that point. 
+ +Propagate errors from qlcnic_dcb_enable(), and instead free the dcb +pointer at callsite using qlcnic_dcb_free(). This also removes the now +unused qlcnic_clear_dcb_ops() helper, which was a simple wrapper around +kfree() also causing memory leaks for partially initialized dcb. + +Found by Linux Verification Center (linuxtesting.org) with the SVACE +static analysis tool. + +Fixes: 3c44bba1d270 ("qlcnic: Disable DCB operations from SR-IOV VFs") +Reviewed-by: Michal Swiatkowski +Signed-off-by: Daniil Tatianin +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c | 8 +++++++- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h | 10 ++-------- + drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c | 8 +++++++- + 3 files changed, 16 insertions(+), 10 deletions(-) + +diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +index d2c190732d3e..beeeec8516b8 100644 +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_init.c +@@ -2505,7 +2505,13 @@ int qlcnic_83xx_init(struct qlcnic_adapter *adapter, int pci_using_dac) + goto disable_mbx_intr; + + qlcnic_83xx_clear_function_resources(adapter); +- qlcnic_dcb_enable(adapter->dcb); ++ ++ err = qlcnic_dcb_enable(adapter->dcb); ++ if (err) { ++ qlcnic_dcb_free(adapter->dcb); ++ goto disable_mbx_intr; ++ } ++ + qlcnic_83xx_initialize_nic(adapter, 1); + qlcnic_dcb_get_info(adapter->dcb); + +diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +index 7519773eaca6..22afa2be85fd 100644 +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_dcb.h +@@ -41,11 +41,6 @@ struct qlcnic_dcb { + unsigned long state; + }; + +-static inline void qlcnic_clear_dcb_ops(struct qlcnic_dcb *dcb) +-{ +- kfree(dcb); +-} +- + static inline int 
qlcnic_dcb_get_hw_capability(struct qlcnic_dcb *dcb) + { + if (dcb && dcb->ops->get_hw_capability) +@@ -112,9 +107,8 @@ static inline void qlcnic_dcb_init_dcbnl_ops(struct qlcnic_dcb *dcb) + dcb->ops->init_dcbnl_ops(dcb); + } + +-static inline void qlcnic_dcb_enable(struct qlcnic_dcb *dcb) ++static inline int qlcnic_dcb_enable(struct qlcnic_dcb *dcb) + { +- if (dcb && qlcnic_dcb_attach(dcb)) +- qlcnic_clear_dcb_ops(dcb); ++ return dcb ? qlcnic_dcb_attach(dcb) : 0; + } + #endif +diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +index 27c07b2412f4..44b745293fd0 100644 +--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c ++++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +@@ -2622,7 +2622,13 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) + "Device does not support MSI interrupts\n"); + + if (qlcnic_82xx_check(adapter)) { +- qlcnic_dcb_enable(adapter->dcb); ++ err = qlcnic_dcb_enable(adapter->dcb); ++ if (err) { ++ qlcnic_dcb_free(adapter->dcb); ++ dev_err(&pdev->dev, "Failed to enable DCB\n"); ++ goto err_out_free_hw; ++ } ++ + qlcnic_dcb_get_info(adapter->dcb); + err = qlcnic_setup_intr(adapter); + +-- +2.35.1 + diff --git a/queue-5.10/ravb-fix-failed-to-switch-device-to-config-mode-mess.patch b/queue-5.10/ravb-fix-failed-to-switch-device-to-config-mode-mess.patch new file mode 100644 index 00000000000..2428a95053d --- /dev/null +++ b/queue-5.10/ravb-fix-failed-to-switch-device-to-config-mode-mess.patch @@ -0,0 +1,68 @@ +From 33bf3c123913208621351b93993c0f9321a8d536 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 14 Dec 2022 10:51:18 +0000 +Subject: ravb: Fix "failed to switch device to config mode" message during + unbind + +From: Biju Das + +[ Upstream commit c72a7e42592b2e18d862cf120876070947000d7a ] + +This patch fixes the error "ravb 11c20000.ethernet eth0: failed to switch +device to config mode" during unbind. 
+ +We are doing register access after pm_runtime_put_sync(). + +We usually do cleanup in reverse order of init. Currently in +remove(), the "pm_runtime_put_sync" is not in reverse order. + +Probe + reset_control_deassert(rstc); + pm_runtime_enable(&pdev->dev); + pm_runtime_get_sync(&pdev->dev); + +remove + pm_runtime_put_sync(&pdev->dev); + unregister_netdev(ndev); + .. + ravb_mdio_release(priv); + pm_runtime_disable(&pdev->dev); + +Consider the call to unregister_netdev() +unregister_netdev->unregister_netdevice_queue->rollback_registered_many +that calls the below functions which access the registers after +pm_runtime_put_sync() + 1) ravb_get_stats + 2) ravb_close + +Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper") +Cc: stable@vger.kernel.org +Signed-off-by: Biju Das +Reviewed-by: Leon Romanovsky +Link: https://lore.kernel.org/r/20221214105118.2495313-1-biju.das.jz@bp.renesas.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/renesas/ravb_main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c +index 9e7b85e178fd..9ec6d63691aa 100644 +--- a/drivers/net/ethernet/renesas/ravb_main.c ++++ b/drivers/net/ethernet/renesas/ravb_main.c +@@ -2253,11 +2253,11 @@ static int ravb_remove(struct platform_device *pdev) + priv->desc_bat_dma); + /* Set reset mode */ + ravb_write(ndev, CCC_OPC_RESET, CCC); +- pm_runtime_put_sync(&pdev->dev); + unregister_netdev(ndev); + netif_napi_del(&priv->napi[RAVB_NC]); + netif_napi_del(&priv->napi[RAVB_BE]); + ravb_mdio_release(priv); ++ pm_runtime_put_sync(&pdev->dev); + pm_runtime_disable(&pdev->dev); + free_netdev(ndev); + platform_set_drvdata(pdev, NULL); +-- +2.35.1 + diff --git a/queue-5.10/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch b/queue-5.10/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch new file mode 100644 index 00000000000..1b379178660 --- /dev/null 
+++ b/queue-5.10/rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch @@ -0,0 +1,95 @@ +From 993586699c4ab15e27a8eae08031347c7bad271b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Dec 2022 14:56:10 +0200 +Subject: RDMA/mlx5: Fix validation of max_rd_atomic caps for DC + +From: Maor Gottlieb + +[ Upstream commit 8de8482fe5732fbef4f5af82bc0c0362c804cd1f ] + +Currently, when modifying DC, we validate max_rd_atomic user attribute +against the RC cap, validate against DC. RC and DC QP types have different +device limitations. + +This can cause userspace created DC QPs to malfunction. + +Fixes: c32a4f296e1d ("IB/mlx5: Add support for DC Initiator QP") +Link: https://lore.kernel.org/r/0c5aee72cea188c3bb770f4207cce7abc9b6fc74.1672231736.git.leonro@nvidia.com +Signed-off-by: Maor Gottlieb +Signed-off-by: Leon Romanovsky +Signed-off-by: Sasha Levin +--- + drivers/infiniband/hw/mlx5/qp.c | 49 +++++++++++++++++++++++---------- + 1 file changed, 35 insertions(+), 14 deletions(-) + +diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c +index 7a2bec0ac005..0caff276f2c1 100644 +--- a/drivers/infiniband/hw/mlx5/qp.c ++++ b/drivers/infiniband/hw/mlx5/qp.c +@@ -4258,6 +4258,40 @@ static bool mlx5_ib_modify_qp_allowed(struct mlx5_ib_dev *dev, + return false; + } + ++static int validate_rd_atomic(struct mlx5_ib_dev *dev, struct ib_qp_attr *attr, ++ int attr_mask, enum ib_qp_type qp_type) ++{ ++ int log_max_ra_res; ++ int log_max_ra_req; ++ ++ if (qp_type == MLX5_IB_QPT_DCI) { ++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_res_dc); ++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_req_dc); ++ } else { ++ log_max_ra_res = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_res_qp); ++ log_max_ra_req = 1 << MLX5_CAP_GEN(dev->mdev, ++ log_max_ra_req_qp); ++ } ++ ++ if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && ++ attr->max_rd_atomic > log_max_ra_res) { ++ mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", ++ 
attr->max_rd_atomic); ++ return false; ++ } ++ ++ if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && ++ attr->max_dest_rd_atomic > log_max_ra_req) { ++ mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", ++ attr->max_dest_rd_atomic); ++ return false; ++ } ++ return true; ++} ++ + int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) + { +@@ -4352,21 +4386,8 @@ int mlx5_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + } + } + +- if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC && +- attr->max_rd_atomic > +- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_res_qp))) { +- mlx5_ib_dbg(dev, "invalid max_rd_atomic value %d\n", +- attr->max_rd_atomic); +- goto out; +- } +- +- if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC && +- attr->max_dest_rd_atomic > +- (1 << MLX5_CAP_GEN(dev->mdev, log_max_ra_req_qp))) { +- mlx5_ib_dbg(dev, "invalid max_dest_rd_atomic value %d\n", +- attr->max_dest_rd_atomic); ++ if (!validate_rd_atomic(dev, attr, attr_mask, qp_type)) + goto out; +- } + + if (cur_state == new_state && cur_state == IB_QPS_RESET) { + err = 0; +-- +2.35.1 + diff --git a/queue-5.10/riscv-stacktrace-fix-stack-output-without-ra-on-the-.patch b/queue-5.10/riscv-stacktrace-fix-stack-output-without-ra-on-the-.patch new file mode 100644 index 00000000000..8b1a74ebfe6 --- /dev/null +++ b/queue-5.10/riscv-stacktrace-fix-stack-output-without-ra-on-the-.patch @@ -0,0 +1,72 @@ +From afa9eeebca764368dc006710606b9309b4ab596c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 11 Jan 2021 20:40:14 +0800 +Subject: riscv/stacktrace: Fix stack output without ra on the stack top + +From: Chen Huang + +[ Upstream commit f766f77a74f5784d8d4d3c36b1900731f97d08d0 ] + +When a function doesn't have a callee, then it will not +push ra into the stack, such as lkdtm_BUG() function, + +addi sp,sp,-16 +sd s0,8(sp) +addi s0,sp,16 +ebreak + +The struct stackframe use {fp,ra} to get information from +stack, if walk_stackframe() with pr_regs, we will obtain 
+wrong value and bad stacktrace, + +[] lkdtm_BUG+0x6/0x8 +---[ end trace 18da3fbdf08e25d5 ]--- + +Correct the next fp and pc, after that, full stacktrace +shown as expects, + +[] lkdtm_BUG+0x6/0x8 +[] lkdtm_do_action+0x14/0x1c +[] direct_entry+0xc0/0x10a +[] full_proxy_write+0x42/0x6a +[] vfs_write+0x7e/0x214 +[] ksys_write+0x98/0xc0 +[] sys_write+0xe/0x16 +[] ret_from_syscall+0x0/0x2 +---[ end trace 61917f3d9a9fadcd ]--- + +Signed-off-by: Chen Huang +Signed-off-by: Kefeng Wang +Signed-off-by: Palmer Dabbelt +Stable-dep-of: 5c3022e4a616 ("riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument") +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/stacktrace.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c +index 595342910c3f..6cbde6b43fd2 100644 +--- a/arch/riscv/kernel/stacktrace.c ++++ b/arch/riscv/kernel/stacktrace.c +@@ -57,9 +57,15 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, + /* Unwind stack frame */ + frame = (struct stackframe *)fp - 1; + sp = fp; +- fp = frame->fp; +- pc = ftrace_graph_ret_addr(current, NULL, frame->ra, +- (unsigned long *)(fp - 8)); ++ if (regs && (regs->epc == pc) && (frame->fp & 0x7)) { ++ fp = frame->ra; ++ pc = regs->ra; ++ } else { ++ fp = frame->fp; ++ pc = ftrace_graph_ret_addr(current, NULL, frame->ra, ++ (unsigned long *)(fp - 8)); ++ } ++ + } + } + +-- +2.35.1 + diff --git a/queue-5.10/riscv-stacktrace-fixup-ftrace_graph_ret_addr-retp-ar.patch b/queue-5.10/riscv-stacktrace-fixup-ftrace_graph_ret_addr-retp-ar.patch new file mode 100644 index 00000000000..9217b03ff8d --- /dev/null +++ b/queue-5.10/riscv-stacktrace-fixup-ftrace_graph_ret_addr-retp-ar.patch @@ -0,0 +1,41 @@ +From 831306ee94a03f0106cd822d683a3d63dcd88713 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 01:49:36 -0500 +Subject: riscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument + +From: Guo Ren + +[ 
Upstream commit 5c3022e4a616d800cf5f4c3a981d7992179e44a1 ] + +The 'retp' is a pointer to the return address on the stack, so we +must pass the current return address pointer as the 'retp' +argument to ftrace_push_return_trace(). Not parent function's +return address on the stack. + +Fixes: b785ec129bd9 ("riscv/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support") +Signed-off-by: Guo Ren +Signed-off-by: Guo Ren +Link: https://lore.kernel.org/r/20221109064937.3643993-2-guoren@kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/stacktrace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c +index 6cbde6b43fd2..1e53fbe5eb78 100644 +--- a/arch/riscv/kernel/stacktrace.c ++++ b/arch/riscv/kernel/stacktrace.c +@@ -63,7 +63,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, + } else { + fp = frame->fp; + pc = ftrace_graph_ret_addr(current, NULL, frame->ra, +- (unsigned long *)(fp - 8)); ++ &frame->ra); + } + + } +-- +2.35.1 + diff --git a/queue-5.10/series b/queue-5.10/series index 497279a365f..21fc4452fc0 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -696,3 +696,64 @@ x86-mce-amd-clear-dfr-errors-found-in-thr-handler.patch media-s5p-mfc-fix-to-handle-reference-queue-during-f.patch media-s5p-mfc-clear-workbit-to-handle-error-conditio.patch media-s5p-mfc-fix-in-register-read-and-write-for-h26.patch +perf-probe-use-dwarf_attr_integrate-as-generic-dwarf.patch +perf-probe-fix-to-get-the-dw_at_decl_file-and-dw_at_.patch +x86-kprobes-convert-to-insn_decode.patch +x86-kprobes-fix-optprobe-optimization-check-with-con.patch +staging-media-tegra-video-fix-device_node-use-after-.patch +ravb-fix-failed-to-switch-device-to-config-mode-mess.patch +riscv-stacktrace-fix-stack-output-without-ra-on-the-.patch +riscv-stacktrace-fixup-ftrace_graph_ret_addr-retp-ar.patch 
+ext4-goto-right-label-failed_mount3a.patch +ext4-correct-inconsistent-error-msg-in-nojournal-mod.patch +mm-highmem-lift-memcpy_-to-from-_page-to-core.patch +ext4-use-memcpy_to_page-in-pagecache_write.patch +fs-ext4-initialize-fsdata-in-pagecache_write.patch +ext4-move-functions-in-super.c.patch +ext4-simplify-ext4-error-translation.patch +ext4-fix-various-seppling-typos.patch +ext4-fix-leaking-uninitialized-memory-in-fast-commit.patch +ext4-use-kmemdup-to-replace-kmalloc-memcpy.patch +mbcache-don-t-reclaim-used-entries.patch +mbcache-add-functions-to-delete-entry-if-unused.patch +ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch +ext4-unindent-codeblock-in-ext4_xattr_block_set.patch +ext4-fix-race-when-reusing-xattr-blocks.patch +mbcache-automatically-delete-entries-from-cache-on-f.patch +ext4-fix-deadlock-due-to-mbcache-entry-corruption.patch +sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch +bpf-pull-before-calling-skb_postpull_rcsum.patch +drm-panfrost-fix-gem-handle-creation-ref-counting.patch +vmxnet3-correctly-report-csum_level-for-encapsulated.patch +veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch +nfsd-shut-down-the-nfsv4-state-objects-before-the-fi.patch +net-hns3-add-interrupts-re-initialization-while-doin.patch +net-sched-fix-memory-leak-in-tcindex_set_parms.patch +qlcnic-prevent-dcb-use-after-free-on-qlcnic_dcb_enab.patch +nfc-fix-potential-resource-leaks.patch +vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch +vringh-fix-range-used-in-iotlb_translate.patch +vhost-fix-range-used-in-translate_desc.patch +net-mlx5-add-forgotten-cleanup-calls-into-mlx5_init_.patch +net-mlx5-avoid-recovery-in-probe-flows.patch +net-mlx5e-ipoib-don-t-allow-cqe-compression-to-be-tu.patch +net-mlx5e-fix-hw-mtu-initializing-at-xdp-sq-allocati.patch +net-amd-xgbe-add-missed-tasklet_kill.patch +net-phy-xgmiitorgmii-fix-refcount-leak-in-xgmiitorgm.patch +rdma-mlx5-fix-validation-of-max_rd_atomic-caps-for-d.patch 
+drm-meson-reduce-the-fifo-lines-held-when-afbc-is-no.patch +filelock-new-helper-vfs_inode_has_locks.patch +ceph-switch-to-vfs_inode_has_locks-to-fix-file-lock-.patch +gpio-sifive-fix-refcount-leak-in-sifive_gpio_probe.patch +net-sched-atm-dont-intepret-cls-results-when-asked-t.patch +net-sched-cbq-dont-intepret-cls-results-when-asked-t.patch +netfilter-ipset-fix-hash-net-port-net-hang-with-0-su.patch +netfilter-ipset-rework-long-task-execution-when-addi.patch +perf-tools-fix-resources-leak-in-perf_data__open_dir.patch +drivers-net-bonding-bond_3ad-return-when-there-s-no-.patch +usb-rndis_host-secure-rndis_query-check-against-int-.patch +drm-i915-unpin-on-error-in-intel_vgpu_shadow_mm_pin.patch +caif-fix-memory-leak-in-cfctrl_linkup_request.patch +udf-fix-extension-of-the-last-extent-in-the-file.patch +asoc-intel-bytcr_rt5640-add-quirk-for-the-advantech-.patch +nvme-fix-multipath-crash-caused-by-flush-request-whe.patch diff --git a/queue-5.10/staging-media-tegra-video-fix-device_node-use-after-.patch b/queue-5.10/staging-media-tegra-video-fix-device_node-use-after-.patch new file mode 100644 index 00000000000..93bf5f19c88 --- /dev/null +++ b/queue-5.10/staging-media-tegra-video-fix-device_node-use-after-.patch @@ -0,0 +1,63 @@ +From 313fcdde82b935db9e65234c26bba235ac95bd6e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 2 Nov 2022 12:01:02 +0100 +Subject: staging: media: tegra-video: fix device_node use after free + +From: Luca Ceresoli + +[ Upstream commit c4d344163c3a7f90712525f931a6c016bbb35e18 ] + +At probe time this code path is followed: + + * tegra_csi_init + * tegra_csi_channels_alloc + * for_each_child_of_node(node, channel) -- iterates over channels + * automatically gets 'channel' + * tegra_csi_channel_alloc() + * saves into chan->of_node a pointer to the channel OF node + * automatically gets and puts 'channel' + * now the node saved in chan->of_node has refcount 0, can disappear + * tegra_csi_channels_init + * iterates over channels + * 
tegra_csi_channel_init -- uses chan->of_node + +After that, chan->of_node keeps storing the node until the device is +removed. + +of_node_get() the node and of_node_put() it during teardown to avoid any +risk. + +Fixes: 1ebaeb09830f ("media: tegra-video: Add support for external sensor capture") +Cc: stable@vger.kernel.org +Cc: Sowjanya Komatineni +Signed-off-by: Luca Ceresoli +Signed-off-by: Hans Verkuil +Signed-off-by: Sasha Levin +--- + drivers/staging/media/tegra-video/csi.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/staging/media/tegra-video/csi.c b/drivers/staging/media/tegra-video/csi.c +index edfdf6db457d..dc5d432a09e8 100644 +--- a/drivers/staging/media/tegra-video/csi.c ++++ b/drivers/staging/media/tegra-video/csi.c +@@ -420,7 +420,7 @@ static int tegra_csi_channel_alloc(struct tegra_csi *csi, + chan->csi = csi; + chan->csi_port_num = port_num; + chan->numlanes = lanes; +- chan->of_node = node; ++ chan->of_node = of_node_get(node); + chan->numpads = num_pads; + if (num_pads & 0x2) { + chan->pads[0].flags = MEDIA_PAD_FL_SINK; +@@ -621,6 +621,7 @@ static void tegra_csi_channels_cleanup(struct tegra_csi *csi) + media_entity_cleanup(&subdev->entity); + } + ++ of_node_put(chan->of_node); + list_del(&chan->list); + kfree(chan); + } +-- +2.35.1 + diff --git a/queue-5.10/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch b/queue-5.10/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch new file mode 100644 index 00000000000..7b21336b2a3 --- /dev/null +++ b/queue-5.10/sunrpc-ensure-the-matching-upcall-is-in-flight-upon-.patch @@ -0,0 +1,133 @@ +From 60fb278dfec2a5c778924ec719059742d758f667 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 13 Dec 2022 13:14:31 +0900 +Subject: SUNRPC: ensure the matching upcall is in-flight upon downcall + +From: minoura makoto + +[ Upstream commit b18cba09e374637a0a3759d856a6bca94c133952 ] + +Commit 9130b8dbc6ac ("SUNRPC: allow for upcalls for the same uid +but different gss 
service") introduced `auth` argument to +__gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL +since it (and auth->service) was not (yet) determined. + +When multiple upcalls with the same uid and different service are +ongoing, it could happen that __gss_find_upcall(), which returns the +first match found in the pipe->in_downcall list, could not find the +correct gss_msg corresponding to the downcall we are looking for. +Moreover, it might return a msg which is not sent to rpc.gssd yet. + +We could see mount.nfs process hung in D state with multiple mount.nfs +are executed in parallel. The call trace below is of CentOS 7.9 +kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/ +elrepo kernel-ml-6.0.7-1.el7. + +PID: 71258 TASK: ffff91ebd4be0000 CPU: 36 COMMAND: "mount.nfs" + #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f + #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9 + #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss] + #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc +[sunrpc] + #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss] + #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc] + #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc] + #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc] + #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc] + #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc] + +The scenario is like this. Let's say there are two upcalls for +services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe. + +When rpc.gssd reads pipe to get the upcall msg corresponding to +service B from pipe->pipe and then writes the response, in +gss_pipe_downcall the msg corresponding to service A will be picked +because only uid is used to find the msg and it is before the one for +B in pipe->in_downcall. And the process waiting for the msg +corresponding to service A will be woken up. 
+ +Actual scheduling of that process might be after rpc.gssd processes the +next msg. In rpc_pipe_generic_upcall it clears msg->errno (for A). +The process is scheduled to see gss_msg->ctx == NULL and +gss_msg->msg.errno == 0, therefore it cannot break the loop in +gss_create_upcall and is never woken up after that. + +This patch adds a simple check to ensure that a msg which is not +sent to rpc.gssd yet is not chosen as the matching upcall upon +receiving a downcall. + +Signed-off-by: minoura makoto +Signed-off-by: Hiroshi Shimamoto +Tested-by: Hiroshi Shimamoto +Cc: Trond Myklebust +Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service") +Signed-off-by: Trond Myklebust +Signed-off-by: Sasha Levin +--- + include/linux/sunrpc/rpc_pipe_fs.h | 5 +++++ + net/sunrpc/auth_gss/auth_gss.c | 19 +++++++++++++++++-- + 2 files changed, 22 insertions(+), 2 deletions(-) + +diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h +index cd188a527d16..3b35b6f6533a 100644 +--- a/include/linux/sunrpc/rpc_pipe_fs.h ++++ b/include/linux/sunrpc/rpc_pipe_fs.h +@@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *, + char __user *, size_t); + extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *); + ++/* returns true if the msg is in-flight, i.e., already eaten by the peer */ ++static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) { ++ return (msg->copied != 0 && list_empty(&msg->list)); ++} ++ + struct rpc_clnt; + extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *); + extern int rpc_remove_client_dir(struct rpc_clnt *); +diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c +index 5f42aa5fc612..2ff66a6a7e54 100644 +--- a/net/sunrpc/auth_gss/auth_gss.c ++++ b/net/sunrpc/auth_gss/auth_gss.c +@@ -301,7 +301,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth +
list_for_each_entry(pos, &pipe->in_downcall, list) { + if (!uid_eq(pos->uid, uid)) + continue; +- if (auth && pos->auth->service != auth->service) ++ if (pos->auth->service != auth->service) + continue; + refcount_inc(&pos->count); + return pos; +@@ -685,6 +685,21 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred) + return err; + } + ++static struct gss_upcall_msg * ++gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid) ++{ ++ struct gss_upcall_msg *pos; ++ list_for_each_entry(pos, &pipe->in_downcall, list) { ++ if (!uid_eq(pos->uid, uid)) ++ continue; ++ if (!rpc_msg_is_inflight(&pos->msg)) ++ continue; ++ refcount_inc(&pos->count); ++ return pos; ++ } ++ return NULL; ++} ++ + #define MSG_BUF_MAXSIZE 1024 + + static ssize_t +@@ -731,7 +746,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) + err = -ENOENT; + /* Find a matching upcall */ + spin_lock(&pipe->lock); +- gss_msg = __gss_find_upcall(pipe, uid, NULL); ++ gss_msg = gss_find_downcall(pipe, uid); + if (gss_msg == NULL) { + spin_unlock(&pipe->lock); + goto err_put_ctx; +-- +2.35.1 + diff --git a/queue-5.10/udf-fix-extension-of-the-last-extent-in-the-file.patch b/queue-5.10/udf-fix-extension-of-the-last-extent-in-the-file.patch new file mode 100644 index 00000000000..dd8e1861309 --- /dev/null +++ b/queue-5.10/udf-fix-extension-of-the-last-extent-in-the-file.patch @@ -0,0 +1,37 @@ +From a907a679427fde4f26b7adee8f228079c3052710 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 21 Dec 2022 17:45:51 +0100 +Subject: udf: Fix extension of the last extent in the file + +From: Jan Kara + +[ Upstream commit 83c7423d1eb6806d13c521d1002cc1a012111719 ] + +When extending the last extent in the file within the last block, we +wrongly computed the length of the last extent. This is mostly a +cosmetical problem since the extent does not contain any data and the +length will be fixed up by following operations but still. 
+ +Fixes: 1f3868f06855 ("udf: Fix extending file within last block") +Signed-off-by: Jan Kara +Signed-off-by: Sasha Levin +--- + fs/udf/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/udf/inode.c b/fs/udf/inode.c +index e94a18bb7f99..2132bfab67f3 100644 +--- a/fs/udf/inode.c ++++ b/fs/udf/inode.c +@@ -599,7 +599,7 @@ static void udf_do_extend_final_block(struct inode *inode, + */ + if (new_elen <= (last_ext->extLength & UDF_EXTENT_LENGTH_MASK)) + return; +- added_bytes = (last_ext->extLength & UDF_EXTENT_LENGTH_MASK) - new_elen; ++ added_bytes = new_elen - (last_ext->extLength & UDF_EXTENT_LENGTH_MASK); + last_ext->extLength += added_bytes; + UDF_I(inode)->i_lenExtents += added_bytes; + +-- +2.35.1 + diff --git a/queue-5.10/usb-rndis_host-secure-rndis_query-check-against-int-.patch b/queue-5.10/usb-rndis_host-secure-rndis_query-check-against-int-.patch new file mode 100644 index 00000000000..050173eca8f --- /dev/null +++ b/queue-5.10/usb-rndis_host-secure-rndis_query-check-against-int-.patch @@ -0,0 +1,43 @@ +From 6f1609ba6535cb41c4734901ab0b7963164c653e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 Jan 2023 10:17:09 +0100 +Subject: usb: rndis_host: Secure rndis_query check against int overflow + +From: Szymon Heidrich + +[ Upstream commit c7dd13805f8b8fc1ce3b6d40f6aff47e66b72ad2 ] + +Variables off and len typed as uint32 in rndis_query function +are controlled by incoming RNDIS response message thus their +value may be manipulated. Setting off to an unexpectedly large +value will cause the sum with len and 8 to overflow and pass +the implemented validation step. Consequently the response +pointer will be referring to a location past the expected +buffer boundaries allowing information leakage e.g. via +RNDIS_OID_802_3_PERMANENT_ADDRESS OID. + +Fixes: ddda08624013 ("USB: rndis_host, various cleanups") +Signed-off-by: Szymon Heidrich +Signed-off-by: David S.
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/usb/rndis_host.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/usb/rndis_host.c b/drivers/net/usb/rndis_host.c +index 1505fe3f87ed..1ff723e15d52 100644 +--- a/drivers/net/usb/rndis_host.c ++++ b/drivers/net/usb/rndis_host.c +@@ -255,7 +255,8 @@ static int rndis_query(struct usbnet *dev, struct usb_interface *intf, + + off = le32_to_cpu(u.get_c->offset); + len = le32_to_cpu(u.get_c->len); +- if (unlikely((8 + off + len) > CONTROL_BUFFER_SIZE)) ++ if (unlikely((off > CONTROL_BUFFER_SIZE - 8) || ++ (len > CONTROL_BUFFER_SIZE - 8 - off))) + goto response_error; + + if (*reply_len != -1 && len != *reply_len) +-- +2.35.1 + diff --git a/queue-5.10/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch b/queue-5.10/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch new file mode 100644 index 00000000000..8a477aaf7b3 --- /dev/null +++ b/queue-5.10/veth-fix-race-with-af_xdp-exposing-old-or-uninitiali.patch @@ -0,0 +1,88 @@ +From 19ad77894525f87a61981b22619cca345fdc9e79 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Dec 2022 12:59:03 -0600 +Subject: veth: Fix race with AF_XDP exposing old or uninitialized descriptors + +From: Shawn Bohrer + +[ Upstream commit fa349e396e4886d742fd6501c599ec627ef1353b ] + +When AF_XDP is used on a veth interface the RX ring is updated in two +steps. veth_xdp_rcv() removes packet descriptors from the FILL ring +fills them and places them in the RX ring updating the cached_prod +pointer. Later xdp_do_flush() syncs the RX ring prod pointer with the +cached_prod pointer allowing user-space to see the recently filled in +descriptors. The rings are intended to be SPSC, however the existing +order in veth_poll allows the xdp_do_flush() to run concurrently with +another CPU creating a race condition that allows user-space to see old +or uninitialized descriptors in the RX ring. This bug has been observed +in production systems.
+ +To summarize, we are expecting this ordering: + +CPU 0 __xsk_rcv_zc() +CPU 0 __xsk_map_flush() +CPU 2 __xsk_rcv_zc() +CPU 2 __xsk_map_flush() + +But we are seeing this order: + +CPU 0 __xsk_rcv_zc() +CPU 2 __xsk_rcv_zc() +CPU 0 __xsk_map_flush() +CPU 2 __xsk_map_flush() + +This occurs because we rely on NAPI to ensure that only one napi_poll +handler is running at a time for the given veth receive queue. +napi_schedule_prep() will prevent multiple instances from getting +scheduled. However calling napi_complete_done() signals that this +napi_poll is complete and allows subsequent calls to +napi_schedule_prep() and __napi_schedule() to succeed in scheduling a +concurrent napi_poll before the xdp_do_flush() has been called. For the +veth driver a concurrent call to napi_schedule_prep() and +__napi_schedule() can occur on a different CPU because the veth xmit +path can additionally schedule a napi_poll creating the race. + +The fix as suggested by Magnus Karlsson, is to simply move the +xdp_do_flush() call before napi_complete_done(). This syncs the +producer ring pointers before another instance of napi_poll can be +scheduled on another CPU. It will also slightly improve performance by +moving the flush closer to when the descriptors were placed in the +RX ring. 
+ +Fixes: d1396004dd86 ("veth: Add XDP TX and REDIRECT") +Suggested-by: Magnus Karlsson +Signed-off-by: Shawn Bohrer +Link: https://lore.kernel.org/r/20221220185903.1105011-1-sbohrer@cloudflare.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/veth.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/veth.c b/drivers/net/veth.c +index 5be8ed910553..5aa23a036ed3 100644 +--- a/drivers/net/veth.c ++++ b/drivers/net/veth.c +@@ -849,6 +849,9 @@ static int veth_poll(struct napi_struct *napi, int budget) + xdp_set_return_frame_no_direct(); + done = veth_xdp_rcv(rq, budget, &bq, &stats); + ++ if (stats.xdp_redirect > 0) ++ xdp_do_flush(); ++ + if (done < budget && napi_complete_done(napi, done)) { + /* Write rx_notify_masked before reading ptr_ring */ + smp_store_mb(rq->rx_notify_masked, false); +@@ -862,8 +865,6 @@ static int veth_poll(struct napi_struct *napi, int budget) + + if (stats.xdp_tx > 0) + veth_xdp_flush(rq, &bq); +- if (stats.xdp_redirect > 0) +- xdp_do_flush(); + xdp_clear_return_frame_no_direct(); + + return done; +-- +2.35.1 + diff --git a/queue-5.10/vhost-fix-range-used-in-translate_desc.patch b/queue-5.10/vhost-fix-range-used-in-translate_desc.patch new file mode 100644 index 00000000000..81667e5e212 --- /dev/null +++ b/queue-5.10/vhost-fix-range-used-in-translate_desc.patch @@ -0,0 +1,55 @@ +From 1d66f3722bf0492cb75c49cdaf3f6888a18ffa18 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 11:25:03 +0100 +Subject: vhost: fix range used in translate_desc() + +From: Stefano Garzarella + +[ Upstream commit 98047313cdb46828093894d0ac8b1183b8b317f9 ] + +vhost_iotlb_itree_first() requires `start` and `last` parameters +to search for a mapping that overlaps the range. 
+ +In translate_desc() we cyclically call vhost_iotlb_itree_first(), +incrementing `addr` by the amount already translated, so rightly +we move the `start` parameter passed to vhost_iotlb_itree_first(), +but we should hold the `last` parameter constant. + +Let's fix it by saving the `last` parameter value before incrementing +`addr` in the loop. + +Fixes: a9709d6874d5 ("vhost: convert pre sorted vhost memory array to interval tree") +Acked-by: Jason Wang +Signed-off-by: Stefano Garzarella +Message-Id: <20221109102503.18816-3-sgarzare@redhat.com> +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vhost/vhost.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c +index f41463ab4031..da00a5c57db6 100644 +--- a/drivers/vhost/vhost.c ++++ b/drivers/vhost/vhost.c +@@ -2041,7 +2041,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + struct vhost_dev *dev = vq->dev; + struct vhost_iotlb *umem = dev->iotlb ? 
dev->iotlb : dev->umem; + struct iovec *_iov; +- u64 s = 0; ++ u64 s = 0, last = addr + len - 1; + int ret = 0; + + while ((u64)len > s) { +@@ -2051,7 +2051,7 @@ static int translate_desc(struct vhost_virtqueue *vq, u64 addr, u32 len, + break; + } + +- map = vhost_iotlb_itree_first(umem, addr, addr + len - 1); ++ map = vhost_iotlb_itree_first(umem, addr, last); + if (map == NULL || map->start > addr) { + if (umem != dev->iotlb) { + ret = -EFAULT; +-- +2.35.1 + diff --git a/queue-5.10/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch b/queue-5.10/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch new file mode 100644 index 00000000000..ac43a5c9f51 --- /dev/null +++ b/queue-5.10/vhost-vsock-fix-error-handling-in-vhost_vsock_init.patch @@ -0,0 +1,64 @@ +From c16e2f737ce8ca4b8ca7537af351af8503334f82 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 8 Nov 2022 10:17:05 +0000 +Subject: vhost/vsock: Fix error handling in vhost_vsock_init() + +From: Yuan Can + +[ Upstream commit 7a4efe182ca61fb3e5307e69b261c57cbf434cd4 ] + +A problem about modprobe vhost_vsock failed is triggered with the +following log given: + +modprobe: ERROR: could not insert 'vhost_vsock': Device or resource busy + +The reason is that vhost_vsock_init() returns misc_register() directly +without checking its return value, if misc_register() failed, it returns +without calling vsock_core_unregister() on vhost_transport, resulting the +vhost_vsock can never be installed later. +A simple call graph is shown as below: + + vhost_vsock_init() + vsock_core_register() # register vhost_transport + misc_register() + device_create_with_groups() + device_create_groups_vargs() + dev = kzalloc(...) # OOM happened + # return without unregister vhost_transport + +Fix by calling vsock_core_unregister() when misc_register() returns error. 
+ +Fixes: 433fc58e6bf2 ("VSOCK: Introduce vhost_vsock.ko") +Signed-off-by: Yuan Can +Message-Id: <20221108101705.45981-1-yuancan@huawei.com> +Signed-off-by: Michael S. Tsirkin +Reviewed-by: Stefano Garzarella +Acked-by: Jason Wang +Signed-off-by: Sasha Levin +--- + drivers/vhost/vsock.c | 9 ++++++++- + 1 file changed, 8 insertions(+), 1 deletion(-) + +diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c +index b0153617fe0e..7bce5f982e58 100644 +--- a/drivers/vhost/vsock.c ++++ b/drivers/vhost/vsock.c +@@ -854,7 +854,14 @@ static int __init vhost_vsock_init(void) + VSOCK_TRANSPORT_F_H2G); + if (ret < 0) + return ret; +- return misc_register(&vhost_vsock_misc); ++ ++ ret = misc_register(&vhost_vsock_misc); ++ if (ret) { ++ vsock_core_unregister(&vhost_transport.transport); ++ return ret; ++ } ++ ++ return 0; + }; + + static void __exit vhost_vsock_exit(void) +-- +2.35.1 + diff --git a/queue-5.10/vmxnet3-correctly-report-csum_level-for-encapsulated.patch b/queue-5.10/vmxnet3-correctly-report-csum_level-for-encapsulated.patch new file mode 100644 index 00000000000..35e19df5624 --- /dev/null +++ b/queue-5.10/vmxnet3-correctly-report-csum_level-for-encapsulated.patch @@ -0,0 +1,55 @@ +From 9e2579a3f21959106fea0a91504686982c460d28 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Dec 2022 12:25:55 -0800 +Subject: vmxnet3: correctly report csum_level for encapsulated packet + +From: Ronak Doshi + +[ Upstream commit 3d8f2c4269d08f8793e946279dbdf5e972cc4911 ] + +Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload +support") added support for encapsulation offload. However, the +patch did not report correctly the csum_level for encapsulated packet. + +This patch fixes this issue by reporting correct csum level for the +encapsulated packet. 
+ +Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support") +Signed-off-by: Ronak Doshi +Acked-by: Peng Li +Link: https://lore.kernel.org/r/20221220202556.24421-1-doshir@vmware.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/vmxnet3/vmxnet3_drv.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c +index 43a4bcdd92c1..3b889fed9882 100644 +--- a/drivers/net/vmxnet3/vmxnet3_drv.c ++++ b/drivers/net/vmxnet3/vmxnet3_drv.c +@@ -1236,6 +1236,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, + (le32_to_cpu(gdesc->dword[3]) & + VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) { + skb->ip_summed = CHECKSUM_UNNECESSARY; ++ if ((le32_to_cpu(gdesc->dword[0]) & ++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { ++ skb->csum_level = 1; ++ } + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); +@@ -1245,6 +1249,10 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter, + } else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) & + (1 << VMXNET3_RCD_TUC_SHIFT))) { + skb->ip_summed = CHECKSUM_UNNECESSARY; ++ if ((le32_to_cpu(gdesc->dword[0]) & ++ (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))) { ++ skb->csum_level = 1; ++ } + WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) && + !(le32_to_cpu(gdesc->dword[0]) & + (1UL << VMXNET3_RCD_HDR_INNER_SHIFT))); +-- +2.35.1 + diff --git a/queue-5.10/vringh-fix-range-used-in-iotlb_translate.patch b/queue-5.10/vringh-fix-range-used-in-iotlb_translate.patch new file mode 100644 index 00000000000..03ed9bf310c --- /dev/null +++ b/queue-5.10/vringh-fix-range-used-in-iotlb_translate.patch @@ -0,0 +1,56 @@ +From dbaecb214467cc40fbbb4ca94799671c27766dab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 9 Nov 2022 11:25:02 +0100 +Subject: vringh: fix range used in iotlb_translate() + +From: Stefano Garzarella + +[ Upstream commit 
f85efa9b0f5381874f727bd98f56787840313f0b ] + +vhost_iotlb_itree_first() requires `start` and `last` parameters +to search for a mapping that overlaps the range. + +In iotlb_translate() we cyclically call vhost_iotlb_itree_first(), +incrementing `addr` by the amount already translated, so rightly +we move the `start` parameter passed to vhost_iotlb_itree_first(), +but we should hold the `last` parameter constant. + +Let's fix it by saving the `last` parameter value before incrementing +`addr` in the loop. + +Fixes: 9ad9c49cfe97 ("vringh: IOTLB support") +Acked-by: Jason Wang +Signed-off-by: Stefano Garzarella +Message-Id: <20221109102503.18816-2-sgarzare@redhat.com> +Signed-off-by: Michael S. Tsirkin +Signed-off-by: Sasha Levin +--- + drivers/vhost/vringh.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c +index 5a0340c85dc6..48f4ec2ba40a 100644 +--- a/drivers/vhost/vringh.c ++++ b/drivers/vhost/vringh.c +@@ -1077,7 +1077,7 @@ static int iotlb_translate(const struct vringh *vrh, + struct vhost_iotlb_map *map; + struct vhost_iotlb *iotlb = vrh->iotlb; + int ret = 0; +- u64 s = 0; ++ u64 s = 0, last = addr + len - 1; + + while (len > s) { + u64 size, pa, pfn; +@@ -1087,8 +1087,7 @@ static int iotlb_translate(const struct vringh *vrh, + break; + } + +- map = vhost_iotlb_itree_first(iotlb, addr, +- addr + len - 1); ++ map = vhost_iotlb_itree_first(iotlb, addr, last); + if (!map || map->start > addr) { + ret = -EINVAL; + break; +-- +2.35.1 + diff --git a/queue-5.10/x86-kprobes-convert-to-insn_decode.patch b/queue-5.10/x86-kprobes-convert-to-insn_decode.patch new file mode 100644 index 00000000000..a5d6d2a4123 --- /dev/null +++ b/queue-5.10/x86-kprobes-convert-to-insn_decode.patch @@ -0,0 +1,100 @@ +From 2c70ea9179e1f1d01d70e3724cca2d74a403326e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Nov 2020 18:10:11 +0100 +Subject: x86/kprobes: Convert to insn_decode() + +From: Borislav Petkov 
+ +[ Upstream commit 77e768ec1391dc0d6cd89822aa60b9a1c1bd8128 ] + +Simplify code, improve decoding error checking. + +Signed-off-by: Borislav Petkov +Acked-by: Masami Hiramatsu +Link: https://lkml.kernel.org/r/20210304174237.31945-12-bp@alien8.de +Stable-dep-of: 63dc6325ff41 ("x86/kprobes: Fix optprobe optimization check with CONFIG_RETHUNK") +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/kprobes/core.c | 17 +++++++++++------ + arch/x86/kernel/kprobes/opt.c | 9 +++++++-- + 2 files changed, 18 insertions(+), 8 deletions(-) + +diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c +index 97e1d2a9898f..5de757099186 100644 +--- a/arch/x86/kernel/kprobes/core.c ++++ b/arch/x86/kernel/kprobes/core.c +@@ -293,6 +293,8 @@ static int can_probe(unsigned long paddr) + /* Decode instructions */ + addr = paddr - offset; + while (addr < paddr) { ++ int ret; ++ + /* + * Check if the instruction has been modified by another + * kprobe, in which case we replace the breakpoint by the +@@ -304,8 +306,10 @@ static int can_probe(unsigned long paddr) + __addr = recover_probed_instruction(buf, addr); + if (!__addr) + return 0; +- kernel_insn_init(&insn, (void *)__addr, MAX_INSN_SIZE); +- insn_get_length(&insn); ++ ++ ret = insn_decode(&insn, (void *)__addr, MAX_INSN_SIZE, INSN_MODE_KERN); ++ if (ret < 0) ++ return 0; + + #ifdef CONFIG_KGDB + /* +@@ -351,8 +355,8 @@ static int is_IF_modifier(kprobe_opcode_t *insn) + int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) + { + kprobe_opcode_t buf[MAX_INSN_SIZE]; +- unsigned long recovered_insn = +- recover_probed_instruction(buf, (unsigned long)src); ++ unsigned long recovered_insn = recover_probed_instruction(buf, (unsigned long)src); ++ int ret; + + if (!recovered_insn || !insn) + return 0; +@@ -362,8 +366,9 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) + MAX_INSN_SIZE)) + return 0; + +- kernel_insn_init(insn, dest, MAX_INSN_SIZE); +- insn_get_length(insn); ++ ret = 
insn_decode(insn, dest, MAX_INSN_SIZE, INSN_MODE_KERN); ++ if (ret < 0) ++ return 0; + + /* We can not probe force emulate prefixed instruction */ + if (insn_has_emulate_prefix(insn)) +diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c +index 08eb23074f92..4299fc865732 100644 +--- a/arch/x86/kernel/kprobes/opt.c ++++ b/arch/x86/kernel/kprobes/opt.c +@@ -312,6 +312,8 @@ static int can_optimize(unsigned long paddr) + addr = paddr - offset; + while (addr < paddr - offset + size) { /* Decode until function end */ + unsigned long recovered_insn; ++ int ret; ++ + if (search_exception_tables(addr)) + /* + * Since some fixup code will jumps into this function, +@@ -321,8 +323,11 @@ static int can_optimize(unsigned long paddr) + recovered_insn = recover_probed_instruction(buf, addr); + if (!recovered_insn) + return 0; +- kernel_insn_init(&insn, (void *)recovered_insn, MAX_INSN_SIZE); +- insn_get_length(&insn); ++ ++ ret = insn_decode(&insn, (void *)recovered_insn, MAX_INSN_SIZE, INSN_MODE_KERN); ++ if (ret < 0) ++ return 0; ++ + /* + * In the case of detecting unknown breakpoint, this could be + * a padding INT3 between functions. 
Let's check that all the +-- +2.35.1 + diff --git a/queue-5.10/x86-kprobes-fix-optprobe-optimization-check-with-con.patch b/queue-5.10/x86-kprobes-fix-optprobe-optimization-check-with-con.patch new file mode 100644 index 00000000000..88141a09fcb --- /dev/null +++ b/queue-5.10/x86-kprobes-fix-optprobe-optimization-check-with-con.patch @@ -0,0 +1,89 @@ +From 5e1c0e63e98ffedb57bb62dc0183a44cb126f2ad Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Dec 2022 23:35:19 +0900 +Subject: x86/kprobes: Fix optprobe optimization check with CONFIG_RETHUNK + +From: Masami Hiramatsu (Google) + +[ Upstream commit 63dc6325ff41ee9e570bde705ac34a39c5dbeb44 ] + +Since the CONFIG_RETHUNK and CONFIG_SLS will use INT3 for stopping +speculative execution after function return, kprobe jump optimization +always fails on the functions with such INT3 inside the function body. +(It already checks the INT3 padding between functions, but not inside + the function) + +To avoid this issue, as same as kprobes, check whether the INT3 comes +from kgdb or not, and if so, stop decoding and make it fail. The other +INT3 will come from CONFIG_RETHUNK/CONFIG_SLS and those can be +treated as a one-byte instruction. 
+ +Fixes: e463a09af2f0 ("x86: Add straight-line-speculation mitigation") +Suggested-by: Peter Zijlstra +Signed-off-by: Masami Hiramatsu (Google) +Signed-off-by: Peter Zijlstra (Intel) +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/167146051929.1374301.7419382929328081706.stgit@devnote3 +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/kprobes/opt.c | 28 ++++++++-------------------- + 1 file changed, 8 insertions(+), 20 deletions(-) + +diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c +index 4299fc865732..3d6201492006 100644 +--- a/arch/x86/kernel/kprobes/opt.c ++++ b/arch/x86/kernel/kprobes/opt.c +@@ -15,6 +15,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -272,19 +273,6 @@ static int insn_is_indirect_jump(struct insn *insn) + return ret; + } + +-static bool is_padding_int3(unsigned long addr, unsigned long eaddr) +-{ +- unsigned char ops; +- +- for (; addr < eaddr; addr++) { +- if (get_kernel_nofault(ops, (void *)addr) < 0 || +- ops != INT3_INSN_OPCODE) +- return false; +- } +- +- return true; +-} +- + /* Decode whole function to ensure any instructions don't jump into target */ + static int can_optimize(unsigned long paddr) + { +@@ -327,15 +315,15 @@ static int can_optimize(unsigned long paddr) + ret = insn_decode(&insn, (void *)recovered_insn, MAX_INSN_SIZE, INSN_MODE_KERN); + if (ret < 0) + return 0; +- ++#ifdef CONFIG_KGDB + /* +- * In the case of detecting unknown breakpoint, this could be +- * a padding INT3 between functions. Let's check that all the +- * rest of the bytes are also INT3. ++ * If there is a dynamically installed kgdb sw breakpoint, ++ * this function should not be probed. + */ +- if (insn.opcode.bytes[0] == INT3_INSN_OPCODE) +- return is_padding_int3(addr, paddr - offset + size) ? 
1 : 0; +- ++ if (insn.opcode.bytes[0] == INT3_INSN_OPCODE && ++ kgdb_has_hit_break(addr)) ++ return 0; ++#endif + /* Recover address */ + insn.kaddr = (void *)addr; + insn.next_byte = (void *)(addr + insn.length); +-- +2.35.1 +