From: Sasha Levin Date: Fri, 28 Jul 2023 14:38:03 +0000 (-0400) Subject: Fixes for 5.4 X-Git-Tag: v5.15.124~90 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=e1717a1d4b71dff042068b6f32fae7bd22cac3c4;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.4 Signed-off-by: Sasha Levin --- diff --git a/queue-5.4/bcache-fix-__bch_btree_node_alloc-to-make-the-failur.patch b/queue-5.4/bcache-fix-__bch_btree_node_alloc-to-make-the-failur.patch new file mode 100644 index 00000000000..0175898d601 --- /dev/null +++ b/queue-5.4/bcache-fix-__bch_btree_node_alloc-to-make-the-failur.patch @@ -0,0 +1,49 @@ +From e075a8c7d61347356b8a9039538681636d16cf96 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 Jun 2023 20:12:22 +0800 +Subject: bcache: Fix __bch_btree_node_alloc to make the failure behavior + consistent + +From: Zheng Wang + +[ Upstream commit 80fca8a10b604afad6c14213fdfd816c4eda3ee4 ] + +In some specific situations, the return value of __bch_btree_node_alloc +may be NULL. This may lead to a potential NULL pointer dereference in +caller function like a calling chain : +btree_split->bch_btree_node_alloc->__bch_btree_node_alloc. + +Fix it by initializing the return value in __bch_btree_node_alloc. 
+ +Fixes: cafe56359144 ("bcache: A block layer cache") +Cc: stable@vger.kernel.org +Signed-off-by: Zheng Wang +Signed-off-by: Coly Li +Link: https://lore.kernel.org/r/20230615121223.22502-6-colyli@suse.de +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + drivers/md/bcache/btree.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index df33062746304..cc0c1f2bba45c 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1137,10 +1137,12 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, + struct btree *parent) + { + BKEY_PADDED(key) k; +- struct btree *b = ERR_PTR(-EAGAIN); ++ struct btree *b; + + mutex_lock(&c->bucket_lock); + retry: ++ /* return ERR_PTR(-EAGAIN) when it fails */ ++ b = ERR_PTR(-EAGAIN); + if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait)) + goto err; + +-- +2.39.2 + diff --git a/queue-5.4/bcache-remove-int-n-from-parameter-list-of-bch_bucke.patch b/queue-5.4/bcache-remove-int-n-from-parameter-list-of-bch_bucke.patch new file mode 100644 index 00000000000..2ffd85916fc --- /dev/null +++ b/queue-5.4/bcache-remove-int-n-from-parameter-list-of-bch_bucke.patch @@ -0,0 +1,158 @@ +From 6d296ce36b19f421d747d280c2ff5b8642c0cf2c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Oct 2020 14:50:45 +0800 +Subject: bcache: remove 'int n' from parameter list of bch_bucket_alloc_set() + +From: Coly Li + +[ Upstream commit 17e4aed8309ff28670271546c2c3263eb12f5eb6 ] + +The parameter 'int n' from bch_bucket_alloc_set() is not clearly +defined. From the code comments n is the number of buckets to alloc, but +from the code itself 'n' is the maximum cache to iterate. Indeed, in all the +locations where bch_bucket_alloc_set() is called, 'n' is always 1. + +This patch removes the confusing and unnecessary 'int n' from parameter +list of bch_bucket_alloc_set(), and explicitly allocates only 1 bucket +for its caller. 
+ +Signed-off-by: Coly Li +Reviewed-by: Hannes Reinecke +Signed-off-by: Jens Axboe +Stable-dep-of: 80fca8a10b60 ("bcache: Fix __bch_btree_node_alloc to make the failure behavior consistent") +Signed-off-by: Sasha Levin +--- + drivers/md/bcache/alloc.c | 35 +++++++++++++++-------------------- + drivers/md/bcache/bcache.h | 4 ++-- + drivers/md/bcache/btree.c | 2 +- + drivers/md/bcache/super.c | 2 +- + 4 files changed, 19 insertions(+), 24 deletions(-) + +diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c +index a1df0d95151c6..5310e1f4a2826 100644 +--- a/drivers/md/bcache/alloc.c ++++ b/drivers/md/bcache/alloc.c +@@ -49,7 +49,7 @@ + * + * bch_bucket_alloc() allocates a single bucket from a specific cache. + * +- * bch_bucket_alloc_set() allocates one or more buckets from different caches ++ * bch_bucket_alloc_set() allocates one bucket from different caches + * out of a cache set. + * + * free_some_buckets() drives all the processes described above. It's called +@@ -488,34 +488,29 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k) + } + + int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, +- struct bkey *k, int n, bool wait) ++ struct bkey *k, bool wait) + { +- int i; ++ struct cache *ca; ++ long b; + + /* No allocation if CACHE_SET_IO_DISABLE bit is set */ + if (unlikely(test_bit(CACHE_SET_IO_DISABLE, &c->flags))) + return -1; + + lockdep_assert_held(&c->bucket_lock); +- BUG_ON(!n || n > c->caches_loaded || n > MAX_CACHES_PER_SET); + + bkey_init(k); + +- /* sort by free space/prio of oldest data in caches */ +- +- for (i = 0; i < n; i++) { +- struct cache *ca = c->cache_by_alloc[i]; +- long b = bch_bucket_alloc(ca, reserve, wait); ++ ca = c->cache_by_alloc[0]; ++ b = bch_bucket_alloc(ca, reserve, wait); ++ if (b == -1) ++ goto err; + +- if (b == -1) +- goto err; ++ k->ptr[0] = MAKE_PTR(ca->buckets[b].gen, ++ bucket_to_sector(c, b), ++ ca->sb.nr_this_dev); + +- k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, +- bucket_to_sector(c, 
b), +- ca->sb.nr_this_dev); +- +- SET_KEY_PTRS(k, i + 1); +- } ++ SET_KEY_PTRS(k, 1); + + return 0; + err: +@@ -525,12 +520,12 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, + } + + int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, +- struct bkey *k, int n, bool wait) ++ struct bkey *k, bool wait) + { + int ret; + + mutex_lock(&c->bucket_lock); +- ret = __bch_bucket_alloc_set(c, reserve, k, n, wait); ++ ret = __bch_bucket_alloc_set(c, reserve, k, wait); + mutex_unlock(&c->bucket_lock); + return ret; + } +@@ -638,7 +633,7 @@ bool bch_alloc_sectors(struct cache_set *c, + + spin_unlock(&c->data_bucket_lock); + +- if (bch_bucket_alloc_set(c, watermark, &alloc.key, 1, wait)) ++ if (bch_bucket_alloc_set(c, watermark, &alloc.key, wait)) + return false; + + spin_lock(&c->data_bucket_lock); +diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h +index 36de6f7ddf221..1dd9298cb0e02 100644 +--- a/drivers/md/bcache/bcache.h ++++ b/drivers/md/bcache/bcache.h +@@ -970,9 +970,9 @@ void bch_bucket_free(struct cache_set *c, struct bkey *k); + + long bch_bucket_alloc(struct cache *ca, unsigned int reserve, bool wait); + int __bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, +- struct bkey *k, int n, bool wait); ++ struct bkey *k, bool wait); + int bch_bucket_alloc_set(struct cache_set *c, unsigned int reserve, +- struct bkey *k, int n, bool wait); ++ struct bkey *k, bool wait); + bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, + unsigned int sectors, unsigned int write_point, + unsigned int write_prio, bool wait); +diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c +index b7fea84d19ad9..df33062746304 100644 +--- a/drivers/md/bcache/btree.c ++++ b/drivers/md/bcache/btree.c +@@ -1141,7 +1141,7 @@ struct btree *__bch_btree_node_alloc(struct cache_set *c, struct btree_op *op, + + mutex_lock(&c->bucket_lock); + retry: +- if (__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, wait)) ++ if 
(__bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, wait)) + goto err; + + bkey_put(c, &k.key); +diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c +index 70e46e0d2f1ac..6afaa5e852837 100644 +--- a/drivers/md/bcache/super.c ++++ b/drivers/md/bcache/super.c +@@ -428,7 +428,7 @@ static int __uuid_write(struct cache_set *c) + closure_init_stack(&cl); + lockdep_assert_held(&bch_register_lock); + +- if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, 1, true)) ++ if (bch_bucket_alloc_set(c, RESERVE_BTREE, &k.key, true)) + return 1; + + SET_KEY_SIZE(&k.key, c->sb.bucket_size); +-- +2.39.2 + diff --git a/queue-5.4/btrfs-fix-extent-buffer-leak-after-tree-mod-log-fail.patch b/queue-5.4/btrfs-fix-extent-buffer-leak-after-tree-mod-log-fail.patch new file mode 100644 index 00000000000..81f200174bf --- /dev/null +++ b/queue-5.4/btrfs-fix-extent-buffer-leak-after-tree-mod-log-fail.patch @@ -0,0 +1,42 @@ +From 45c5e4d407f87700bc6e8a37f44eb31c6ee2d1cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Jun 2023 11:27:38 +0100 +Subject: btrfs: fix extent buffer leak after tree mod log failure at + split_node() + +From: Filipe Manana + +[ Upstream commit ede600e497b1461d06d22a7d17703d9096868bc3 ] + +At split_node(), if we fail to log the tree mod log copy operation, we +return without unlocking the split extent buffer we just allocated and +without decrementing the reference we own on it. Fix this by unlocking +it and decrementing the ref count before returning. 
+ +Fixes: 5de865eebb83 ("Btrfs: fix tree mod logging") +CC: stable@vger.kernel.org # 5.4+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/ctree.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c +index 1420df997485a..608e41b61689c 100644 +--- a/fs/btrfs/ctree.c ++++ b/fs/btrfs/ctree.c +@@ -3589,6 +3589,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, + + ret = tree_mod_log_eb_copy(split, c, 0, mid, c_nritems - mid); + if (ret) { ++ btrfs_tree_unlock(split); ++ free_extent_buffer(split); + btrfs_abort_transaction(trans, ret); + return ret; + } +-- +2.39.2 + diff --git a/queue-5.4/btrfs-fix-race-between-quota-disable-and-relocation.patch b/queue-5.4/btrfs-fix-race-between-quota-disable-and-relocation.patch new file mode 100644 index 00000000000..6b9913f87a2 --- /dev/null +++ b/queue-5.4/btrfs-fix-race-between-quota-disable-and-relocation.patch @@ -0,0 +1,97 @@ +From 175a9a29703c734ac6cdc652deb0a3fda3444090 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 19 Jun 2023 17:21:50 +0100 +Subject: btrfs: fix race between quota disable and relocation + +From: Filipe Manana + +[ Upstream commit 8a4a0b2a3eaf75ca8854f856ef29690c12b2f531 ] + +If we disable quotas while we have a relocation of a metadata block group +that has extents belonging to the quota root, we can cause the relocation +to fail with -ENOENT. This is because relocation builds backref nodes for +extents of the quota root and later needs to walk the backrefs and access +the quota root - however if in between a task disables quotas, it results +in deleting the quota root from the root tree (with btrfs_del_root(), +called from btrfs_quota_disable()). 
+ +This can be sporadically triggered by test case btrfs/255 from fstests: + + $ ./check btrfs/255 + FSTYP -- btrfs + PLATFORM -- Linux/x86_64 debian0 6.4.0-rc6-btrfs-next-134+ #1 SMP PREEMPT_DYNAMIC Thu Jun 15 11:59:28 WEST 2023 + MKFS_OPTIONS -- /dev/sdc + MOUNT_OPTIONS -- /dev/sdc /home/fdmanana/btrfs-tests/scratch_1 + + btrfs/255 6s ... _check_dmesg: something found in dmesg (see /home/fdmanana/git/hub/xfstests/results//btrfs/255.dmesg) + - output mismatch (see /home/fdmanana/git/hub/xfstests/results//btrfs/255.out.bad) + --- tests/btrfs/255.out 2023-03-02 21:47:53.876609426 +0000 + +++ /home/fdmanana/git/hub/xfstests/results//btrfs/255.out.bad 2023-06-16 10:20:39.267563212 +0100 + @@ -1,2 +1,4 @@ + QA output created by 255 + +ERROR: error during balancing '/home/fdmanana/btrfs-tests/scratch_1': No such file or directory + +There may be more info in syslog - try dmesg | tail + Silence is golden + ... + (Run 'diff -u /home/fdmanana/git/hub/xfstests/tests/btrfs/255.out /home/fdmanana/git/hub/xfstests/results//btrfs/255.out.bad' to see the entire diff) + Ran: btrfs/255 + Failures: btrfs/255 + Failed 1 of 1 tests + +To fix this make the quota disable operation take the cleaner mutex, as +relocation of a block group also takes this mutex. This is also what we +do when deleting a subvolume/snapshot, we take the cleaner mutex in the +cleaner kthread (at cleaner_kthread()) and then we call btrfs_del_root() +at btrfs_drop_snapshot() while under the protection of the cleaner mutex. 
+ +Fixes: bed92eae26cc ("Btrfs: qgroup implementation and prototypes") +CC: stable@vger.kernel.org # 5.4+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +--- + fs/btrfs/qgroup.c | 18 +++++++++++++++--- + 1 file changed, 15 insertions(+), 3 deletions(-) + +diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c +index 7821bef061fe6..b6cce67520f04 100644 +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -1164,12 +1164,23 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) + int ret = 0; + + /* +- * We need to have subvol_sem write locked, to prevent races between +- * concurrent tasks trying to disable quotas, because we will unlock +- * and relock qgroup_ioctl_lock across BTRFS_FS_QUOTA_ENABLED changes. ++ * We need to have subvol_sem write locked to prevent races with ++ * snapshot creation. + */ + lockdep_assert_held_write(&fs_info->subvol_sem); + ++ /* ++ * Lock the cleaner mutex to prevent races with concurrent relocation, ++ * because relocation may be building backrefs for blocks of the quota ++ * root while we are deleting the root. This is like dropping fs roots ++ * of deleted snapshots/subvolumes, we need the same protection. ++ * ++ * This also prevents races between concurrent tasks trying to disable ++ * quotas, because we will unlock and relock qgroup_ioctl_lock across ++ * BTRFS_FS_QUOTA_ENABLED changes. 
++ */ ++ mutex_lock(&fs_info->cleaner_mutex); ++ + mutex_lock(&fs_info->qgroup_ioctl_lock); + if (!fs_info->quota_root) + goto out; +@@ -1251,6 +1262,7 @@ int btrfs_quota_disable(struct btrfs_fs_info *fs_info) + btrfs_end_transaction(trans); + else if (trans) + ret = btrfs_end_transaction(trans); ++ mutex_unlock(&fs_info->cleaner_mutex); + + return ret; + } +-- +2.39.2 + diff --git a/queue-5.4/btrfs-qgroup-catch-reserved-space-leaks-at-unmount-t.patch b/queue-5.4/btrfs-qgroup-catch-reserved-space-leaks-at-unmount-t.patch new file mode 100644 index 00000000000..3f08a493f52 --- /dev/null +++ b/queue-5.4/btrfs-qgroup-catch-reserved-space-leaks-at-unmount-t.patch @@ -0,0 +1,118 @@ +From c093017f3b670ffa6b4f49d929f1973cc143cddb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 Jun 2020 09:04:44 +0800 +Subject: btrfs: qgroup: catch reserved space leaks at unmount time + +From: Qu Wenruo + +[ Upstream commit 5958253cf65de42493f17f36877a901486a90365 ] + +Before this patch, qgroup completely relies on per-inode extent io tree +to detect reserved data space leak. + +However previous bug has already shown how release page before +btrfs_finish_ordered_io() could lead to leak, and since it's +QGROUP_RESERVED bit cleared without triggering qgroup rsv, it can't be +detected by per-inode extent io tree. + +So this patch adds another (and hopefully the final) safety net to catch +qgroup data reserved space leak. At least the new safety net catches +all the leaks during development, so it should be pretty useful in the +real world. 
+ +Reviewed-by: Josef Bacik +Signed-off-by: Qu Wenruo +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Stable-dep-of: 8a4a0b2a3eaf ("btrfs: fix race between quota disable and relocation") +Signed-off-by: Sasha Levin +--- + fs/btrfs/disk-io.c | 5 +++++ + fs/btrfs/qgroup.c | 43 +++++++++++++++++++++++++++++++++++++++++++ + fs/btrfs/qgroup.h | 1 + + 3 files changed, 49 insertions(+) + +diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c +index 7e9d914369a02..d98cf8aba753b 100644 +--- a/fs/btrfs/disk-io.c ++++ b/fs/btrfs/disk-io.c +@@ -4106,6 +4106,11 @@ void close_ctree(struct btrfs_fs_info *fs_info) + ASSERT(list_empty(&fs_info->delayed_iputs)); + set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags); + ++ if (btrfs_check_quota_leak(fs_info)) { ++ WARN_ON(IS_ENABLED(CONFIG_BTRFS_DEBUG)); ++ btrfs_err(fs_info, "qgroup reserved space leaked"); ++ } ++ + btrfs_free_qgroup_config(fs_info); + ASSERT(list_empty(&fs_info->delalloc_roots)); + +diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c +index db8f83ab55f63..7821bef061fe6 100644 +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -504,6 +504,49 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) + return ret < 0 ? ret : 0; + } + ++static u64 btrfs_qgroup_subvolid(u64 qgroupid) ++{ ++ return (qgroupid & ((1ULL << BTRFS_QGROUP_LEVEL_SHIFT) - 1)); ++} ++ ++/* ++ * Called in close_ctree() when quota is still enabled. This verifies we don't ++ * leak some reserved space. ++ * ++ * Return false if no reserved space is left. ++ * Return true if some reserved space is leaked. ++ */ ++bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info) ++{ ++ struct rb_node *node; ++ bool ret = false; ++ ++ if (!test_bit(BTRFS_FS_QUOTA_ENABLED, &fs_info->flags)) ++ return ret; ++ /* ++ * Since we're unmounting, there is no race and no need to grab qgroup ++ * lock. And here we don't go post-order to provide a more user ++ * friendly sorted result. 
++ */ ++ for (node = rb_first(&fs_info->qgroup_tree); node; node = rb_next(node)) { ++ struct btrfs_qgroup *qgroup; ++ int i; ++ ++ qgroup = rb_entry(node, struct btrfs_qgroup, node); ++ for (i = 0; i < BTRFS_QGROUP_RSV_LAST; i++) { ++ if (qgroup->rsv.values[i]) { ++ ret = true; ++ btrfs_warn(fs_info, ++ "qgroup %llu/%llu has unreleased space, type %d rsv %llu", ++ btrfs_qgroup_level(qgroup->qgroupid), ++ btrfs_qgroup_subvolid(qgroup->qgroupid), ++ i, qgroup->rsv.values[i]); ++ } ++ } ++ } ++ return ret; ++} ++ + /* + * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(), + * first two are in single-threaded paths.And for the third one, we have set +diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h +index 0a2659685ad65..94bdfb89505e8 100644 +--- a/fs/btrfs/qgroup.h ++++ b/fs/btrfs/qgroup.h +@@ -416,5 +416,6 @@ int btrfs_qgroup_add_swapped_blocks(struct btrfs_trans_handle *trans, + int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *eb); + void btrfs_qgroup_destroy_extent_records(struct btrfs_transaction *trans); ++bool btrfs_check_quota_leak(struct btrfs_fs_info *fs_info); + + #endif +-- +2.39.2 + diff --git a/queue-5.4/dlm-cleanup-plock_op-vs-plock_xop.patch b/queue-5.4/dlm-cleanup-plock_op-vs-plock_xop.patch new file mode 100644 index 00000000000..5779f25e35e --- /dev/null +++ b/queue-5.4/dlm-cleanup-plock_op-vs-plock_xop.patch @@ -0,0 +1,233 @@ +From 8b077bc7d001fe6ff36cf376d3b44c0e0de7367f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 4 Apr 2022 16:06:32 -0400 +Subject: dlm: cleanup plock_op vs plock_xop + +From: Alexander Aring + +[ Upstream commit bcbb4ba6c9ba81e6975b642a2cade68044cd8a66 ] + +Lately the different casting between plock_op and plock_xop and list +holders which was involved showed some issues which were hard to see. +This patch removes the "plock_xop" structure and introduces a +"struct plock_async_data". 
This structure will be set in "struct plock_op" +in case of asynchronous lock handling as the original "plock_xop" was +made for. There is no need anymore to cast pointers around for +additional fields in case of asynchronous lock handling. As disadvantage +another allocation was introduces but only needed in the asynchronous +case which is currently only used in combination with nfs lockd. + +Signed-off-by: Alexander Aring +Signed-off-by: David Teigland +Stable-dep-of: 59e45c758ca1 ("fs: dlm: interrupt posix locks only when process is killed") +Signed-off-by: Sasha Levin +--- + fs/dlm/plock.c | 77 ++++++++++++++++++++++++++++++-------------------- + 1 file changed, 46 insertions(+), 31 deletions(-) + +diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c +index edce0b25cd90e..e70e23eca03ec 100644 +--- a/fs/dlm/plock.c ++++ b/fs/dlm/plock.c +@@ -19,20 +19,20 @@ static struct list_head recv_list; + static wait_queue_head_t send_wq; + static wait_queue_head_t recv_wq; + +-struct plock_op { +- struct list_head list; +- int done; +- struct dlm_plock_info info; +- int (*callback)(struct file_lock *fl, int result); +-}; +- +-struct plock_xop { +- struct plock_op xop; ++struct plock_async_data { + void *fl; + void *file; + struct file_lock flc; ++ int (*callback)(struct file_lock *fl, int result); + }; + ++struct plock_op { ++ struct list_head list; ++ int done; ++ struct dlm_plock_info info; ++ /* if set indicates async handling */ ++ struct plock_async_data *data; ++}; + + static inline void set_version(struct dlm_plock_info *info) + { +@@ -58,6 +58,12 @@ static int check_version(struct dlm_plock_info *info) + return 0; + } + ++static void dlm_release_plock_op(struct plock_op *op) ++{ ++ kfree(op->data); ++ kfree(op); ++} ++ + static void send_op(struct plock_op *op) + { + set_version(&op->info); +@@ -101,22 +107,21 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number, + int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + int cmd, struct 
file_lock *fl) + { ++ struct plock_async_data *op_data; + struct dlm_ls *ls; + struct plock_op *op; +- struct plock_xop *xop; + int rv; + + ls = dlm_find_lockspace_local(lockspace); + if (!ls) + return -EINVAL; + +- xop = kzalloc(sizeof(*xop), GFP_NOFS); +- if (!xop) { ++ op = kzalloc(sizeof(*op), GFP_NOFS); ++ if (!op) { + rv = -ENOMEM; + goto out; + } + +- op = &xop->xop; + op->info.optype = DLM_PLOCK_OP_LOCK; + op->info.pid = fl->fl_pid; + op->info.ex = (fl->fl_type == F_WRLCK); +@@ -125,22 +130,32 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + op->info.number = number; + op->info.start = fl->fl_start; + op->info.end = fl->fl_end; ++ /* async handling */ + if (fl->fl_lmops && fl->fl_lmops->lm_grant) { ++ op_data = kzalloc(sizeof(*op_data), GFP_NOFS); ++ if (!op_data) { ++ dlm_release_plock_op(op); ++ rv = -ENOMEM; ++ goto out; ++ } ++ + /* fl_owner is lockd which doesn't distinguish + processes on the nfs client */ + op->info.owner = (__u64) fl->fl_pid; +- op->callback = fl->fl_lmops->lm_grant; +- locks_init_lock(&xop->flc); +- locks_copy_lock(&xop->flc, fl); +- xop->fl = fl; +- xop->file = file; ++ op_data->callback = fl->fl_lmops->lm_grant; ++ locks_init_lock(&op_data->flc); ++ locks_copy_lock(&op_data->flc, fl); ++ op_data->fl = fl; ++ op_data->file = file; ++ ++ op->data = op_data; + } else { + op->info.owner = (__u64)(long) fl->fl_owner; + } + + send_op(op); + +- if (!op->callback) { ++ if (!op->data) { + rv = wait_event_interruptible(recv_wq, (op->done != 0)); + if (rv == -ERESTARTSYS) { + log_debug(ls, "dlm_posix_lock: wait killed %llx", +@@ -148,7 +163,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + spin_lock(&ops_lock); + list_del(&op->list); + spin_unlock(&ops_lock); +- kfree(xop); ++ dlm_release_plock_op(op); + do_unlock_close(ls, number, file, fl); + goto out; + } +@@ -173,7 +188,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + (unsigned long 
long)number); + } + +- kfree(xop); ++ dlm_release_plock_op(op); + out: + dlm_put_lockspace(ls); + return rv; +@@ -183,11 +198,11 @@ EXPORT_SYMBOL_GPL(dlm_posix_lock); + /* Returns failure iff a successful lock operation should be canceled */ + static int dlm_plock_callback(struct plock_op *op) + { ++ struct plock_async_data *op_data = op->data; + struct file *file; + struct file_lock *fl; + struct file_lock *flc; + int (*notify)(struct file_lock *fl, int result) = NULL; +- struct plock_xop *xop = (struct plock_xop *)op; + int rv = 0; + + spin_lock(&ops_lock); +@@ -199,10 +214,10 @@ static int dlm_plock_callback(struct plock_op *op) + spin_unlock(&ops_lock); + + /* check if the following 2 are still valid or make a copy */ +- file = xop->file; +- flc = &xop->flc; +- fl = xop->fl; +- notify = op->callback; ++ file = op_data->file; ++ flc = &op_data->flc; ++ fl = op_data->fl; ++ notify = op_data->callback; + + if (op->info.rv) { + notify(fl, op->info.rv); +@@ -233,7 +248,7 @@ static int dlm_plock_callback(struct plock_op *op) + } + + out: +- kfree(xop); ++ dlm_release_plock_op(op); + return rv; + } + +@@ -303,7 +318,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + rv = 0; + + out_free: +- kfree(op); ++ dlm_release_plock_op(op); + out: + dlm_put_lockspace(ls); + fl->fl_flags = fl_flags; +@@ -371,7 +386,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, + rv = 0; + } + +- kfree(op); ++ dlm_release_plock_op(op); + out: + dlm_put_lockspace(ls); + return rv; +@@ -407,7 +422,7 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count, + (the process did not make an unlock call). 
*/ + + if (op->info.flags & DLM_PLOCK_FL_CLOSE) +- kfree(op); ++ dlm_release_plock_op(op); + + if (copy_to_user(u, &info, sizeof(info))) + return -EFAULT; +@@ -439,7 +454,7 @@ static ssize_t dev_write(struct file *file, const char __user *u, size_t count, + op->info.owner == info.owner) { + list_del_init(&op->list); + memcpy(&op->info, &info, sizeof(info)); +- if (op->callback) ++ if (op->data) + do_callback = 1; + else + op->done = 1; +-- +2.39.2 + diff --git a/queue-5.4/dlm-rearrange-async-condition-return.patch b/queue-5.4/dlm-rearrange-async-condition-return.patch new file mode 100644 index 00000000000..0c61eee3354 --- /dev/null +++ b/queue-5.4/dlm-rearrange-async-condition-return.patch @@ -0,0 +1,67 @@ +From a2f1e5818ae6bb35d476aa079743e3580f61fe98 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 4 Apr 2022 16:06:33 -0400 +Subject: dlm: rearrange async condition return + +From: Alexander Aring + +[ Upstream commit a800ba77fd285c6391a82819867ac64e9ab3af46 ] + +This patch moves the return of FILE_LOCK_DEFERRED a little bit earlier +than checking afterwards again if the request was an asynchronous request. 
+ +Signed-off-by: Alexander Aring +Signed-off-by: David Teigland +Stable-dep-of: 59e45c758ca1 ("fs: dlm: interrupt posix locks only when process is killed") +Signed-off-by: Sasha Levin +--- + fs/dlm/plock.c | 27 +++++++++++++-------------- + 1 file changed, 13 insertions(+), 14 deletions(-) + +diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c +index e70e23eca03ec..01fb7d8c0bca5 100644 +--- a/fs/dlm/plock.c ++++ b/fs/dlm/plock.c +@@ -149,26 +149,25 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + op_data->file = file; + + op->data = op_data; ++ ++ send_op(op); ++ rv = FILE_LOCK_DEFERRED; ++ goto out; + } else { + op->info.owner = (__u64)(long) fl->fl_owner; + } + + send_op(op); + +- if (!op->data) { +- rv = wait_event_interruptible(recv_wq, (op->done != 0)); +- if (rv == -ERESTARTSYS) { +- log_debug(ls, "dlm_posix_lock: wait killed %llx", +- (unsigned long long)number); +- spin_lock(&ops_lock); +- list_del(&op->list); +- spin_unlock(&ops_lock); +- dlm_release_plock_op(op); +- do_unlock_close(ls, number, file, fl); +- goto out; +- } +- } else { +- rv = FILE_LOCK_DEFERRED; ++ rv = wait_event_interruptible(recv_wq, (op->done != 0)); ++ if (rv == -ERESTARTSYS) { ++ log_debug(ls, "%s: wait killed %llx", __func__, ++ (unsigned long long)number); ++ spin_lock(&ops_lock); ++ list_del(&op->list); ++ spin_unlock(&ops_lock); ++ dlm_release_plock_op(op); ++ do_unlock_close(ls, number, file, fl); + goto out; + } + +-- +2.39.2 + diff --git a/queue-5.4/ext4-fix-reusing-stale-buffer-heads-from-last-failed.patch b/queue-5.4/ext4-fix-reusing-stale-buffer-heads-from-last-failed.patch new file mode 100644 index 00000000000..0a0b7e4861e --- /dev/null +++ b/queue-5.4/ext4-fix-reusing-stale-buffer-heads-from-last-failed.patch @@ -0,0 +1,126 @@ +From daefe29190ee1e16dc510043ab1d34e7f0757e6c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Mar 2023 09:31:23 +0800 +Subject: ext4: Fix reusing stale buffer heads from last failed mounting + +From: Zhihao 
Cheng + +[ Upstream commit 26fb5290240dc31cae99b8b4dd2af7f46dfcba6b ] + +Following process makes ext4 load stale buffer heads from last failed +mounting in a new mounting operation: +mount_bdev + ext4_fill_super + | ext4_load_and_init_journal + | ext4_load_journal + | jbd2_journal_load + | load_superblock + | journal_get_superblock + | set_buffer_verified(bh) // buffer head is verified + | jbd2_journal_recover // failed caused by EIO + | goto failed_mount3a // skip 'sb->s_root' initialization + deactivate_locked_super + kill_block_super + generic_shutdown_super + if (sb->s_root) + // false, skip ext4_put_super->invalidate_bdev-> + // invalidate_mapping_pages->mapping_evict_folio-> + // filemap_release_folio->try_to_free_buffers, which + // cannot drop buffer head. + blkdev_put + blkdev_put_whole + if (atomic_dec_and_test(&bdev->bd_openers)) + // false, systemd-udev happens to open the device. Then + // blkdev_flush_mapping->kill_bdev->truncate_inode_pages-> + // truncate_inode_folio->truncate_cleanup_folio-> + // folio_invalidate->block_invalidate_folio-> + // filemap_release_folio->try_to_free_buffers will be skipped, + // dropping buffer head is missed again. + +Second mount: +ext4_fill_super + ext4_load_and_init_journal + ext4_load_journal + ext4_get_journal + jbd2_journal_init_inode + journal_init_common + bh = getblk_unmovable + bh = __find_get_block // Found stale bh in last failed mounting + journal->j_sb_buffer = bh + jbd2_journal_load + load_superblock + journal_get_superblock + if (buffer_verified(bh)) + // true, skip journal->j_format_version = 2, value is 0 + jbd2_journal_recover + do_one_pass + next_log_block += count_tags(journal, bh) + // According to journal_tag_bytes(), 'tag_bytes' calculating is + // affected by jbd2_has_feature_csum3(), jbd2_has_feature_csum3() + // returns false because 'j->j_format_version >= 2' is not true, + // then we get wrong next_log_block. 
The do_one_pass may exit + // early when encountering non JBD2_MAGIC_NUMBER in 'next_log_block'. + +The filesystem is corrupted here, journal is partially replayed, and +new journal sequence number actually is already used by last mounting. + +The invalidate_bdev() can drop all buffer heads even racing with bare +reading block device (e.g. systemd-udev), so we can fix it by invalidating +bdev in error handling path in __ext4_fill_super(). + +Fetch a reproducer in [Link]. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=217171 +Fixes: 25ed6e8a54df ("jbd2: enable journal clients to enable v2 checksumming") +Cc: stable@vger.kernel.org # v3.5 +Signed-off-by: Zhihao Cheng +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230315013128.3911115-2-chengzhihao1@huawei.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/super.c | 13 +++++++------ + 1 file changed, 7 insertions(+), 6 deletions(-) + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 03b50cd1f4572..8ad3de7846c54 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -908,6 +908,12 @@ static void ext4_blkdev_remove(struct ext4_sb_info *sbi) + struct block_device *bdev; + bdev = sbi->s_journal_bdev; + if (bdev) { ++ /* ++ * Invalidate the journal device's buffers. We don't want them ++ * floating about in memory - the physical journal device may ++ * hotswapped, and it breaks the `ro-after' testing code. ++ */ ++ invalidate_bdev(bdev); + ext4_blkdev_put(bdev); + sbi->s_journal_bdev = NULL; + } +@@ -1035,13 +1041,7 @@ static void ext4_put_super(struct super_block *sb) + sync_blockdev(sb->s_bdev); + invalidate_bdev(sb->s_bdev); + if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) { +- /* +- * Invalidate the journal device's buffers. We don't want them +- * floating about in memory - the physical journal device may +- * hotswapped, and it breaks the `ro-after' testing code. 
+- */ + sync_blockdev(sbi->s_journal_bdev); +- invalidate_bdev(sbi->s_journal_bdev); + ext4_blkdev_remove(sbi); + } + +@@ -4777,6 +4777,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) + ext4_blkdev_remove(sbi); + brelse(bh); + out_fail: ++ invalidate_bdev(sb->s_bdev); + sb->s_fs_info = NULL; + kfree(sbi->s_blockgroup_lock); + out_free_base: +-- +2.39.2 + diff --git a/queue-5.4/ext4-rename-journal_dev-to-s_journal_dev-inside-ext4.patch b/queue-5.4/ext4-rename-journal_dev-to-s_journal_dev-inside-ext4.patch new file mode 100644 index 00000000000..9d7040ff06d --- /dev/null +++ b/queue-5.4/ext4-rename-journal_dev-to-s_journal_dev-inside-ext4.patch @@ -0,0 +1,122 @@ +From 3bd0f51c74aa83f0950fc77f27154bab348db672 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 24 Sep 2020 11:03:42 +0800 +Subject: ext4: rename journal_dev to s_journal_dev inside ext4_sb_info + +From: Chunguang Xu + +[ Upstream commit ee7ed3aa0f08621dbf897d2a98dc6f2c7e7d0335 ] + +Rename journal_dev to s_journal_dev inside ext4_sb_info, keep +the naming rules consistent with other variables, which is +convenient for code reading and writing. 
+ +Signed-off-by: Chunguang Xu +Reviewed-by: Andreas Dilger +Reviewed-by: Ritesh Harjani +Link: https://lore.kernel.org/r/1600916623-544-1-git-send-email-brookxu@tencent.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 26fb5290240d ("ext4: Fix reusing stale buffer heads from last failed mounting") +Signed-off-by: Sasha Levin +--- + fs/ext4/ext4.h | 2 +- + fs/ext4/fsmap.c | 8 ++++---- + fs/ext4/super.c | 14 +++++++------- + 3 files changed, 12 insertions(+), 12 deletions(-) + +diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h +index 9d86cf3a09bf7..604fef3b2ddf4 100644 +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1441,7 +1441,7 @@ struct ext4_sb_info { + unsigned long s_commit_interval; + u32 s_max_batch_time; + u32 s_min_batch_time; +- struct block_device *journal_bdev; ++ struct block_device *s_journal_bdev; + #ifdef CONFIG_QUOTA + /* Names of quota files with journalled quota */ + char __rcu *s_qf_names[EXT4_MAXQUOTAS]; +diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c +index d1ef651948d7e..d18c4cd4c63ff 100644 +--- a/fs/ext4/fsmap.c ++++ b/fs/ext4/fsmap.c +@@ -576,8 +576,8 @@ static bool ext4_getfsmap_is_valid_device(struct super_block *sb, + if (fm->fmr_device == 0 || fm->fmr_device == UINT_MAX || + fm->fmr_device == new_encode_dev(sb->s_bdev->bd_dev)) + return true; +- if (EXT4_SB(sb)->journal_bdev && +- fm->fmr_device == new_encode_dev(EXT4_SB(sb)->journal_bdev->bd_dev)) ++ if (EXT4_SB(sb)->s_journal_bdev && ++ fm->fmr_device == new_encode_dev(EXT4_SB(sb)->s_journal_bdev->bd_dev)) + return true; + return false; + } +@@ -647,9 +647,9 @@ int ext4_getfsmap(struct super_block *sb, struct ext4_fsmap_head *head, + memset(handlers, 0, sizeof(handlers)); + handlers[0].gfd_dev = new_encode_dev(sb->s_bdev->bd_dev); + handlers[0].gfd_fn = ext4_getfsmap_datadev; +- if (EXT4_SB(sb)->journal_bdev) { ++ if (EXT4_SB(sb)->s_journal_bdev) { + handlers[1].gfd_dev = new_encode_dev( +- EXT4_SB(sb)->journal_bdev->bd_dev); ++ EXT4_SB(sb)->s_journal_bdev->bd_dev); + handlers[1].gfd_fn 
= ext4_getfsmap_logdev; + } + +diff --git a/fs/ext4/super.c b/fs/ext4/super.c +index 9bbd525086562..03b50cd1f4572 100644 +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -906,10 +906,10 @@ static void ext4_blkdev_put(struct block_device *bdev) + static void ext4_blkdev_remove(struct ext4_sb_info *sbi) + { + struct block_device *bdev; +- bdev = sbi->journal_bdev; ++ bdev = sbi->s_journal_bdev; + if (bdev) { + ext4_blkdev_put(bdev); +- sbi->journal_bdev = NULL; ++ sbi->s_journal_bdev = NULL; + } + } + +@@ -1034,14 +1034,14 @@ static void ext4_put_super(struct super_block *sb) + + sync_blockdev(sb->s_bdev); + invalidate_bdev(sb->s_bdev); +- if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { ++ if (sbi->s_journal_bdev && sbi->s_journal_bdev != sb->s_bdev) { + /* + * Invalidate the journal device's buffers. We don't want them + * floating about in memory - the physical journal device may + * hotswapped, and it breaks the `ro-after' testing code. + */ +- sync_blockdev(sbi->journal_bdev); +- invalidate_bdev(sbi->journal_bdev); ++ sync_blockdev(sbi->s_journal_bdev); ++ invalidate_bdev(sbi->s_journal_bdev); + ext4_blkdev_remove(sbi); + } + +@@ -3582,7 +3582,7 @@ int ext4_calculate_overhead(struct super_block *sb) + * Add the internal journal blocks whether the journal has been + * loaded or not + */ +- if (sbi->s_journal && !sbi->journal_bdev) ++ if (sbi->s_journal && !sbi->s_journal_bdev) + overhead += EXT4_NUM_B2C(sbi, sbi->s_journal->j_maxlen); + else if (ext4_has_feature_journal(sb) && !sbi->s_journal && j_inum) { + /* j_inum for internal journal is non-zero */ +@@ -4952,7 +4952,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, + be32_to_cpu(journal->j_superblock->s_nr_users)); + goto out_journal; + } +- EXT4_SB(sb)->journal_bdev = bdev; ++ EXT4_SB(sb)->s_journal_bdev = bdev; + ext4_init_journal_params(sb, journal); + return journal; + +-- +2.39.2 + diff --git a/queue-5.4/fs-dlm-interrupt-posix-locks-only-when-process-is-ki.patch 
b/queue-5.4/fs-dlm-interrupt-posix-locks-only-when-process-is-ki.patch new file mode 100644 index 00000000000..42454d26e5c --- /dev/null +++ b/queue-5.4/fs-dlm-interrupt-posix-locks-only-when-process-is-ki.patch @@ -0,0 +1,44 @@ +From a35078c7a7bdf250189226d21c3d948c38b0890e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 19 May 2023 11:21:26 -0400 +Subject: fs: dlm: interrupt posix locks only when process is killed + +From: Alexander Aring + +[ Upstream commit 59e45c758ca1b9893ac923dd63536da946ac333b ] + +If a posix lock request is waiting for a result from user space +(dlm_controld), do not let it be interrupted unless the process +is killed. This reverts commit a6b1533e9a57 ("dlm: make posix locks +interruptible"). The problem with the interruptible change is +that all locks were cleared on any signal interrupt. If a signal +was received that did not terminate the process, the process +could continue running after all its dlm posix locks had been +cleared. A future patch will add cancelation to allow proper +interruption. 
+ +Cc: stable@vger.kernel.org +Fixes: a6b1533e9a57 ("dlm: make posix locks interruptible") +Signed-off-by: Alexander Aring +Signed-off-by: David Teigland +Signed-off-by: Sasha Levin +--- + fs/dlm/plock.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c +index 01fb7d8c0bca5..f3482e936cc25 100644 +--- a/fs/dlm/plock.c ++++ b/fs/dlm/plock.c +@@ -159,7 +159,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, + + send_op(op); + +- rv = wait_event_interruptible(recv_wq, (op->done != 0)); ++ rv = wait_event_killable(recv_wq, (op->done != 0)); + if (rv == -ERESTARTSYS) { + log_debug(ls, "%s: wait killed %llx", __func__, + (unsigned long long)number); +-- +2.39.2 + diff --git a/queue-5.4/ftrace-add-information-on-number-of-page-groups-allo.patch b/queue-5.4/ftrace-add-information-on-number-of-page-groups-allo.patch new file mode 100644 index 00000000000..da410252ebf --- /dev/null +++ b/queue-5.4/ftrace-add-information-on-number-of-page-groups-allo.patch @@ -0,0 +1,144 @@ +From e8f44e76e8221355ea66b99b59f0af2658a7a379 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Oct 2019 14:38:07 -0400 +Subject: ftrace: Add information on number of page groups allocated + +From: Steven Rostedt (VMware) + +[ Upstream commit da537f0aef1372c5204356a7df06be8769467b7b ] + +Looking for ways to shrink the size of the dyn_ftrace structure, knowing the +information about how many pages and the number of groups of those pages, is +useful in working out the best ways to save on memory. + +This adds one info print on how many groups of pages were used to allocate +the ftrace dyn_ftrace structures, and also shows the number of pages and +groups in the dyn_ftrace_total_info (which is used for debugging). 
+ +Signed-off-by: Steven Rostedt (VMware) +Stable-dep-of: 26efd79c4624 ("ftrace: Fix possible warning on checking all pages used in ftrace_process_locs()") +Signed-off-by: Sasha Levin +--- + kernel/trace/ftrace.c | 14 ++++++++++++++ + kernel/trace/trace.c | 21 +++++++++++++++------ + kernel/trace/trace.h | 2 ++ + 3 files changed, 31 insertions(+), 6 deletions(-) + +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index 8e3c76dcc0ffe..97d615988ea35 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -2899,6 +2899,8 @@ static void ftrace_shutdown_sysctl(void) + + static u64 ftrace_update_time; + unsigned long ftrace_update_tot_cnt; ++unsigned long ftrace_number_of_pages; ++unsigned long ftrace_number_of_groups; + + static inline int ops_traces_mod(struct ftrace_ops *ops) + { +@@ -3023,6 +3025,9 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) + goto again; + } + ++ ftrace_number_of_pages += 1 << order; ++ ftrace_number_of_groups++; ++ + cnt = (PAGE_SIZE << order) / ENTRY_SIZE; + pg->size = cnt; + +@@ -3078,6 +3083,8 @@ ftrace_allocate_pages(unsigned long num_to_init) + start_pg = pg->next; + kfree(pg); + pg = start_pg; ++ ftrace_number_of_pages -= 1 << order; ++ ftrace_number_of_groups--; + } + pr_info("ftrace: FAILED to allocate memory for functions\n"); + return NULL; +@@ -5873,6 +5880,8 @@ void ftrace_release_mod(struct module *mod) + free_pages((unsigned long)pg->records, order); + tmp_page = pg->next; + kfree(pg); ++ ftrace_number_of_pages -= 1 << order; ++ ftrace_number_of_groups--; + } + } + +@@ -6214,6 +6223,8 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) + *last_pg = pg->next; + order = get_count_order(pg->size / ENTRIES_PER_PAGE); + free_pages((unsigned long)pg->records, order); ++ ftrace_number_of_pages -= 1 << order; ++ ftrace_number_of_groups--; + kfree(pg); + pg = container_of(last_pg, struct ftrace_page, next); + if (!(*last_pg)) +@@ -6269,6 +6280,9 @@ void __init 
ftrace_init(void) + __start_mcount_loc, + __stop_mcount_loc); + ++ pr_info("ftrace: allocated %ld pages with %ld groups\n", ++ ftrace_number_of_pages, ftrace_number_of_groups); ++ + set_ftrace_early_filters(); + + return; +diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c +index 7f7c700a61560..8006592803e1c 100644 +--- a/kernel/trace/trace.c ++++ b/kernel/trace/trace.c +@@ -7662,14 +7662,23 @@ static ssize_t + tracing_read_dyn_info(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) + { +- unsigned long *p = filp->private_data; +- char buf[64]; /* Not too big for a shallow stack */ ++ ssize_t ret; ++ char *buf; + int r; + +- r = scnprintf(buf, 63, "%ld", *p); +- buf[r++] = '\n'; ++ /* 256 should be plenty to hold the amount needed */ ++ buf = kmalloc(256, GFP_KERNEL); ++ if (!buf) ++ return -ENOMEM; + +- return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++ r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n", ++ ftrace_update_tot_cnt, ++ ftrace_number_of_pages, ++ ftrace_number_of_groups); ++ ++ ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r); ++ kfree(buf); ++ return ret; + } + + static const struct file_operations tracing_dyn_info_fops = { +@@ -8889,7 +8898,7 @@ static __init int tracer_init_tracefs(void) + + #ifdef CONFIG_DYNAMIC_FTRACE + trace_create_file("dyn_ftrace_total_info", 0444, d_tracer, +- &ftrace_update_tot_cnt, &tracing_dyn_info_fops); ++ NULL, &tracing_dyn_info_fops); + #endif + + create_trace_instances(d_tracer); +diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h +index edc17a640ab34..21f85c0bd66ec 100644 +--- a/kernel/trace/trace.h ++++ b/kernel/trace/trace.h +@@ -801,6 +801,8 @@ extern void trace_event_follow_fork(struct trace_array *tr, bool enable); + + #ifdef CONFIG_DYNAMIC_FTRACE + extern unsigned long ftrace_update_tot_cnt; ++extern unsigned long ftrace_number_of_pages; ++extern unsigned long ftrace_number_of_groups; + void ftrace_init_trace_array(struct trace_array *tr); + #else + static inline 
void ftrace_init_trace_array(struct trace_array *tr) { } +-- +2.39.2 + diff --git a/queue-5.4/ftrace-check-if-pages-were-allocated-before-calling-.patch b/queue-5.4/ftrace-check-if-pages-were-allocated-before-calling-.patch new file mode 100644 index 00000000000..06d2a629d42 --- /dev/null +++ b/queue-5.4/ftrace-check-if-pages-were-allocated-before-calling-.patch @@ -0,0 +1,61 @@ +From 01d7511a8867dba2ef5cce03acaac60a38e575a9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Mar 2021 09:58:38 -0400 +Subject: ftrace: Check if pages were allocated before calling free_pages() + +From: Steven Rostedt (VMware) + +[ Upstream commit 59300b36f85f254260c81d9dd09195fa49eb0f98 ] + +It is possible that on error pg->size can be zero when getting its order, +which would return a -1 value. It is dangerous to pass in an order of -1 +to free_pages(). Check if order is greater than or equal to zero before +calling free_pages(). + +Link: https://lore.kernel.org/lkml/20210330093916.432697c7@gandalf.local.home/ + +Reported-by: Abaci Robot +Signed-off-by: Steven Rostedt (VMware) +Stable-dep-of: 26efd79c4624 ("ftrace: Fix possible warning on checking all pages used in ftrace_process_locs()") +Signed-off-by: Sasha Levin +--- + kernel/trace/ftrace.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index 97d615988ea35..5f5a766bf73bb 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -3079,7 +3079,8 @@ ftrace_allocate_pages(unsigned long num_to_init) + pg = start_pg; + while (pg) { + order = get_count_order(pg->size / ENTRIES_PER_PAGE); +- free_pages((unsigned long)pg->records, order); ++ if (order >= 0) ++ free_pages((unsigned long)pg->records, order); + start_pg = pg->next; + kfree(pg); + pg = start_pg; +@@ -5877,7 +5878,8 @@ void ftrace_release_mod(struct module *mod) + clear_mod_from_hashes(pg); + + order = get_count_order(pg->size / ENTRIES_PER_PAGE); +- free_pages((unsigned 
long)pg->records, order); ++ if (order >= 0) ++ free_pages((unsigned long)pg->records, order); + tmp_page = pg->next; + kfree(pg); + ftrace_number_of_pages -= 1 << order; +@@ -6222,7 +6224,8 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) + if (!pg->index) { + *last_pg = pg->next; + order = get_count_order(pg->size / ENTRIES_PER_PAGE); +- free_pages((unsigned long)pg->records, order); ++ if (order >= 0) ++ free_pages((unsigned long)pg->records, order); + ftrace_number_of_pages -= 1 << order; + ftrace_number_of_groups--; + kfree(pg); +-- +2.39.2 + diff --git a/queue-5.4/ftrace-fix-possible-warning-on-checking-all-pages-us.patch b/queue-5.4/ftrace-fix-possible-warning-on-checking-all-pages-us.patch new file mode 100644 index 00000000000..383169e2efe --- /dev/null +++ b/queue-5.4/ftrace-fix-possible-warning-on-checking-all-pages-us.patch @@ -0,0 +1,137 @@ +From 7e5dec206b49d9eb7381362f5a619aaf20226786 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Jul 2023 14:04:52 +0800 +Subject: ftrace: Fix possible warning on checking all pages used in + ftrace_process_locs() + +From: Zheng Yejian + +[ Upstream commit 26efd79c4624294e553aeaa3439c646729bad084 ] + +As comments in ftrace_process_locs(), there may be NULL pointers in +mcount_loc section: + > Some architecture linkers will pad between + > the different mcount_loc sections of different + > object files to satisfy alignments. + > Skip any NULL pointers. + +After commit 20e5227e9f55 ("ftrace: allow NULL pointers in mcount_loc"), +NULL pointers will be accounted when allocating ftrace pages but skipped +before adding into ftrace pages, this may result in some pages not being +used. Then after commit 706c81f87f84 ("ftrace: Remove extra helper +functions"), warning may occur at: + WARN_ON(pg->next); + +To fix it, only warn for case that no pointers skipped but pages not used +up, then free those unused pages after releasing ftrace_lock. 
+ +Link: https://lore.kernel.org/linux-trace-kernel/20230712060452.3175675-1-zhengyejian1@huawei.com + +Cc: stable@vger.kernel.org +Fixes: 706c81f87f84 ("ftrace: Remove extra helper functions") +Suggested-by: Steven Rostedt +Signed-off-by: Zheng Yejian +Signed-off-by: Steven Rostedt (Google) +Signed-off-by: Sasha Levin +--- + kernel/trace/ftrace.c | 45 +++++++++++++++++++++++++++++-------------- + 1 file changed, 31 insertions(+), 14 deletions(-) + +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index aa2530cbcb8fd..412505d948651 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -3037,6 +3037,22 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) + return cnt; + } + ++static void ftrace_free_pages(struct ftrace_page *pages) ++{ ++ struct ftrace_page *pg = pages; ++ ++ while (pg) { ++ if (pg->records) { ++ free_pages((unsigned long)pg->records, pg->order); ++ ftrace_number_of_pages -= 1 << pg->order; ++ } ++ pages = pg->next; ++ kfree(pg); ++ pg = pages; ++ ftrace_number_of_groups--; ++ } ++} ++ + static struct ftrace_page * + ftrace_allocate_pages(unsigned long num_to_init) + { +@@ -3075,17 +3091,7 @@ ftrace_allocate_pages(unsigned long num_to_init) + return start_pg; + + free_pages: +- pg = start_pg; +- while (pg) { +- if (pg->records) { +- free_pages((unsigned long)pg->records, pg->order); +- ftrace_number_of_pages -= 1 << pg->order; +- } +- start_pg = pg->next; +- kfree(pg); +- pg = start_pg; +- ftrace_number_of_groups--; +- } ++ ftrace_free_pages(start_pg); + pr_info("ftrace: FAILED to allocate memory for functions\n"); + return NULL; + } +@@ -5627,9 +5633,11 @@ static int ftrace_process_locs(struct module *mod, + unsigned long *start, + unsigned long *end) + { ++ struct ftrace_page *pg_unuse = NULL; + struct ftrace_page *start_pg; + struct ftrace_page *pg; + struct dyn_ftrace *rec; ++ unsigned long skipped = 0; + unsigned long count; + unsigned long *p; + unsigned long addr; +@@ -5683,8 +5691,10 @@ static 
int ftrace_process_locs(struct module *mod, + * object files to satisfy alignments. + * Skip any NULL pointers. + */ +- if (!addr) ++ if (!addr) { ++ skipped++; + continue; ++ } + + end_offset = (pg->index+1) * sizeof(pg->records[0]); + if (end_offset > PAGE_SIZE << pg->order) { +@@ -5698,8 +5708,10 @@ static int ftrace_process_locs(struct module *mod, + rec->ip = addr; + } + +- /* We should have used all pages */ +- WARN_ON(pg->next); ++ if (pg->next) { ++ pg_unuse = pg->next; ++ pg->next = NULL; ++ } + + /* Assign the last page to ftrace_pages */ + ftrace_pages = pg; +@@ -5721,6 +5733,11 @@ static int ftrace_process_locs(struct module *mod, + out: + mutex_unlock(&ftrace_lock); + ++ /* We should have used all pages unless we skipped some */ ++ if (pg_unuse) { ++ WARN_ON(!skipped); ++ ftrace_free_pages(pg_unuse); ++ } + return ret; + } + +-- +2.39.2 + diff --git a/queue-5.4/ftrace-store-the-order-of-pages-allocated-in-ftrace_.patch b/queue-5.4/ftrace-store-the-order-of-pages-allocated-in-ftrace_.patch new file mode 100644 index 00000000000..a3f99bf692a --- /dev/null +++ b/queue-5.4/ftrace-store-the-order-of-pages-allocated-in-ftrace_.patch @@ -0,0 +1,141 @@ +From 5d879a6d79b96223a9a3018a507c46d747d8c77c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Apr 2021 16:14:17 -0400 +Subject: ftrace: Store the order of pages allocated in ftrace_page + +From: Linus Torvalds + +[ Upstream commit db42523b4f3e83ff86b53cdda219a9767c8b047f ] + +Instead of saving the size of the records field of the ftrace_page, store +the order it uses to allocate the pages, as that is what is needed to know +in order to free the pages. This simplifies the code. 
+ +Link: https://lore.kernel.org/lkml/CAHk-=whyMxheOqXAORt9a7JK9gc9eHTgCJ55Pgs4p=X3RrQubQ@mail.gmail.com/ + +Signed-off-by: Linus Torvalds +[ change log written by Steven Rostedt ] +Signed-off-by: Steven Rostedt (VMware) +Stable-dep-of: 26efd79c4624 ("ftrace: Fix possible warning on checking all pages used in ftrace_process_locs()") +Signed-off-by: Sasha Levin +--- + kernel/trace/ftrace.c | 35 +++++++++++++++++------------------ + 1 file changed, 17 insertions(+), 18 deletions(-) + +diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c +index 5f5a766bf73bb..aa2530cbcb8fd 100644 +--- a/kernel/trace/ftrace.c ++++ b/kernel/trace/ftrace.c +@@ -1100,7 +1100,7 @@ struct ftrace_page { + struct ftrace_page *next; + struct dyn_ftrace *records; + int index; +- int size; ++ int order; + }; + + #define ENTRY_SIZE sizeof(struct dyn_ftrace) +@@ -3029,7 +3029,7 @@ static int ftrace_allocate_records(struct ftrace_page *pg, int count) + ftrace_number_of_groups++; + + cnt = (PAGE_SIZE << order) / ENTRY_SIZE; +- pg->size = cnt; ++ pg->order = order; + + if (cnt > count) + cnt = count; +@@ -3042,7 +3042,6 @@ ftrace_allocate_pages(unsigned long num_to_init) + { + struct ftrace_page *start_pg; + struct ftrace_page *pg; +- int order; + int cnt; + + if (!num_to_init) +@@ -3078,13 +3077,13 @@ ftrace_allocate_pages(unsigned long num_to_init) + free_pages: + pg = start_pg; + while (pg) { +- order = get_count_order(pg->size / ENTRIES_PER_PAGE); +- if (order >= 0) +- free_pages((unsigned long)pg->records, order); ++ if (pg->records) { ++ free_pages((unsigned long)pg->records, pg->order); ++ ftrace_number_of_pages -= 1 << pg->order; ++ } + start_pg = pg->next; + kfree(pg); + pg = start_pg; +- ftrace_number_of_pages -= 1 << order; + ftrace_number_of_groups--; + } + pr_info("ftrace: FAILED to allocate memory for functions\n"); +@@ -5676,6 +5675,7 @@ static int ftrace_process_locs(struct module *mod, + p = start; + pg = start_pg; + while (p < end) { ++ unsigned long end_offset; + addr = 
ftrace_call_adjust(*p++); + /* + * Some architecture linkers will pad between +@@ -5686,7 +5686,8 @@ static int ftrace_process_locs(struct module *mod, + if (!addr) + continue; + +- if (pg->index == pg->size) { ++ end_offset = (pg->index+1) * sizeof(pg->records[0]); ++ if (end_offset > PAGE_SIZE << pg->order) { + /* We should have allocated enough */ + if (WARN_ON(!pg->next)) + break; +@@ -5826,7 +5827,6 @@ void ftrace_release_mod(struct module *mod) + struct ftrace_page **last_pg; + struct ftrace_page *tmp_page = NULL; + struct ftrace_page *pg; +- int order; + + mutex_lock(&ftrace_lock); + +@@ -5877,12 +5877,12 @@ void ftrace_release_mod(struct module *mod) + /* Needs to be called outside of ftrace_lock */ + clear_mod_from_hashes(pg); + +- order = get_count_order(pg->size / ENTRIES_PER_PAGE); +- if (order >= 0) +- free_pages((unsigned long)pg->records, order); ++ if (pg->records) { ++ free_pages((unsigned long)pg->records, pg->order); ++ ftrace_number_of_pages -= 1 << pg->order; ++ } + tmp_page = pg->next; + kfree(pg); +- ftrace_number_of_pages -= 1 << order; + ftrace_number_of_groups--; + } + } +@@ -6185,7 +6185,6 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) + struct ftrace_mod_map *mod_map = NULL; + struct ftrace_init_func *func, *func_next; + struct list_head clear_hash; +- int order; + + INIT_LIST_HEAD(&clear_hash); + +@@ -6223,10 +6222,10 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr) + ftrace_update_tot_cnt--; + if (!pg->index) { + *last_pg = pg->next; +- order = get_count_order(pg->size / ENTRIES_PER_PAGE); +- if (order >= 0) +- free_pages((unsigned long)pg->records, order); +- ftrace_number_of_pages -= 1 << order; ++ if (pg->records) { ++ free_pages((unsigned long)pg->records, pg->order); ++ ftrace_number_of_pages -= 1 << pg->order; ++ } + ftrace_number_of_groups--; + kfree(pg); + pg = container_of(last_pg, struct ftrace_page, next); +-- +2.39.2 + diff --git 
a/queue-5.4/gpio-tps68470-make-tps68470_gpio_output-always-set-t.patch b/queue-5.4/gpio-tps68470-make-tps68470_gpio_output-always-set-t.patch new file mode 100644 index 00000000000..04679f50fc7 --- /dev/null +++ b/queue-5.4/gpio-tps68470-make-tps68470_gpio_output-always-set-t.patch @@ -0,0 +1,50 @@ +From a5d0e192f5e71f581dcc8b07a34a16c8ffd31c56 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 10 Jul 2023 14:34:25 +0200 +Subject: gpio: tps68470: Make tps68470_gpio_output() always set the initial + value + +From: Hans de Goede + +[ Upstream commit 5a7adc6c1069ce31ef4f606ae9c05592c80a6ab5 ] + +Make tps68470_gpio_output() call tps68470_gpio_set() for output-only pins +too, so that the initial value passed to gpiod_direction_output() is +honored for these pins too. + +Fixes: 275b13a65547 ("gpio: Add support for TPS68470 GPIOs") +Reviewed-by: Andy Shevchenko +Reviewed-by: Daniel Scally +Tested-by: Daniel Scally +Reviewed-by: Sakari Ailus +Signed-off-by: Hans de Goede +Signed-off-by: Bartosz Golaszewski +Signed-off-by: Sasha Levin +--- + drivers/gpio/gpio-tps68470.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpio/gpio-tps68470.c b/drivers/gpio/gpio-tps68470.c +index aff6e504c6668..9704cff9b4aa3 100644 +--- a/drivers/gpio/gpio-tps68470.c ++++ b/drivers/gpio/gpio-tps68470.c +@@ -91,13 +91,13 @@ static int tps68470_gpio_output(struct gpio_chip *gc, unsigned int offset, + struct tps68470_gpio_data *tps68470_gpio = gpiochip_get_data(gc); + struct regmap *regmap = tps68470_gpio->tps68470_regmap; + ++ /* Set the initial value */ ++ tps68470_gpio_set(gc, offset, value); ++ + /* rest are always outputs */ + if (offset >= TPS68470_N_REGULAR_GPIO) + return 0; + +- /* Set the initial value */ +- tps68470_gpio_set(gc, offset, value); +- + return regmap_update_bits(regmap, TPS68470_GPIO_CTL_REG_A(offset), + TPS68470_GPIO_MODE_MASK, + TPS68470_GPIO_MODE_OUT_CMOS); +-- +2.39.2 + diff --git a/queue-5.4/jbd2-fix-incorrect-code-style.patch 
b/queue-5.4/jbd2-fix-incorrect-code-style.patch new file mode 100644 index 00000000000..87d9d41878a --- /dev/null +++ b/queue-5.4/jbd2-fix-incorrect-code-style.patch @@ -0,0 +1,82 @@ +From eca6e46b2c79ff3938893e80cbb02e864ec2bbb8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 18 Jul 2020 08:57:37 -0400 +Subject: jbd2: fix incorrect code style + +From: Xianting Tian + +[ Upstream commit 60ed633f51d0c675150a117d96a45e78c3613f91 ] + +Remove unnecessary blank. + +Signed-off-by: Xianting Tian +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/1595077057-8048-1-git-send-email-xianting_tian@126.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: e34c8dd238d0 ("jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint") +Signed-off-by: Sasha Levin +--- + fs/jbd2/journal.c | 12 ++++++------ + 1 file changed, 6 insertions(+), 6 deletions(-) + +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index b7c5819bfc411..1fa88b5bb1cb0 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -1266,7 +1266,7 @@ journal_t *jbd2_journal_init_inode(struct inode *inode) + * superblock as being NULL to prevent the journal destroy from writing + * back a bogus superblock. + */ +-static void journal_fail_superblock (journal_t *journal) ++static void journal_fail_superblock(journal_t *journal) + { + struct buffer_head *bh = journal->j_sb_buffer; + brelse(bh); +@@ -1780,7 +1780,7 @@ int jbd2_journal_destroy(journal_t *journal) + + + /** +- *int jbd2_journal_check_used_features () - Check if features specified are used. ++ *int jbd2_journal_check_used_features() - Check if features specified are used. + * @journal: Journal to check. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount +@@ -1790,7 +1790,7 @@ int jbd2_journal_destroy(journal_t *journal) + * features. Return true (non-zero) if it does. 
+ **/ + +-int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, ++int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat, + unsigned long ro, unsigned long incompat) + { + journal_superblock_t *sb; +@@ -1825,7 +1825,7 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, + * all of a given set of features on this journal. Return true + * (non-zero) if it can. */ + +-int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, ++int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat, + unsigned long ro, unsigned long incompat) + { + if (!compat && !ro && !incompat) +@@ -1847,7 +1847,7 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com + } + + /** +- * int jbd2_journal_set_features () - Mark a given journal feature in the superblock ++ * int jbd2_journal_set_features() - Mark a given journal feature in the superblock + * @journal: Journal to act on. 
+ * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount +@@ -1858,7 +1858,7 @@ int jbd2_journal_check_available_features (journal_t *journal, unsigned long com + * + */ + +-int jbd2_journal_set_features (journal_t *journal, unsigned long compat, ++int jbd2_journal_set_features(journal_t *journal, unsigned long compat, + unsigned long ro, unsigned long incompat) + { + #define INCOMPAT_FEATURE_ON(f) \ +-- +2.39.2 + diff --git a/queue-5.4/jbd2-fix-kernel-doc-markups.patch b/queue-5.4/jbd2-fix-kernel-doc-markups.patch new file mode 100644 index 00000000000..ecddeaa60ae --- /dev/null +++ b/queue-5.4/jbd2-fix-kernel-doc-markups.patch @@ -0,0 +1,325 @@ +From 040fda541eb92d3a361ec1f2653403b079d5f228 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 16 Nov 2020 11:18:08 +0100 +Subject: jbd2: fix kernel-doc markups + +From: Mauro Carvalho Chehab + +[ Upstream commit 2bf31d94423c8ae3ff58e38a115b177df6940399 ] + +Kernel-doc markup should use this format: + identifier - description + +They should not have any type before that, as otherwise +the parser won't do the right thing. + +Also, some identifiers have different names between their +prototypes and the kernel-doc markup. 
+ +Reviewed-by: Jan Kara +Signed-off-by: Mauro Carvalho Chehab +Link: https://lore.kernel.org/r/72f5c6628f5f278d67625f60893ffbc2ca28d46e.1605521731.git.mchehab+huawei@kernel.org +Signed-off-by: Theodore Ts'o +Stable-dep-of: e34c8dd238d0 ("jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint") +Signed-off-by: Sasha Levin +--- + fs/jbd2/journal.c | 34 ++++++++++++++++++---------------- + fs/jbd2/transaction.c | 31 ++++++++++++++++--------------- + include/linux/jbd2.h | 2 +- + 3 files changed, 35 insertions(+), 32 deletions(-) + +diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c +index 1fa88b5bb1cb0..eeebe64b7c543 100644 +--- a/fs/jbd2/journal.c ++++ b/fs/jbd2/journal.c +@@ -562,12 +562,14 @@ static int __jbd2_journal_force_commit(journal_t *journal) + } + + /** +- * Force and wait upon a commit if the calling process is not within +- * transaction. This is used for forcing out undo-protected data which contains +- * bitmaps, when the fs is running out of space. ++ * jbd2_journal_force_commit_nested - Force and wait upon a commit if the ++ * calling process is not within transaction. + * + * @journal: journal to force + * Returns true if progress was made. ++ * ++ * This is used for forcing out undo-protected data which contains ++ * bitmaps, when the fs is running out of space. + */ + int jbd2_journal_force_commit_nested(journal_t *journal) + { +@@ -578,7 +580,7 @@ int jbd2_journal_force_commit_nested(journal_t *journal) + } + + /** +- * int journal_force_commit() - force any uncommitted transactions ++ * jbd2_journal_force_commit() - force any uncommitted transactions + * @journal: journal to force + * + * Caller want unconditional commit. We can only force the running transaction +@@ -1634,7 +1636,7 @@ static int load_superblock(journal_t *journal) + + + /** +- * int jbd2_journal_load() - Read journal from disk. ++ * jbd2_journal_load() - Read journal from disk. + * @journal: Journal to act on. 
+ * + * Given a journal_t structure which tells us which disk blocks contain +@@ -1704,7 +1706,7 @@ int jbd2_journal_load(journal_t *journal) + } + + /** +- * void jbd2_journal_destroy() - Release a journal_t structure. ++ * jbd2_journal_destroy() - Release a journal_t structure. + * @journal: Journal to act on. + * + * Release a journal_t structure once it is no longer in use by the +@@ -1780,7 +1782,7 @@ int jbd2_journal_destroy(journal_t *journal) + + + /** +- *int jbd2_journal_check_used_features() - Check if features specified are used. ++ * jbd2_journal_check_used_features() - Check if features specified are used. + * @journal: Journal to check. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount +@@ -1815,7 +1817,7 @@ int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat, + } + + /** +- * int jbd2_journal_check_available_features() - Check feature set in journalling layer ++ * jbd2_journal_check_available_features() - Check feature set in journalling layer + * @journal: Journal to check. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount +@@ -1847,7 +1849,7 @@ int jbd2_journal_check_available_features(journal_t *journal, unsigned long comp + } + + /** +- * int jbd2_journal_set_features() - Mark a given journal feature in the superblock ++ * jbd2_journal_set_features() - Mark a given journal feature in the superblock + * @journal: Journal to act on. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount +@@ -1929,7 +1931,7 @@ int jbd2_journal_set_features(journal_t *journal, unsigned long compat, + } + + /* +- * jbd2_journal_clear_features () - Clear a given journal feature in the ++ * jbd2_journal_clear_features() - Clear a given journal feature in the + * superblock + * @journal: Journal to act on. 
+ * @compat: bitmask of compatible features +@@ -1956,7 +1958,7 @@ void jbd2_journal_clear_features(journal_t *journal, unsigned long compat, + EXPORT_SYMBOL(jbd2_journal_clear_features); + + /** +- * int jbd2_journal_flush () - Flush journal ++ * jbd2_journal_flush() - Flush journal + * @journal: Journal to act on. + * + * Flush all data for a given journal to disk and empty the journal. +@@ -2031,7 +2033,7 @@ int jbd2_journal_flush(journal_t *journal) + } + + /** +- * int jbd2_journal_wipe() - Wipe journal contents ++ * jbd2_journal_wipe() - Wipe journal contents + * @journal: Journal to act on. + * @write: flag (see below) + * +@@ -2072,7 +2074,7 @@ int jbd2_journal_wipe(journal_t *journal, int write) + } + + /** +- * void jbd2_journal_abort () - Shutdown the journal immediately. ++ * jbd2_journal_abort () - Shutdown the journal immediately. + * @journal: the journal to shutdown. + * @errno: an error number to record in the journal indicating + * the reason for the shutdown. +@@ -2158,7 +2160,7 @@ void jbd2_journal_abort(journal_t *journal, int errno) + } + + /** +- * int jbd2_journal_errno () - returns the journal's error state. ++ * jbd2_journal_errno() - returns the journal's error state. + * @journal: journal to examine. + * + * This is the errno number set with jbd2_journal_abort(), the last +@@ -2182,7 +2184,7 @@ int jbd2_journal_errno(journal_t *journal) + } + + /** +- * int jbd2_journal_clear_err () - clears the journal's error state ++ * jbd2_journal_clear_err() - clears the journal's error state + * @journal: journal to act on. + * + * An error must be cleared or acked to take a FS out of readonly +@@ -2202,7 +2204,7 @@ int jbd2_journal_clear_err(journal_t *journal) + } + + /** +- * void jbd2_journal_ack_err() - Ack journal err. ++ * jbd2_journal_ack_err() - Ack journal err. + * @journal: journal to act on. 
+ * + * An error must be cleared or acked to take a FS out of readonly +diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c +index 09f4d00fece2f..91c2d3f6d1b3b 100644 +--- a/fs/jbd2/transaction.c ++++ b/fs/jbd2/transaction.c +@@ -490,7 +490,7 @@ EXPORT_SYMBOL(jbd2__journal_start); + + + /** +- * handle_t *jbd2_journal_start() - Obtain a new handle. ++ * jbd2_journal_start() - Obtain a new handle. + * @journal: Journal to start transaction on. + * @nblocks: number of block buffer we might modify + * +@@ -525,7 +525,7 @@ void jbd2_journal_free_reserved(handle_t *handle) + EXPORT_SYMBOL(jbd2_journal_free_reserved); + + /** +- * int jbd2_journal_start_reserved() - start reserved handle ++ * jbd2_journal_start_reserved() - start reserved handle + * @handle: handle to start + * @type: for handle statistics + * @line_no: for handle statistics +@@ -579,7 +579,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, + EXPORT_SYMBOL(jbd2_journal_start_reserved); + + /** +- * int jbd2_journal_extend() - extend buffer credits. ++ * jbd2_journal_extend() - extend buffer credits. + * @handle: handle to 'extend' + * @nblocks: nr blocks to try to extend by. + * +@@ -659,7 +659,7 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) + + + /** +- * int jbd2_journal_restart() - restart a handle . ++ * jbd2__journal_restart() - restart a handle . + * @handle: handle to restart + * @nblocks: nr credits requested + * @gfp_mask: memory allocation flags (for start_this_handle) +@@ -736,7 +736,7 @@ int jbd2_journal_restart(handle_t *handle, int nblocks) + EXPORT_SYMBOL(jbd2_journal_restart); + + /** +- * void jbd2_journal_lock_updates () - establish a transaction barrier. ++ * jbd2_journal_lock_updates () - establish a transaction barrier. + * @journal: Journal to establish a barrier on. 
+ * + * This locks out any further updates from being started, and blocks +@@ -795,7 +795,7 @@ void jbd2_journal_lock_updates(journal_t *journal) + } + + /** +- * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier ++ * jbd2_journal_unlock_updates () - release barrier + * @journal: Journal to release the barrier on. + * + * Release a transaction barrier obtained with jbd2_journal_lock_updates(). +@@ -1103,7 +1103,8 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, + } + + /** +- * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. ++ * jbd2_journal_get_write_access() - notify intent to modify a buffer ++ * for metadata (not data) update. + * @handle: transaction to add buffer modifications to + * @bh: bh to be used for metadata writes + * +@@ -1147,7 +1148,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) + * unlocked buffer beforehand. */ + + /** +- * int jbd2_journal_get_create_access () - notify intent to use newly created bh ++ * jbd2_journal_get_create_access () - notify intent to use newly created bh + * @handle: transaction to new buffer to + * @bh: new buffer. + * +@@ -1227,7 +1228,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) + } + + /** +- * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with ++ * jbd2_journal_get_undo_access() - Notify intent to modify metadata with + * non-rewindable consequences + * @handle: transaction + * @bh: buffer to undo +@@ -1304,7 +1305,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) + } + + /** +- * void jbd2_journal_set_triggers() - Add triggers for commit writeout ++ * jbd2_journal_set_triggers() - Add triggers for commit writeout + * @bh: buffer to trigger on + * @type: struct jbd2_buffer_trigger_type containing the trigger(s). 
+ * +@@ -1346,7 +1347,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, + } + + /** +- * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata ++ * jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata + * @handle: transaction to add buffer to. + * @bh: buffer to mark + * +@@ -1524,7 +1525,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) + } + + /** +- * void jbd2_journal_forget() - bforget() for potentially-journaled buffers. ++ * jbd2_journal_forget() - bforget() for potentially-journaled buffers. + * @handle: transaction handle + * @bh: bh to 'forget' + * +@@ -1699,7 +1700,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) + } + + /** +- * int jbd2_journal_stop() - complete a transaction ++ * jbd2_journal_stop() - complete a transaction + * @handle: transaction to complete. + * + * All done for a particular handle. +@@ -2047,7 +2048,7 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) + } + + /** +- * int jbd2_journal_try_to_free_buffers() - try to free page buffers. ++ * jbd2_journal_try_to_free_buffers() - try to free page buffers. + * @journal: journal for operation + * @page: to try and free + * @gfp_mask: we use the mask to detect how hard should we try to release +@@ -2386,7 +2387,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, + } + + /** +- * void jbd2_journal_invalidatepage() ++ * jbd2_journal_invalidatepage() + * @journal: journal to use for flush... 
+ * @page: page to flush + * @offset: start of the range to invalidate +diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h +index b0e97e5de8ca4..b60adc4210b57 100644 +--- a/include/linux/jbd2.h ++++ b/include/linux/jbd2.h +@@ -415,7 +415,7 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) + #define JI_WAIT_DATA (1 << __JI_WAIT_DATA) + + /** +- * struct jbd_inode - The jbd_inode type is the structure linking inodes in ++ * struct jbd2_inode - The jbd_inode type is the structure linking inodes in + * ordered mode present in a transaction so that we can sync them during commit. + */ + struct jbd2_inode { +-- +2.39.2 + diff --git a/queue-5.4/jbd2-fix-wrongly-judgement-for-buffer-head-removing-.patch b/queue-5.4/jbd2-fix-wrongly-judgement-for-buffer-head-removing-.patch new file mode 100644 index 00000000000..9f0dc8a7dc6 --- /dev/null +++ b/queue-5.4/jbd2-fix-wrongly-judgement-for-buffer-head-removing-.patch @@ -0,0 +1,106 @@ +From a3d9c0ec68dd178448d2f7cbd750d9fc21604008 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Jun 2023 21:59:26 +0800 +Subject: jbd2: Fix wrongly judgement for buffer head removing while doing + checkpoint + +From: Zhihao Cheng + +[ Upstream commit e34c8dd238d0c9368b746480f313055f5bab5040 ] + +Following process, + +jbd2_journal_commit_transaction +// there are several dirty buffer heads in transaction->t_checkpoint_list + P1 wb_workfn +jbd2_log_do_checkpoint + if (buffer_locked(bh)) // false + __block_write_full_page + trylock_buffer(bh) + test_clear_buffer_dirty(bh) + if (!buffer_dirty(bh)) + __jbd2_journal_remove_checkpoint(jh) + if (buffer_write_io_error(bh)) // false + >> bh IO error occurs << + jbd2_cleanup_journal_tail + __jbd2_update_log_tail + jbd2_write_superblock + // The bh won't be replayed in next mount. +, which could corrupt the ext4 image, fetch a reproducer in [Link]. 
+ +Since writeback process clears buffer dirty after locking buffer head, +we can fix it by try locking buffer and check dirtiness while buffer is +locked, the buffer head can be removed if it is neither dirty nor locked. + +Link: https://bugzilla.kernel.org/show_bug.cgi?id=217490 +Fixes: 470decc613ab ("[PATCH] jbd2: initial copy of files from jbd") +Signed-off-by: Zhihao Cheng +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230606135928.434610-5-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/jbd2/checkpoint.c | 32 +++++++++++++++++--------------- + 1 file changed, 17 insertions(+), 15 deletions(-) + +diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c +index 587b89b67c1c6..edb17822f8e6b 100644 +--- a/fs/jbd2/checkpoint.c ++++ b/fs/jbd2/checkpoint.c +@@ -228,20 +228,6 @@ int jbd2_log_do_checkpoint(journal_t *journal) + jh = transaction->t_checkpoint_list; + bh = jh2bh(jh); + +- /* +- * The buffer may be writing back, or flushing out in the +- * last couple of cycles, or re-adding into a new transaction, +- * need to check it again until it's unlocked. +- */ +- if (buffer_locked(bh)) { +- get_bh(bh); +- spin_unlock(&journal->j_list_lock); +- wait_on_buffer(bh); +- /* the journal_head may have gone by now */ +- BUFFER_TRACE(bh, "brelse"); +- __brelse(bh); +- goto retry; +- } + if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; +@@ -276,7 +262,22 @@ int jbd2_log_do_checkpoint(journal_t *journal) + spin_lock(&journal->j_list_lock); + goto restart; + } +- if (!buffer_dirty(bh)) { ++ if (!trylock_buffer(bh)) { ++ /* ++ * The buffer is locked, it may be writing back, or ++ * flushing out in the last couple of cycles, or ++ * re-adding into a new transaction, need to check ++ * it again until it's unlocked. 
++ */ ++ get_bh(bh); ++ spin_unlock(&journal->j_list_lock); ++ wait_on_buffer(bh); ++ /* the journal_head may have gone by now */ ++ BUFFER_TRACE(bh, "brelse"); ++ __brelse(bh); ++ goto retry; ++ } else if (!buffer_dirty(bh)) { ++ unlock_buffer(bh); + BUFFER_TRACE(bh, "remove from checkpoint"); + /* + * If the transaction was released or the checkpoint +@@ -286,6 +287,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) + !transaction->t_checkpoint_list) + goto out; + } else { ++ unlock_buffer(bh); + /* + * We are about to write the buffer, it could be + * raced by some other transaction shrink or buffer +-- +2.39.2 + diff --git a/queue-5.4/jbd2-recheck-chechpointing-non-dirty-buffer.patch b/queue-5.4/jbd2-recheck-chechpointing-non-dirty-buffer.patch new file mode 100644 index 00000000000..cf895d00a1f --- /dev/null +++ b/queue-5.4/jbd2-recheck-chechpointing-non-dirty-buffer.patch @@ -0,0 +1,197 @@ +From fa6a8c35dee2c3cb23850171d9e17d01e9725c69 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 6 Jun 2023 21:59:23 +0800 +Subject: jbd2: recheck chechpointing non-dirty buffer + +From: Zhang Yi + +[ Upstream commit c2d6fd9d6f35079f1669f0100f05b46708c74b7f ] + +There is a long-standing metadata corruption issue that happens from +time to time, but it's very difficult to reproduce and analyse, benefit +from the JBD2_CYCLE_RECORD option, we found out that the problem is the +checkpointing process miss to write out some buffers which are raced by +another do_get_write_access(). Looks below for detail. 
+ +jbd2_log_do_checkpoint() //transaction X + //buffer A is dirty and not belones to any transaction + __buffer_relink_io() //move it to the IO list + __flush_batch() + write_dirty_buffer() + do_get_write_access() + clear_buffer_dirty + __jbd2_journal_file_buffer() + //add buffer A to a new transaction Y + lock_buffer(bh) + //doesn't write out + __jbd2_journal_remove_checkpoint() + //finish checkpoint except buffer A + //filesystem corrupt if the new transaction Y isn't fully write out. + +Due to the t_checkpoint_list walking loop in jbd2_log_do_checkpoint() +have already handles waiting for buffers under IO and re-added new +transaction to complete commit, and it also removing cleaned buffers, +this makes sure the list will eventually get empty. So it's fine to +leave buffers on the t_checkpoint_list while flushing out and completely +stop using the t_checkpoint_io_list. + +Cc: stable@vger.kernel.org +Suggested-by: Jan Kara +Signed-off-by: Zhang Yi +Tested-by: Zhihao Cheng +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230606135928.434610-2-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: e34c8dd238d0 ("jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint") +Signed-off-by: Sasha Levin +--- + fs/jbd2/checkpoint.c | 102 ++++++++++++------------------------------- + 1 file changed, 29 insertions(+), 73 deletions(-) + +diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c +index c5f7b7a455fa4..587b89b67c1c6 100644 +--- a/fs/jbd2/checkpoint.c ++++ b/fs/jbd2/checkpoint.c +@@ -57,28 +57,6 @@ static inline void __buffer_unlink(struct journal_head *jh) + } + } + +-/* +- * Move a buffer from the checkpoint list to the checkpoint io list +- * +- * Called with j_list_lock held +- */ +-static inline void __buffer_relink_io(struct journal_head *jh) +-{ +- transaction_t *transaction = jh->b_cp_transaction; +- +- __buffer_unlink_first(jh); +- +- if (!transaction->t_checkpoint_io_list) { +- jh->b_cpnext = jh->b_cpprev = 
jh; +- } else { +- jh->b_cpnext = transaction->t_checkpoint_io_list; +- jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; +- jh->b_cpprev->b_cpnext = jh; +- jh->b_cpnext->b_cpprev = jh; +- } +- transaction->t_checkpoint_io_list = jh; +-} +- + /* + * Try to release a checkpointed buffer from its transaction. + * Returns 1 if we released it and 2 if we also released the +@@ -190,6 +168,7 @@ __flush_batch(journal_t *journal, int *batch_count) + struct buffer_head *bh = journal->j_chkpt_bhs[i]; + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); ++ journal->j_chkpt_bhs[i] = NULL; + } + *batch_count = 0; + } +@@ -249,6 +228,11 @@ int jbd2_log_do_checkpoint(journal_t *journal) + jh = transaction->t_checkpoint_list; + bh = jh2bh(jh); + ++ /* ++ * The buffer may be writing back, or flushing out in the ++ * last couple of cycles, or re-adding into a new transaction, ++ * need to check it again until it's unlocked. ++ */ + if (buffer_locked(bh)) { + get_bh(bh); + spin_unlock(&journal->j_list_lock); +@@ -294,28 +278,32 @@ int jbd2_log_do_checkpoint(journal_t *journal) + } + if (!buffer_dirty(bh)) { + BUFFER_TRACE(bh, "remove from checkpoint"); +- if (__jbd2_journal_remove_checkpoint(jh)) +- /* The transaction was released; we're done */ ++ /* ++ * If the transaction was released or the checkpoint ++ * list was empty, we're done. ++ */ ++ if (__jbd2_journal_remove_checkpoint(jh) || ++ !transaction->t_checkpoint_list) + goto out; +- continue; ++ } else { ++ /* ++ * We are about to write the buffer, it could be ++ * raced by some other transaction shrink or buffer ++ * re-log logic once we release the j_list_lock, ++ * leave it on the checkpoint list and check status ++ * again to make sure it's clean. 
++ */ ++ BUFFER_TRACE(bh, "queue"); ++ get_bh(bh); ++ J_ASSERT_BH(bh, !buffer_jwrite(bh)); ++ journal->j_chkpt_bhs[batch_count++] = bh; ++ transaction->t_chp_stats.cs_written++; ++ transaction->t_checkpoint_list = jh->b_cpnext; + } +- /* +- * Important: we are about to write the buffer, and +- * possibly block, while still holding the journal +- * lock. We cannot afford to let the transaction +- * logic start messing around with this buffer before +- * we write it to disk, as that would break +- * recoverability. +- */ +- BUFFER_TRACE(bh, "queue"); +- get_bh(bh); +- J_ASSERT_BH(bh, !buffer_jwrite(bh)); +- journal->j_chkpt_bhs[batch_count++] = bh; +- __buffer_relink_io(jh); +- transaction->t_chp_stats.cs_written++; ++ + if ((batch_count == JBD2_NR_BATCH) || +- need_resched() || +- spin_needbreak(&journal->j_list_lock)) ++ need_resched() || spin_needbreak(&journal->j_list_lock) || ++ jh2bh(transaction->t_checkpoint_list) == journal->j_chkpt_bhs[0]) + goto unlock_and_flush; + } + +@@ -329,38 +317,6 @@ int jbd2_log_do_checkpoint(journal_t *journal) + goto restart; + } + +- /* +- * Now we issued all of the transaction's buffers, let's deal +- * with the buffers that are out for I/O. +- */ +-restart2: +- /* Did somebody clean up the transaction in the meanwhile? 
*/ +- if (journal->j_checkpoint_transactions != transaction || +- transaction->t_tid != this_tid) +- goto out; +- +- while (transaction->t_checkpoint_io_list) { +- jh = transaction->t_checkpoint_io_list; +- bh = jh2bh(jh); +- if (buffer_locked(bh)) { +- get_bh(bh); +- spin_unlock(&journal->j_list_lock); +- wait_on_buffer(bh); +- /* the journal_head may have gone by now */ +- BUFFER_TRACE(bh, "brelse"); +- __brelse(bh); +- spin_lock(&journal->j_list_lock); +- goto restart2; +- } +- +- /* +- * Now in whatever state the buffer currently is, we +- * know that it has been written out and so we can +- * drop it from the list +- */ +- if (__jbd2_journal_remove_checkpoint(jh)) +- break; +- } + out: + spin_unlock(&journal->j_list_lock); + result = jbd2_cleanup_journal_tail(journal); +-- +2.39.2 + diff --git a/queue-5.4/jbd2-remove-redundant-buffer-io-error-checks.patch b/queue-5.4/jbd2-remove-redundant-buffer-io-error-checks.patch new file mode 100644 index 00000000000..daf92019f37 --- /dev/null +++ b/queue-5.4/jbd2-remove-redundant-buffer-io-error-checks.patch @@ -0,0 +1,79 @@ +From 59b6c61dfb944e68b9885724fd49ad9ec6fb41a1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 10 Jun 2021 19:24:36 +0800 +Subject: jbd2: remove redundant buffer io error checks + +From: Zhang Yi + +[ Upstream commit 214eb5a4d8a2032fb9f0711d1b202eb88ee02920 ] + +Now that __jbd2_journal_remove_checkpoint() can detect buffer io error +and mark journal checkpoint error, then we abort the journal later +before updating log tail to ensure the filesystem works consistently. +So we could remove other redundant buffer io error checkes. 
+ +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20210610112440.3438139-5-yi.zhang@huawei.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: e34c8dd238d0 ("jbd2: Fix wrongly judgement for buffer head removing while doing checkpoint") +Signed-off-by: Sasha Levin +--- + fs/jbd2/checkpoint.c | 13 ++----------- + 1 file changed, 2 insertions(+), 11 deletions(-) + +diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c +index 5ef99b9ec8be7..c5f7b7a455fa4 100644 +--- a/fs/jbd2/checkpoint.c ++++ b/fs/jbd2/checkpoint.c +@@ -91,8 +91,7 @@ static int __try_to_free_cp_buf(struct journal_head *jh) + int ret = 0; + struct buffer_head *bh = jh2bh(jh); + +- if (jh->b_transaction == NULL && !buffer_locked(bh) && +- !buffer_dirty(bh) && !buffer_write_io_error(bh)) { ++ if (!jh->b_transaction && !buffer_locked(bh) && !buffer_dirty(bh)) { + JBUFFER_TRACE(jh, "remove from checkpoint list"); + ret = __jbd2_journal_remove_checkpoint(jh) + 1; + } +@@ -228,7 +227,6 @@ int jbd2_log_do_checkpoint(journal_t *journal) + * OK, we need to start writing disk blocks. Take one transaction + * and write it. 
+ */ +- result = 0; + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) + goto out; +@@ -295,8 +293,6 @@ int jbd2_log_do_checkpoint(journal_t *journal) + goto restart; + } + if (!buffer_dirty(bh)) { +- if (unlikely(buffer_write_io_error(bh)) && !result) +- result = -EIO; + BUFFER_TRACE(bh, "remove from checkpoint"); + if (__jbd2_journal_remove_checkpoint(jh)) + /* The transaction was released; we're done */ +@@ -356,8 +352,6 @@ int jbd2_log_do_checkpoint(journal_t *journal) + spin_lock(&journal->j_list_lock); + goto restart2; + } +- if (unlikely(buffer_write_io_error(bh)) && !result) +- result = -EIO; + + /* + * Now in whatever state the buffer currently is, we +@@ -369,10 +363,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) + } + out: + spin_unlock(&journal->j_list_lock); +- if (result < 0) +- jbd2_journal_abort(journal, result); +- else +- result = jbd2_cleanup_journal_tail(journal); ++ result = jbd2_cleanup_journal_tail(journal); + + return (result < 0) ? result : 0; + } +-- +2.39.2 + diff --git a/queue-5.4/keys-fix-linking-a-duplicate-key-to-a-keyring-s-asso.patch b/queue-5.4/keys-fix-linking-a-duplicate-key-to-a-keyring-s-asso.patch new file mode 100644 index 00000000000..8a780c654b7 --- /dev/null +++ b/queue-5.4/keys-fix-linking-a-duplicate-key-to-a-keyring-s-asso.patch @@ -0,0 +1,182 @@ +From f2449d8f5cc38bb75a7163c7c952461b9f5136e8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Mar 2023 14:04:12 +0100 +Subject: keys: Fix linking a duplicate key to a keyring's assoc_array + +From: Petr Pavlu + +[ Upstream commit d55901522f96082a43b9842d34867363c0cdbac5 ] + +When making a DNS query inside the kernel using dns_query(), the request +code can in rare cases end up creating a duplicate index key in the +assoc_array of the destination keyring. It is eventually found by +a BUG_ON() check in the assoc_array implementation and results in +a crash. + +Example report: +[2158499.700025] kernel BUG at ../lib/assoc_array.c:652! 
+[2158499.700039] invalid opcode: 0000 [#1] SMP PTI +[2158499.700065] CPU: 3 PID: 31985 Comm: kworker/3:1 Kdump: loaded Not tainted 5.3.18-150300.59.90-default #1 SLE15-SP3 +[2158499.700096] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020 +[2158499.700351] Workqueue: cifsiod cifs_resolve_server [cifs] +[2158499.700380] RIP: 0010:assoc_array_insert+0x85f/0xa40 +[2158499.700401] Code: ff 74 2b 48 8b 3b 49 8b 45 18 4c 89 e6 48 83 e7 fe e8 95 ec 74 00 3b 45 88 7d db 85 c0 79 d4 0f 0b 0f 0b 0f 0b e8 41 f2 be ff <0f> 0b 0f 0b 81 7d 88 ff ff ff 7f 4c 89 eb 4c 8b ad 58 ff ff ff 0f +[2158499.700448] RSP: 0018:ffffc0bd6187faf0 EFLAGS: 00010282 +[2158499.700470] RAX: ffff9f1ea7da2fe8 RBX: ffff9f1ea7da2fc1 RCX: 0000000000000005 +[2158499.700492] RDX: 0000000000000000 RSI: 0000000000000005 RDI: 0000000000000000 +[2158499.700515] RBP: ffffc0bd6187fbb0 R08: ffff9f185faf1100 R09: 0000000000000000 +[2158499.700538] R10: ffff9f1ea7da2cc0 R11: 000000005ed8cec8 R12: ffffc0bd6187fc28 +[2158499.700561] R13: ffff9f15feb8d000 R14: ffff9f1ea7da2fc0 R15: ffff9f168dc0d740 +[2158499.700585] FS: 0000000000000000(0000) GS:ffff9f185fac0000(0000) knlGS:0000000000000000 +[2158499.700610] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +[2158499.700630] CR2: 00007fdd94fca238 CR3: 0000000809d8c006 CR4: 00000000003706e0 +[2158499.700702] Call Trace: +[2158499.700741] ? key_alloc+0x447/0x4b0 +[2158499.700768] ? __key_link_begin+0x43/0xa0 +[2158499.700790] __key_link_begin+0x43/0xa0 +[2158499.700814] request_key_and_link+0x2c7/0x730 +[2158499.700847] ? dns_resolver_read+0x20/0x20 [dns_resolver] +[2158499.700873] ? key_default_cmp+0x20/0x20 +[2158499.700898] request_key_tag+0x43/0xa0 +[2158499.700926] dns_query+0x114/0x2ca [dns_resolver] +[2158499.701127] dns_resolve_server_name_to_ip+0x194/0x310 [cifs] +[2158499.701164] ? scnprintf+0x49/0x90 +[2158499.701190] ? __switch_to_asm+0x40/0x70 +[2158499.701211] ? 
__switch_to_asm+0x34/0x70 +[2158499.701405] reconn_set_ipaddr_from_hostname+0x81/0x2a0 [cifs] +[2158499.701603] cifs_resolve_server+0x4b/0xd0 [cifs] +[2158499.701632] process_one_work+0x1f8/0x3e0 +[2158499.701658] worker_thread+0x2d/0x3f0 +[2158499.701682] ? process_one_work+0x3e0/0x3e0 +[2158499.701703] kthread+0x10d/0x130 +[2158499.701723] ? kthread_park+0xb0/0xb0 +[2158499.701746] ret_from_fork+0x1f/0x40 + +The situation occurs as follows: +* Some kernel facility invokes dns_query() to resolve a hostname, for + example, "abcdef". The function registers its global DNS resolver + cache as current->cred.thread_keyring and passes the query to + request_key_net() -> request_key_tag() -> request_key_and_link(). +* Function request_key_and_link() creates a keyring_search_context + object. Its match_data.cmp method gets set via a call to + type->match_preparse() (resolves to dns_resolver_match_preparse()) to + dns_resolver_cmp(). +* Function request_key_and_link() continues and invokes + search_process_keyrings_rcu() which returns that a given key was not + found. The control is then passed to request_key_and_link() -> + construct_alloc_key(). +* Concurrently to that, a second task similarly makes a DNS query for + "abcdef." and its result gets inserted into the DNS resolver cache. +* Back on the first task, function construct_alloc_key() first runs + __key_link_begin() to determine an assoc_array_edit operation to + insert a new key. Index keys in the array are compared exactly as-is, + using keyring_compare_object(). The operation finds that "abcdef" is + not yet present in the destination keyring. +* Function construct_alloc_key() continues and checks if a given key is + already present on some keyring by again calling + search_process_keyrings_rcu(). This search is done using + dns_resolver_cmp() and "abcdef" gets matched with now present key + "abcdef.". 
+* The found key is linked on the destination keyring by calling + __key_link() and using the previously calculated assoc_array_edit + operation. This inserts the "abcdef." key in the array but creates + a duplicity because the same index key is already present. + +Fix the problem by postponing __key_link_begin() in +construct_alloc_key() until an actual key which should be linked into +the destination keyring is determined. + +[jarkko@kernel.org: added a fixes tag and cc to stable] +Cc: stable@vger.kernel.org # v5.3+ +Fixes: df593ee23e05 ("keys: Hoist locking out of __key_link_begin()") +Signed-off-by: Petr Pavlu +Reviewed-by: Joey Lee +Reviewed-by: Jarkko Sakkinen +Signed-off-by: Jarkko Sakkinen +Signed-off-by: Sasha Levin +--- + security/keys/request_key.c | 35 ++++++++++++++++++++++++----------- + 1 file changed, 24 insertions(+), 11 deletions(-) + +diff --git a/security/keys/request_key.c b/security/keys/request_key.c +index 17c9c0cfb6f59..964e2456f34da 100644 +--- a/security/keys/request_key.c ++++ b/security/keys/request_key.c +@@ -401,17 +401,21 @@ static int construct_alloc_key(struct keyring_search_context *ctx, + set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); + + if (dest_keyring) { +- ret = __key_link_lock(dest_keyring, &ctx->index_key); ++ ret = __key_link_lock(dest_keyring, &key->index_key); + if (ret < 0) + goto link_lock_failed; +- ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit); +- if (ret < 0) +- goto link_prealloc_failed; + } + +- /* attach the key to the destination keyring under lock, but we do need ++ /* ++ * Attach the key to the destination keyring under lock, but we do need + * to do another check just in case someone beat us to it whilst we +- * waited for locks */ ++ * waited for locks. ++ * ++ * The caller might specify a comparison function which looks for keys ++ * that do not exactly match but are still equivalent from the caller's ++ * perspective. 
The __key_link_begin() operation must be done only after ++ * an actual key is determined. ++ */ + mutex_lock(&key_construction_mutex); + + rcu_read_lock(); +@@ -420,12 +424,16 @@ static int construct_alloc_key(struct keyring_search_context *ctx, + if (!IS_ERR(key_ref)) + goto key_already_present; + +- if (dest_keyring) ++ if (dest_keyring) { ++ ret = __key_link_begin(dest_keyring, &key->index_key, &edit); ++ if (ret < 0) ++ goto link_alloc_failed; + __key_link(key, &edit); ++ } + + mutex_unlock(&key_construction_mutex); + if (dest_keyring) +- __key_link_end(dest_keyring, &ctx->index_key, edit); ++ __key_link_end(dest_keyring, &key->index_key, edit); + mutex_unlock(&user->cons_lock); + *_key = key; + kleave(" = 0 [%d]", key_serial(key)); +@@ -438,10 +446,13 @@ static int construct_alloc_key(struct keyring_search_context *ctx, + mutex_unlock(&key_construction_mutex); + key = key_ref_to_ptr(key_ref); + if (dest_keyring) { ++ ret = __key_link_begin(dest_keyring, &key->index_key, &edit); ++ if (ret < 0) ++ goto link_alloc_failed_unlocked; + ret = __key_link_check_live_key(dest_keyring, key); + if (ret == 0) + __key_link(key, &edit); +- __key_link_end(dest_keyring, &ctx->index_key, edit); ++ __key_link_end(dest_keyring, &key->index_key, edit); + if (ret < 0) + goto link_check_failed; + } +@@ -456,8 +467,10 @@ static int construct_alloc_key(struct keyring_search_context *ctx, + kleave(" = %d [linkcheck]", ret); + return ret; + +-link_prealloc_failed: +- __key_link_end(dest_keyring, &ctx->index_key, edit); ++link_alloc_failed: ++ mutex_unlock(&key_construction_mutex); ++link_alloc_failed_unlocked: ++ __key_link_end(dest_keyring, &key->index_key, edit); + link_lock_failed: + mutex_unlock(&user->cons_lock); + key_put(key); +-- +2.39.2 + diff --git a/queue-5.4/pci-aspm-avoid-link-retraining-race.patch b/queue-5.4/pci-aspm-avoid-link-retraining-race.patch new file mode 100644 index 00000000000..d35318eedea --- /dev/null +++ 
b/queue-5.4/pci-aspm-avoid-link-retraining-race.patch @@ -0,0 +1,64 @@ +From a1bdaff2e17250dbb54c141a0e82666f07d009ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 May 2023 11:39:23 +0300 +Subject: PCI/ASPM: Avoid link retraining race +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ilpo Järvinen + +[ Upstream commit e7e39756363ad5bd83ddeae1063193d0f13870fd ] + +PCIe r6.0.1, sec 7.5.3.7, recommends setting the link control parameters, +then waiting for the Link Training bit to be clear before setting the +Retrain Link bit. + +This avoids a race where the LTSSM may not use the updated parameters if it +is already in the midst of link training because of other normal link +activity. + +Wait for the Link Training bit to be clear before toggling the Retrain Link +bit to ensure that the LTSSM uses the updated link control parameters. + +[bhelgaas: commit log, return 0 (success)/-ETIMEDOUT instead of bool for +both pcie_wait_for_retrain() and the existing pcie_retrain_link()] +Suggested-by: Lukas Wunner +Fixes: 7d715a6c1ae5 ("PCI: add PCI Express ASPM support") +Link: https://lore.kernel.org/r/20230502083923.34562-1-ilpo.jarvinen@linux.intel.com +Signed-off-by: Ilpo Järvinen +Signed-off-by: Bjorn Helgaas +Reviewed-by: Lukas Wunner +Cc: stable@vger.kernel.org +Signed-off-by: Sasha Levin +--- + drivers/pci/pcie/aspm.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c +index 749a367bde2df..55270180ae081 100644 +--- a/drivers/pci/pcie/aspm.c ++++ b/drivers/pci/pcie/aspm.c +@@ -220,8 +220,19 @@ static int pcie_wait_for_retrain(struct pci_dev *pdev) + static int pcie_retrain_link(struct pcie_link_state *link) + { + struct pci_dev *parent = link->pdev; ++ int rc; + u16 reg16; + ++ /* ++ * Ensure the updated LNKCTL parameters are used during link ++ * training by checking that there is no ongoing link training to ++ * avoid LTSSM race as 
recommended in Implementation Note at the ++ * end of PCIe r6.0.1 sec 7.5.3.7. ++ */ ++ rc = pcie_wait_for_retrain(parent); ++ if (rc) ++ return rc; ++ + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16); + reg16 |= PCI_EXP_LNKCTL_RL; + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); +-- +2.39.2 + diff --git a/queue-5.4/pci-aspm-factor-out-pcie_wait_for_retrain.patch b/queue-5.4/pci-aspm-factor-out-pcie_wait_for_retrain.patch new file mode 100644 index 00000000000..db2395bbd62 --- /dev/null +++ b/queue-5.4/pci-aspm-factor-out-pcie_wait_for_retrain.patch @@ -0,0 +1,79 @@ +From 7920d0ac476bd6e52af32cde4e47c21f5a28d62b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 14:49:33 -0500 +Subject: PCI/ASPM: Factor out pcie_wait_for_retrain() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ilpo Järvinen + +[ Upstream commit 9c7f136433d26592cb4d9cd00b4e15c33d9797c6 ] + +Factor pcie_wait_for_retrain() out from pcie_retrain_link(). No functional +change intended. + +[bhelgaas: split out from +https: //lore.kernel.org/r/20230502083923.34562-1-ilpo.jarvinen@linux.intel.com] +Signed-off-by: Ilpo Järvinen +Signed-off-by: Bjorn Helgaas +Stable-dep-of: e7e39756363a ("PCI/ASPM: Avoid link retraining race") +Signed-off-by: Sasha Levin +--- + drivers/pci/pcie/aspm.c | 30 ++++++++++++++++++------------ + 1 file changed, 18 insertions(+), 12 deletions(-) + +diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c +index 3197ac6dda35b..749a367bde2df 100644 +--- a/drivers/pci/pcie/aspm.c ++++ b/drivers/pci/pcie/aspm.c +@@ -200,10 +200,26 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) + link->clkpm_disable = blacklist ? 
1 : 0; + } + ++static int pcie_wait_for_retrain(struct pci_dev *pdev) ++{ ++ unsigned long end_jiffies; ++ u16 reg16; ++ ++ /* Wait for Link Training to be cleared by hardware */ ++ end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT; ++ do { ++ pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &reg16); ++ if (!(reg16 & PCI_EXP_LNKSTA_LT)) ++ return 0; ++ msleep(1); ++ } while (time_before(jiffies, end_jiffies)); ++ ++ return -ETIMEDOUT; ++} ++ + static int pcie_retrain_link(struct pcie_link_state *link) + { + struct pci_dev *parent = link->pdev; +- unsigned long end_jiffies; + u16 reg16; + + pcie_capability_read_word(parent, PCI_EXP_LNKCTL, &reg16); +@@ -219,17 +235,7 @@ static int pcie_retrain_link(struct pcie_link_state *link) + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); + } + +- /* Wait for link training end. Break out after waiting for timeout */ +- end_jiffies = jiffies + LINK_RETRAIN_TIMEOUT; +- do { +- pcie_capability_read_word(parent, PCI_EXP_LNKSTA, &reg16); +- if (!(reg16 & PCI_EXP_LNKSTA_LT)) +- break; +- msleep(1); +- } while (time_before(jiffies, end_jiffies)); +- if (reg16 & PCI_EXP_LNKSTA_LT) +- return -ETIMEDOUT; +- return 0; ++ return pcie_wait_for_retrain(parent); + } + + /* +-- +2.39.2 + diff --git a/queue-5.4/pci-aspm-return-0-or-etimedout-from-pcie_retrain_lin.patch b/queue-5.4/pci-aspm-return-0-or-etimedout-from-pcie_retrain_lin.patch new file mode 100644 index 00000000000..e7c0d2c85de --- /dev/null +++ b/queue-5.4/pci-aspm-return-0-or-etimedout-from-pcie_retrain_lin.patch @@ -0,0 +1,72 @@ +From d86d8d098cd05f8e3a2b27084b97e5028bbcff81 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Jun 2023 14:44:55 -0500 +Subject: PCI/ASPM: Return 0 or -ETIMEDOUT from pcie_retrain_link() + +From: Bjorn Helgaas + +[ Upstream commit f5297a01ee805d7fa569d288ed65fc0f9ac9b03d ] + +"pcie_retrain_link" is not a question with a true/false answer, so "bool" +isn't quite the right return type. Return 0 for success or -ETIMEDOUT if +the retrain failed.
No functional change intended. + +[bhelgaas: based on Ilpo's patch below] +Link: https://lore.kernel.org/r/20230502083923.34562-1-ilpo.jarvinen@linux.intel.com +Signed-off-by: Bjorn Helgaas +Stable-dep-of: e7e39756363a ("PCI/ASPM: Avoid link retraining race") +Signed-off-by: Sasha Levin +--- + drivers/pci/pcie/aspm.c | 20 +++++++++++--------- + 1 file changed, 11 insertions(+), 9 deletions(-) + +diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c +index d8d27b11b48c4..3197ac6dda35b 100644 +--- a/drivers/pci/pcie/aspm.c ++++ b/drivers/pci/pcie/aspm.c +@@ -200,7 +200,7 @@ static void pcie_clkpm_cap_init(struct pcie_link_state *link, int blacklist) + link->clkpm_disable = blacklist ? 1 : 0; + } + +-static bool pcie_retrain_link(struct pcie_link_state *link) ++static int pcie_retrain_link(struct pcie_link_state *link) + { + struct pci_dev *parent = link->pdev; + unsigned long end_jiffies; +@@ -227,7 +227,9 @@ static bool pcie_retrain_link(struct pcie_link_state *link) + break; + msleep(1); + } while (time_before(jiffies, end_jiffies)); +- return !(reg16 & PCI_EXP_LNKSTA_LT); ++ if (reg16 & PCI_EXP_LNKSTA_LT) ++ return -ETIMEDOUT; ++ return 0; + } + + /* +@@ -296,15 +298,15 @@ static void pcie_aspm_configure_common_clock(struct pcie_link_state *link) + reg16 &= ~PCI_EXP_LNKCTL_CCC; + pcie_capability_write_word(parent, PCI_EXP_LNKCTL, reg16); + +- if (pcie_retrain_link(link)) +- return; ++ if (pcie_retrain_link(link)) { + +- /* Training failed. Restore common clock configurations */ +- pci_err(parent, "ASPM: Could not configure common clock\n"); +- list_for_each_entry(child, &linkbus->devices, bus_list) +- pcie_capability_write_word(child, PCI_EXP_LNKCTL, ++ /* Training failed. 
Restore common clock configurations */ ++ pci_err(parent, "ASPM: Could not configure common clock\n"); ++ list_for_each_entry(child, &linkbus->devices, bus_list) ++ pcie_capability_write_word(child, PCI_EXP_LNKCTL, + child_reg[PCI_FUNC(child->devfn)]); +- pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_reg); ++ pcie_capability_write_word(parent, PCI_EXP_LNKCTL, parent_reg); ++ } + } + + /* Convert L0s latency encoding to ns */ +-- +2.39.2 + diff --git a/queue-5.4/pwm-meson-fix-handling-of-period-duty-if-greater-tha.patch b/queue-5.4/pwm-meson-fix-handling-of-period-duty-if-greater-tha.patch new file mode 100644 index 00000000000..867b1c4524c --- /dev/null +++ b/queue-5.4/pwm-meson-fix-handling-of-period-duty-if-greater-tha.patch @@ -0,0 +1,85 @@ +From 75ddf4da183a423c1b36d40d0ecd876067f67476 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 24 May 2023 21:48:36 +0200 +Subject: pwm: meson: fix handling of period/duty if greater than UINT_MAX +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Heiner Kallweit + +[ Upstream commit 87a2cbf02d7701255f9fcca7e5bd864a7bb397cf ] + +state->period/duty are of type u64, and if their value is greater than +UINT_MAX, then the cast to uint will cause problems. Fix this by +changing the type of the respective local variables to u64. 
+ +Fixes: b79c3670e120 ("pwm: meson: Don't duplicate the polarity internally") +Cc: stable@vger.kernel.org +Suggested-by: Uwe Kleine-König +Reviewed-by: Martin Blumenstingl +Signed-off-by: Heiner Kallweit +Signed-off-by: Thierry Reding +Signed-off-by: Sasha Levin +--- + drivers/pwm/pwm-meson.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c +index d594dac41f6da..768e6e691c7cc 100644 +--- a/drivers/pwm/pwm-meson.c ++++ b/drivers/pwm/pwm-meson.c +@@ -163,8 +163,9 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, + const struct pwm_state *state) + { + struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; +- unsigned int duty, period, pre_div, cnt, duty_cnt; ++ unsigned int pre_div, cnt, duty_cnt; + unsigned long fin_freq; ++ u64 duty, period; + + duty = state->duty_cycle; + period = state->period; +@@ -186,19 +187,19 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, + + dev_dbg(meson->chip.dev, "fin_freq: %lu Hz\n", fin_freq); + +- pre_div = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * 0xffffLL); ++ pre_div = div64_u64(fin_freq * period, NSEC_PER_SEC * 0xffffLL); + if (pre_div > MISC_CLK_DIV_MASK) { + dev_err(meson->chip.dev, "unable to get period pre_div\n"); + return -EINVAL; + } + +- cnt = div64_u64(fin_freq * (u64)period, NSEC_PER_SEC * (pre_div + 1)); ++ cnt = div64_u64(fin_freq * period, NSEC_PER_SEC * (pre_div + 1)); + if (cnt > 0xffff) { + dev_err(meson->chip.dev, "unable to get period cnt\n"); + return -EINVAL; + } + +- dev_dbg(meson->chip.dev, "period=%u pre_div=%u cnt=%u\n", period, ++ dev_dbg(meson->chip.dev, "period=%llu pre_div=%u cnt=%u\n", period, + pre_div, cnt); + + if (duty == period) { +@@ -211,14 +212,13 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, + channel->lo = cnt; + } else { + /* Then check is we can have the duty with the same pre_div */ +- duty_cnt = 
div64_u64(fin_freq * (u64)duty, +- NSEC_PER_SEC * (pre_div + 1)); ++ duty_cnt = div64_u64(fin_freq * duty, NSEC_PER_SEC * (pre_div + 1)); + if (duty_cnt > 0xffff) { + dev_err(meson->chip.dev, "unable to get duty cycle\n"); + return -EINVAL; + } + +- dev_dbg(meson->chip.dev, "duty=%u pre_div=%u duty_cnt=%u\n", ++ dev_dbg(meson->chip.dev, "duty=%llu pre_div=%u duty_cnt=%u\n", + duty, pre_div, duty_cnt); + + channel->pre_div = pre_div; +-- +2.39.2 + diff --git a/queue-5.4/pwm-meson-remove-redundant-assignment-to-variable-fi.patch b/queue-5.4/pwm-meson-remove-redundant-assignment-to-variable-fi.patch new file mode 100644 index 00000000000..7d2d067d182 --- /dev/null +++ b/queue-5.4/pwm-meson-remove-redundant-assignment-to-variable-fi.patch @@ -0,0 +1,42 @@ +From 33ec356e57de6836404d0414c193301d16aaab21 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Apr 2020 12:08:57 +0100 +Subject: pwm: meson: Remove redundant assignment to variable fin_freq +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Colin Ian King + +[ Upstream commit 437fb760d046340d0dee3b4307e1cf4578fd8ca8 ] + +The variable fin_freq is being initialized with a value that is never +read and it is being updated later with a new value. The initialization +is redundant and can be removed. 
+ +Addresses-Coverity: ("Unused value") +Signed-off-by: Colin Ian King +Acked-by: Uwe Kleine-König +Signed-off-by: Thierry Reding +Stable-dep-of: 87a2cbf02d77 ("pwm: meson: fix handling of period/duty if greater than UINT_MAX") +Signed-off-by: Sasha Levin +--- + drivers/pwm/pwm-meson.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c +index 9fc08d1de34c2..5cab720d50680 100644 +--- a/drivers/pwm/pwm-meson.c ++++ b/drivers/pwm/pwm-meson.c +@@ -163,7 +163,7 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, + { + struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); + unsigned int duty, period, pre_div, cnt, duty_cnt; +- unsigned long fin_freq = -1; ++ unsigned long fin_freq; + + duty = state->duty_cycle; + period = state->period; +-- +2.39.2 + diff --git a/queue-5.4/pwm-meson-simplify-duplicated-per-channel-tracking.patch b/queue-5.4/pwm-meson-simplify-duplicated-per-channel-tracking.patch new file mode 100644 index 00000000000..8aa110c7c0d --- /dev/null +++ b/queue-5.4/pwm-meson-simplify-duplicated-per-channel-tracking.patch @@ -0,0 +1,77 @@ +From 03535da81a40311121d7b2fca8954c9325ee4877 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Nov 2021 14:46:26 +0100 +Subject: pwm: meson: Simplify duplicated per-channel tracking +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Uwe Kleine-König + +[ Upstream commit 5f97f18feac9bd5a8163b108aee52d783114b36f ] + +The driver tracks per-channel data via struct pwm_device::chip_data and +struct meson_pwm::channels[]. The latter holds the actual data, the former +is only a pointer to the latter. So simplify by using struct +meson_pwm::channels[] consistently. 
+ +Signed-off-by: Uwe Kleine-König +Reviewed-by: Martin Blumenstingl +Signed-off-by: Thierry Reding +Stable-dep-of: 87a2cbf02d77 ("pwm: meson: fix handling of period/duty if greater than UINT_MAX") +Signed-off-by: Sasha Levin +--- + drivers/pwm/pwm-meson.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +diff --git a/drivers/pwm/pwm-meson.c b/drivers/pwm/pwm-meson.c +index 5cab720d50680..d594dac41f6da 100644 +--- a/drivers/pwm/pwm-meson.c ++++ b/drivers/pwm/pwm-meson.c +@@ -147,12 +147,13 @@ static int meson_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm) + return err; + } + +- return pwm_set_chip_data(pwm, channel); ++ return 0; + } + + static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) + { +- struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); ++ struct meson_pwm *meson = to_meson_pwm(chip); ++ struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; + + if (channel) + clk_disable_unprepare(channel->clk); +@@ -161,7 +162,7 @@ static void meson_pwm_free(struct pwm_chip *chip, struct pwm_device *pwm) + static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, + const struct pwm_state *state) + { +- struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); ++ struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; + unsigned int duty, period, pre_div, cnt, duty_cnt; + unsigned long fin_freq; + +@@ -230,7 +231,7 @@ static int meson_pwm_calc(struct meson_pwm *meson, struct pwm_device *pwm, + + static void meson_pwm_enable(struct meson_pwm *meson, struct pwm_device *pwm) + { +- struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); ++ struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; + struct meson_pwm_channel_data *channel_data; + unsigned long flags; + u32 value; +@@ -273,8 +274,8 @@ static void meson_pwm_disable(struct meson_pwm *meson, struct pwm_device *pwm) + static int meson_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm, + const 
struct pwm_state *state) + { +- struct meson_pwm_channel *channel = pwm_get_chip_data(pwm); + struct meson_pwm *meson = to_meson_pwm(chip); ++ struct meson_pwm_channel *channel = &meson->channels[pwm->hwpwm]; + int err = 0; + + if (!state) +-- +2.39.2 + diff --git a/queue-5.4/scsi-qla2xxx-array-index-may-go-out-of-bound.patch b/queue-5.4/scsi-qla2xxx-array-index-may-go-out-of-bound.patch new file mode 100644 index 00000000000..04742b7ee9f --- /dev/null +++ b/queue-5.4/scsi-qla2xxx-array-index-may-go-out-of-bound.patch @@ -0,0 +1,41 @@ +From 371580d6b84d6d773774067e7e566b6a278a057b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Jun 2023 17:08:36 +0530 +Subject: scsi: qla2xxx: Array index may go out of bound + +From: Nilesh Javali + +[ Upstream commit d721b591b95cf3f290f8a7cbe90aa2ee0368388d ] + +Klocwork reports array 'vha->host_str' of size 16 may use index value(s) +16..19. Use snprintf() instead of sprintf(). + +Cc: stable@vger.kernel.org +Co-developed-by: Bikash Hazarika +Signed-off-by: Bikash Hazarika +Signed-off-by: Nilesh Javali +Link: https://lore.kernel.org/r/20230607113843.37185-2-njavali@marvell.com +Reviewed-by: Himanshu Madhani +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/qla2xxx/qla_os.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c +index 9bd73a5a722b4..99d4bc2ab5a91 100644 +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -4831,7 +4831,8 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, + } + INIT_DELAYED_WORK(&vha->scan.scan_work, qla_scan_work_fn); + +- sprintf(vha->host_str, "%s_%lu", QLA2XXX_DRIVER_NAME, vha->host_no); ++ snprintf(vha->host_str, sizeof(vha->host_str), "%s_%lu", ++ QLA2XXX_DRIVER_NAME, vha->host_no); + ql_dbg(ql_dbg_init, vha, 0x0041, + "Allocated the host=%p hw=%p vha=%p dev_name=%s", + vha->host, vha->hw, vha, +-- +2.39.2 + diff --git a/queue-5.4/scsi-qla2xxx-fix-inconsistent-format-argument-type-i.patch b/queue-5.4/scsi-qla2xxx-fix-inconsistent-format-argument-type-i.patch new file mode 100644 index 00000000000..f187f849597 --- /dev/null +++ b/queue-5.4/scsi-qla2xxx-fix-inconsistent-format-argument-type-i.patch @@ -0,0 +1,53 @@ +From 44e268c9dbf8b489725dde14df85a3de6364bab6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 30 Sep 2020 10:25:14 +0800 +Subject: scsi: qla2xxx: Fix inconsistent format argument type in qla_os.c + +From: Ye Bin + +[ Upstream commit 250bd00923c72c846092271a9e51ee373db081b6 ] + +Fix the following warnings: + +[drivers/scsi/qla2xxx/qla_os.c:4882]: (warning) %ld in format string (no. 2) + requires 'long' but the argument type is 'unsigned long'. +[drivers/scsi/qla2xxx/qla_os.c:5011]: (warning) %ld in format string (no. 1) + requires 'long' but the argument type is 'unsigned long'. + +Link: https://lore.kernel.org/r/20200930022515.2862532-3-yebin10@huawei.com +Reported-by: Hulk Robot +Reviewed-by: Himanshu Madhani +Reviewed-by: Nilesh Javali +Signed-off-by: Ye Bin +Signed-off-by: Martin K. 
Petersen +Stable-dep-of: d721b591b95c ("scsi: qla2xxx: Array index may go out of bound") +Signed-off-by: Sasha Levin +--- + drivers/scsi/qla2xxx/qla_os.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c +index 30a5ca9c5a8d4..9bd73a5a722b4 100644 +--- a/drivers/scsi/qla2xxx/qla_os.c ++++ b/drivers/scsi/qla2xxx/qla_os.c +@@ -4831,7 +4831,7 @@ struct scsi_qla_host *qla2x00_create_host(struct scsi_host_template *sht, + } + INIT_DELAYED_WORK(&vha->scan.scan_work, qla_scan_work_fn); + +- sprintf(vha->host_str, "%s_%ld", QLA2XXX_DRIVER_NAME, vha->host_no); ++ sprintf(vha->host_str, "%s_%lu", QLA2XXX_DRIVER_NAME, vha->host_no); + ql_dbg(ql_dbg_init, vha, 0x0041, + "Allocated the host=%p hw=%p vha=%p dev_name=%s", + vha->host, vha->hw, vha, +@@ -4961,7 +4961,7 @@ qla2x00_uevent_emit(struct scsi_qla_host *vha, u32 code) + + switch (code) { + case QLA_UEVENT_CODE_FW_DUMP: +- snprintf(event_string, sizeof(event_string), "FW_DUMP=%ld", ++ snprintf(event_string, sizeof(event_string), "FW_DUMP=%lu", + vha->host_no); + break; + default: +-- +2.39.2 + diff --git a/queue-5.4/series b/queue-5.4/series new file mode 100644 index 00000000000..cd22fdfc1ed --- /dev/null +++ b/queue-5.4/series @@ -0,0 +1,30 @@ +jbd2-fix-incorrect-code-style.patch +jbd2-fix-kernel-doc-markups.patch +jbd2-remove-redundant-buffer-io-error-checks.patch +jbd2-recheck-chechpointing-non-dirty-buffer.patch +jbd2-fix-wrongly-judgement-for-buffer-head-removing-.patch +gpio-tps68470-make-tps68470_gpio_output-always-set-t.patch +bcache-remove-int-n-from-parameter-list-of-bch_bucke.patch +bcache-fix-__bch_btree_node_alloc-to-make-the-failur.patch +btrfs-qgroup-catch-reserved-space-leaks-at-unmount-t.patch +btrfs-fix-race-between-quota-disable-and-relocation.patch +btrfs-fix-extent-buffer-leak-after-tree-mod-log-fail.patch +ext4-rename-journal_dev-to-s_journal_dev-inside-ext4.patch 
+ext4-fix-reusing-stale-buffer-heads-from-last-failed.patch +pci-aspm-return-0-or-etimedout-from-pcie_retrain_lin.patch +pci-aspm-factor-out-pcie_wait_for_retrain.patch +pci-aspm-avoid-link-retraining-race.patch +dlm-cleanup-plock_op-vs-plock_xop.patch +dlm-rearrange-async-condition-return.patch +fs-dlm-interrupt-posix-locks-only-when-process-is-ki.patch +ftrace-add-information-on-number-of-page-groups-allo.patch +ftrace-check-if-pages-were-allocated-before-calling-.patch +ftrace-store-the-order-of-pages-allocated-in-ftrace_.patch +ftrace-fix-possible-warning-on-checking-all-pages-us.patch +pwm-meson-remove-redundant-assignment-to-variable-fi.patch +pwm-meson-simplify-duplicated-per-channel-tracking.patch +pwm-meson-fix-handling-of-period-duty-if-greater-tha.patch +scsi-qla2xxx-fix-inconsistent-format-argument-type-i.patch +scsi-qla2xxx-array-index-may-go-out-of-bound.patch +uapi-general-notification-queue-definitions.patch +keys-fix-linking-a-duplicate-key-to-a-keyring-s-asso.patch diff --git a/queue-5.4/uapi-general-notification-queue-definitions.patch b/queue-5.4/uapi-general-notification-queue-definitions.patch new file mode 100644 index 00000000000..4cf885977d7 --- /dev/null +++ b/queue-5.4/uapi-general-notification-queue-definitions.patch @@ -0,0 +1,114 @@ +From a93abd8edd3219890cdd278a509143e3d1c59fdb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Feb 2020 13:58:35 +0000 +Subject: uapi: General notification queue definitions + +From: David Howells + +[ Upstream commit 0858caa419e6cf9d31e734d09d70b34f64443ef6 ] + +Add UAPI definitions for the general notification queue, including the +following pieces: + + (*) struct watch_notification. + + This is the metadata header for notification messages. It includes a + type and subtype that indicate the source of the message + (eg. WATCH_TYPE_MOUNT_NOTIFY) and the kind of the message + (eg. NOTIFY_MOUNT_NEW_MOUNT). 
+ + The header also contains an information field that conveys the + following information: + + - WATCH_INFO_LENGTH. The size of the entry (entries are variable + length). + + - WATCH_INFO_ID. The watch ID specified when the watchpoint was + set. + + - WATCH_INFO_TYPE_INFO. (Sub)type-specific information. + + - WATCH_INFO_FLAG_*. Flag bits overlain on the type-specific + information. For use by the type. + + All the information in the header can be used in filtering messages at + the point of writing into the buffer. + + (*) struct watch_notification_removal + + This is an extended watch-removal notification record that includes an + 'id' field that can indicate the identifier of the object being + removed if available (for instance, a keyring serial number). + +Signed-off-by: David Howells +Stable-dep-of: d55901522f96 ("keys: Fix linking a duplicate key to a keyring's assoc_array") +Signed-off-by: Sasha Levin +--- + include/uapi/linux/watch_queue.h | 55 ++++++++++++++++++++++++++++++++ + 1 file changed, 55 insertions(+) + create mode 100644 include/uapi/linux/watch_queue.h + +diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h +new file mode 100644 +index 0000000000000..5f3d21e8a34b0 +--- /dev/null ++++ b/include/uapi/linux/watch_queue.h +@@ -0,0 +1,55 @@ ++/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ ++#ifndef _UAPI_LINUX_WATCH_QUEUE_H ++#define _UAPI_LINUX_WATCH_QUEUE_H ++ ++#include <linux/types.h> ++ ++enum watch_notification_type { ++ WATCH_TYPE_META = 0, /* Special record */ ++ WATCH_TYPE__NR = 1 ++}; ++ ++enum watch_meta_notification_subtype { ++ WATCH_META_REMOVAL_NOTIFICATION = 0, /* Watched object was removed */ ++ WATCH_META_LOSS_NOTIFICATION = 1, /* Data loss occurred */ ++}; ++ ++/* ++ * Notification record header. This is aligned to 64-bits so that subclasses ++ * can contain __u64 fields.
++ */ ++struct watch_notification { ++ __u32 type:24; /* enum watch_notification_type */ ++ __u32 subtype:8; /* Type-specific subtype (filterable) */ ++ __u32 info; ++#define WATCH_INFO_LENGTH 0x0000007f /* Length of record */ ++#define WATCH_INFO_LENGTH__SHIFT 0 ++#define WATCH_INFO_ID 0x0000ff00 /* ID of watchpoint */ ++#define WATCH_INFO_ID__SHIFT 8 ++#define WATCH_INFO_TYPE_INFO 0xffff0000 /* Type-specific info */ ++#define WATCH_INFO_TYPE_INFO__SHIFT 16 ++#define WATCH_INFO_FLAG_0 0x00010000 /* Type-specific info, flag bit 0 */ ++#define WATCH_INFO_FLAG_1 0x00020000 /* ... */ ++#define WATCH_INFO_FLAG_2 0x00040000 ++#define WATCH_INFO_FLAG_3 0x00080000 ++#define WATCH_INFO_FLAG_4 0x00100000 ++#define WATCH_INFO_FLAG_5 0x00200000 ++#define WATCH_INFO_FLAG_6 0x00400000 ++#define WATCH_INFO_FLAG_7 0x00800000 ++}; ++ ++ ++/* ++ * Extended watch removal notification. This is used optionally if the type ++ * wants to indicate an identifier for the object being watched, if there is ++ * such. This can be distinguished by the length. ++ * ++ * type -> WATCH_TYPE_META ++ * subtype -> WATCH_META_REMOVAL_NOTIFICATION ++ */ ++struct watch_notification_removal { ++ struct watch_notification watch; ++ __u64 id; /* Type-dependent identifier */ ++}; ++ ++#endif /* _UAPI_LINUX_WATCH_QUEUE_H */ +-- +2.39.2 +