From: Greg Kroah-Hartman Date: Sun, 3 Apr 2022 11:42:40 +0000 (+0200) Subject: 5.17-stable patches X-Git-Tag: v5.17.2~150 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=a6567c46170de46a1d4e4247f6340fbdd801e807;p=thirdparty%2Fkernel%2Fstable-queue.git 5.17-stable patches added patches: nvme-allow-duplicate-nsids-for-private-namespaces.patch nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch ubifs-fix-to-add-refcount-once-page-is-set-private.patch ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch ubifs-rename-whiteout-atomically.patch ubifs-rename_whiteout-correct-old_dir-size-computing.patch ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch --- diff --git a/queue-5.17/nvme-allow-duplicate-nsids-for-private-namespaces.patch b/queue-5.17/nvme-allow-duplicate-nsids-for-private-namespaces.patch new file mode 100644 index 00000000000..1771c89eb4a --- /dev/null +++ b/queue-5.17/nvme-allow-duplicate-nsids-for-private-namespaces.patch @@ -0,0 +1,127 @@ +From 5974ea7ce0f9a5987fc8cf5e08ad6e3e70bb542e Mon Sep 17 00:00:00 2001 +From: Sungup Moon +Date: Mon, 14 Mar 2022 20:05:45 +0900 +Subject: nvme: allow duplicate NSIDs for private namespaces + +From: Sungup Moon + +commit 5974ea7ce0f9a5987fc8cf5e08ad6e3e70bb542e upstream. + +A NVMe subsystem with multiple controller can have private namespaces +that use the same NSID under some conditions: + + "If Namespace Management, ANA Reporting, or NVM Sets are supported, the + NSIDs shall be unique within the NVM subsystem. 
If the Namespace + Management, ANA Reporting, and NVM Sets are not supported, then NSIDs: + a) for shared namespace shall be unique; and + b) for private namespace are not required to be unique." + +Reference: Section 6.1.6 NSID and Namespace Usage; NVM Express 1.4c spec. + +Make sure this specific setup is supported in Linux. + +Fixes: 9ad1927a3bc2 ("nvme: always search for namespace head") +Signed-off-by: Sungup Moon +[hch: refactored and fixed the controller vs subsystem based naming + conflict] +Signed-off-by: Christoph Hellwig +Reviewed-by: Sagi Grimberg +Signed-off-by: Greg Kroah-Hartman +--- + drivers/nvme/host/core.c | 15 ++++++++++----- + drivers/nvme/host/multipath.c | 7 ++++--- + drivers/nvme/host/nvme.h | 19 +++++++++++++++++++ + include/linux/nvme.h | 1 + + 4 files changed, 34 insertions(+), 8 deletions(-) + +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -3574,15 +3574,20 @@ static const struct attribute_group *nvm + NULL, + }; + +-static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, ++static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl, + unsigned nsid) + { + struct nvme_ns_head *h; + +- lockdep_assert_held(&subsys->lock); ++ lockdep_assert_held(&ctrl->subsys->lock); + +- list_for_each_entry(h, &subsys->nsheads, entry) { +- if (h->ns_id != nsid) ++ list_for_each_entry(h, &ctrl->subsys->nsheads, entry) { ++ /* ++ * Private namespaces can share NSIDs under some conditions. ++ * In that case we can't use the same ns_head for namespaces ++ * with the same NSID. 
++ */ ++ if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h)) + continue; + if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) + return h; +@@ -3750,7 +3755,7 @@ static int nvme_init_ns_head(struct nvme + int ret = 0; + + mutex_lock(&ctrl->subsys->lock); +- head = nvme_find_ns_head(ctrl->subsys, nsid); ++ head = nvme_find_ns_head(ctrl, nsid); + if (!head) { + head = nvme_alloc_ns_head(ctrl, nsid, ids); + if (IS_ERR(head)) { +--- a/drivers/nvme/host/multipath.c ++++ b/drivers/nvme/host/multipath.c +@@ -504,10 +504,11 @@ int nvme_mpath_alloc_disk(struct nvme_ct + + /* + * Add a multipath node if the subsystems supports multiple controllers. +- * We also do this for private namespaces as the namespace sharing data could +- * change after a rescan. ++ * We also do this for private namespaces as the namespace sharing flag ++ * could change after a rescan. + */ +- if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) ++ if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || ++ !nvme_is_unique_nsid(ctrl, head) || !multipath) + return 0; + + head->disk = blk_alloc_disk(ctrl->numa_node); +--- a/drivers/nvme/host/nvme.h ++++ b/drivers/nvme/host/nvme.h +@@ -716,6 +716,25 @@ static inline bool nvme_check_ready(stru + return queue_live; + return __nvme_check_ready(ctrl, rq, queue_live); + } ++ ++/* ++ * NSID shall be unique for all shared namespaces, or if at least one of the ++ * following conditions is met: ++ * 1. Namespace Management is supported by the controller ++ * 2. ANA is supported by the controller ++ * 3. NVM Set are supported by the controller ++ * ++ * In other case, private namespace are not required to report a unique NSID. 
++ */ ++static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl, ++ struct nvme_ns_head *head) ++{ ++ return head->shared || ++ (ctrl->oacs & NVME_CTRL_OACS_NS_MNGT_SUPP) || ++ (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) || ++ (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS); ++} ++ + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, + void *buf, unsigned bufflen); + int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, +--- a/include/linux/nvme.h ++++ b/include/linux/nvme.h +@@ -337,6 +337,7 @@ enum { + NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, + NVME_CTRL_VWC_PRESENT = 1 << 0, + NVME_CTRL_OACS_SEC_SUPP = 1 << 0, ++ NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, + NVME_CTRL_OACS_DIRECTIVES = 1 << 5, + NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, + NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, diff --git a/queue-5.17/nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch b/queue-5.17/nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch new file mode 100644 index 00000000000..8cf5d73ae44 --- /dev/null +++ b/queue-5.17/nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch @@ -0,0 +1,66 @@ +From 726be2c72efc0a64c206e854b8996ad3ab9c7507 Mon Sep 17 00:00:00 2001 +From: Pankaj Raghav +Date: Tue, 22 Mar 2022 10:20:48 +0100 +Subject: nvme: fix the read-only state for zoned namespaces with unsupposed features + +From: Pankaj Raghav + +commit 726be2c72efc0a64c206e854b8996ad3ab9c7507 upstream. + +commit 2f4c9ba23b88 ("nvme: export zoned namespaces without Zone Append +support read-only") marks zoned namespaces without append support +read-only. It does so by setting NVME_NS_FORCE_RO in ns->flags in +nvme_update_zone_info and checking for that flag later in +nvme_update_disk_info to mark the disk as read-only. 
+ +But commit 73d90386b559 ("nvme: cleanup zone information initialization") +rearranged nvme_update_disk_info to be called before +nvme_update_zone_info and thus not marking the disk as read-only. +The call order cannot be just reverted because nvme_update_zone_info sets +certain queue parameters such as zone_write_granularity that depend on the +prior call to nvme_update_disk_info. + +Remove the call to set_disk_ro in nvme_update_disk_info, and call +set_disk_ro after nvme_update_zone_info and nvme_update_disk_info to set +the permission for ZNS drives correctly. The same applies to the +multipath disk path. + +Fixes: 73d90386b559 ("nvme: cleanup zone information initialization") +Signed-off-by: Pankaj Raghav +Signed-off-by: Christoph Hellwig +Signed-off-by: Greg Kroah-Hartman +--- + drivers/nvme/host/core.c | 8 +++++--- + 1 file changed, 5 insertions(+), 3 deletions(-) + +--- a/drivers/nvme/host/core.c ++++ b/drivers/nvme/host/core.c +@@ -1857,9 +1857,6 @@ static void nvme_update_disk_info(struct + nvme_config_discard(disk, ns); + blk_queue_max_write_zeroes_sectors(disk->queue, + ns->ctrl->max_zeroes_sectors); +- +- set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || +- test_bit(NVME_NS_FORCE_RO, &ns->flags)); + } + + static inline bool nvme_first_scan(struct gendisk *disk) +@@ -1918,6 +1915,8 @@ static int nvme_update_ns_info(struct nv + goto out_unfreeze; + } + ++ set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) || ++ test_bit(NVME_NS_FORCE_RO, &ns->flags)); + set_bit(NVME_NS_READY, &ns->flags); + blk_mq_unfreeze_queue(ns->disk->queue); + +@@ -1930,6 +1929,9 @@ static int nvme_update_ns_info(struct nv + if (nvme_ns_head_multipath(ns->head)) { + blk_mq_freeze_queue(ns->head->disk->queue); + nvme_update_disk_info(ns->head->disk, ns, id); ++ set_disk_ro(ns->head->disk, ++ (id->nsattr & NVME_NS_ATTR_RO) || ++ test_bit(NVME_NS_FORCE_RO, &ns->flags)); + nvme_mpath_revalidate_paths(ns); + blk_stack_limits(&ns->head->disk->queue->limits, + &ns->queue->limits, 0); 
diff --git a/queue-5.17/series b/queue-5.17/series index 0111518848c..0d7be4cfa7f 100644 --- a/queue-5.17/series +++ b/queue-5.17/series @@ -1002,3 +1002,15 @@ kvm-prevent-module-exit-until-all-vms-are-freed.patch kvm-x86-fix-sending-pv-ipi.patch kvm-svm-fix-panic-on-out-of-bounds-guest-irq.patch kvm-avoid-double-put_page-with-gfn-to-pfn-cache.patch +ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch +ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch +ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch +ubifs-rename-whiteout-atomically.patch +ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch +ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch +ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch +ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch +ubifs-fix-to-add-refcount-once-page-is-set-private.patch +ubifs-rename_whiteout-correct-old_dir-size-computing.patch +nvme-allow-duplicate-nsids-for-private-namespaces.patch +nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch diff --git a/queue-5.17/ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch b/queue-5.17/ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch new file mode 100644 index 00000000000..a862dc5454c --- /dev/null +++ b/queue-5.17/ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch @@ -0,0 +1,35 @@ +From 716b4573026bcbfa7b58ed19fe15554bac66b082 Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Mon, 27 Dec 2021 11:22:35 +0800 +Subject: ubifs: Add missing iput if do_tmpfile() failed in rename whiteout + +From: Zhihao Cheng + +commit 716b4573026bcbfa7b58ed19fe15554bac66b082 upstream. + +whiteout inode should be put when do_tmpfile() failed if inode has been +initialized. 
Otherwise we will get following warning during umount: + UBIFS error (ubi0:0 pid 1494): ubifs_assert_failed [ubifs]: UBIFS + assert failed: c->bi.dd_growth == 0, in fs/ubifs/super.c:1930 + VFS: Busy inodes after unmount of ubifs. Self-destruct in 5 seconds. + +Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT") +Signed-off-by: Zhihao Cheng +Suggested-by: Sascha Hauer +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/dir.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -432,6 +432,8 @@ out_inode: + make_bad_inode(inode); + if (!instantiated) + iput(inode); ++ else if (whiteout) ++ iput(*whiteout); + out_budg: + ubifs_release_budget(c, &req); + if (!instantiated) diff --git a/queue-5.17/ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch b/queue-5.17/ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch new file mode 100644 index 00000000000..63599b85553 --- /dev/null +++ b/queue-5.17/ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch @@ -0,0 +1,118 @@ +From afd427048047e8efdedab30e8888044e2be5aa9c Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Mon, 27 Dec 2021 11:22:33 +0800 +Subject: ubifs: Fix deadlock in concurrent rename whiteout and inode writeback +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Zhihao Cheng + +commit afd427048047e8efdedab30e8888044e2be5aa9c upstream. 
+ +Following hung tasks: +[ 77.028764] task:kworker/u8:4 state:D stack: 0 pid: 132 +[ 77.028820] Call Trace: +[ 77.029027] schedule+0x8c/0x1b0 +[ 77.029067] mutex_lock+0x50/0x60 +[ 77.029074] ubifs_write_inode+0x68/0x1f0 [ubifs] +[ 77.029117] __writeback_single_inode+0x43c/0x570 +[ 77.029128] writeback_sb_inodes+0x259/0x740 +[ 77.029148] wb_writeback+0x107/0x4d0 +[ 77.029163] wb_workfn+0x162/0x7b0 + +[ 92.390442] task:aa state:D stack: 0 pid: 1506 +[ 92.390448] Call Trace: +[ 92.390458] schedule+0x8c/0x1b0 +[ 92.390461] wb_wait_for_completion+0x82/0xd0 +[ 92.390469] __writeback_inodes_sb_nr+0xb2/0x110 +[ 92.390472] writeback_inodes_sb_nr+0x14/0x20 +[ 92.390476] ubifs_budget_space+0x705/0xdd0 [ubifs] +[ 92.390503] do_rename.cold+0x7f/0x187 [ubifs] +[ 92.390549] ubifs_rename+0x8b/0x180 [ubifs] +[ 92.390571] vfs_rename+0xdb2/0x1170 +[ 92.390580] do_renameat2+0x554/0x770 + +, are caused by concurrent rename whiteout and inode writeback processes: + rename_whiteout(Thread 1) wb_workfn(Thread2) +ubifs_rename + do_rename + lock_4_inodes (Hold ui_mutex) + ubifs_budget_space + make_free_space + shrink_liability + __writeback_inodes_sb_nr + bdi_split_work_to_wbs (Queue new wb work) + wb_do_writeback(wb work) + __writeback_single_inode + ubifs_write_inode + LOCK(ui_mutex) + ↑ + wb_wait_for_completion (Wait wb work) <-- deadlock! + +Reproducer (Detail program in [Link]): + 1. SYS_renameat2("/mp/dir/file", "/mp/dir/whiteout", RENAME_WHITEOUT) + 2. Consume out of space before kernel(mdelay) doing budget for whiteout + +Fix it by doing whiteout space budget before locking ubifs inodes. +BTW, it also fixes wrong goto tag 'out_release' in whiteout budget +error handling path(It should at least recover dir i_size and unlock +4 ubifs inodes). 
+ +Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=214733 +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/dir.c | 25 +++++++++++++++---------- + 1 file changed, 15 insertions(+), 10 deletions(-) + +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -1324,6 +1324,7 @@ static int do_rename(struct inode *old_d + + if (flags & RENAME_WHITEOUT) { + union ubifs_dev_desc *dev = NULL; ++ struct ubifs_budget_req wht_req; + + dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); + if (!dev) { +@@ -1345,6 +1346,20 @@ static int do_rename(struct inode *old_d + whiteout_ui->data = dev; + whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0)); + ubifs_assert(c, !whiteout_ui->dirty); ++ ++ memset(&wht_req, 0, sizeof(struct ubifs_budget_req)); ++ wht_req.dirtied_ino = 1; ++ wht_req.dirtied_ino_d = ALIGN(whiteout_ui->data_len, 8); ++ /* ++ * To avoid deadlock between space budget (holds ui_mutex and ++ * waits wb work) and writeback work(waits ui_mutex), do space ++ * budget before ubifs inodes locked. 
++ */ ++ err = ubifs_budget_space(c, &wht_req); ++ if (err) { ++ iput(whiteout); ++ goto out_release; ++ } + } + + lock_4_inodes(old_dir, new_dir, new_inode, whiteout); +@@ -1419,16 +1434,6 @@ static int do_rename(struct inode *old_d + } + + if (whiteout) { +- struct ubifs_budget_req wht_req = { .dirtied_ino = 1, +- .dirtied_ino_d = \ +- ALIGN(ubifs_inode(whiteout)->data_len, 8) }; +- +- err = ubifs_budget_space(c, &wht_req); +- if (err) { +- iput(whiteout); +- goto out_release; +- } +- + inc_nlink(whiteout); + mark_inode_dirty(whiteout); + diff --git a/queue-5.17/ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch b/queue-5.17/ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch new file mode 100644 index 00000000000..74b23c8ed7d --- /dev/null +++ b/queue-5.17/ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch @@ -0,0 +1,109 @@ +From 4f2262a334641e05f645364d5ade1f565c85f20b Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Mon, 27 Dec 2021 11:22:40 +0800 +Subject: ubifs: Fix read out-of-bounds in ubifs_wbuf_write_nolock() + +From: Zhihao Cheng + +commit 4f2262a334641e05f645364d5ade1f565c85f20b upstream. + +Function ubifs_wbuf_write_nolock() may access buf out of bounds in +following process: + +ubifs_wbuf_write_nolock(): + aligned_len = ALIGN(len, 8); // Assume len = 4089, aligned_len = 4096 + if (aligned_len <= wbuf->avail) ... 
// Not satisfy + if (wbuf->used) { + ubifs_leb_write() // Fill some data in avail wbuf + len -= wbuf->avail; // len is still not 8-bytes aligned + aligned_len -= wbuf->avail; + } + n = aligned_len >> c->max_write_shift; + if (n) { + n <<= c->max_write_shift; + err = ubifs_leb_write(c, wbuf->lnum, buf + written, + wbuf->offs, n); + // n > len, read out of bounds less than 8(n-len) bytes + } + +, which can be caught by KASAN: + ========================================================= + BUG: KASAN: slab-out-of-bounds in ecc_sw_hamming_calculate+0x1dc/0x7d0 + Read of size 4 at addr ffff888105594ff8 by task kworker/u8:4/128 + Workqueue: writeback wb_workfn (flush-ubifs_0_0) + Call Trace: + kasan_report.cold+0x81/0x165 + nand_write_page_swecc+0xa9/0x160 + ubifs_leb_write+0xf2/0x1b0 [ubifs] + ubifs_wbuf_write_nolock+0x421/0x12c0 [ubifs] + write_head+0xdc/0x1c0 [ubifs] + ubifs_jnl_write_inode+0x627/0x960 [ubifs] + wb_workfn+0x8af/0xb80 + +Function ubifs_wbuf_write_nolock() accepts that parameter 'len' is not 8 +bytes aligned, the 'len' represents the true length of buf (which is +allocated in 'ubifs_jnl_xxx', eg. ubifs_jnl_write_inode), so +ubifs_wbuf_write_nolock() must handle the length read from 'buf' carefully +to write leb safely. + +Fetch a reproducer in [Link]. 
+ +Fixes: 1e51764a3c2ac0 ("UBIFS: add new flash file system") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=214785 +Reported-by: Chengsong Ke +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/io.c | 34 ++++++++++++++++++++++++++++++---- + 1 file changed, 30 insertions(+), 4 deletions(-) + +--- a/fs/ubifs/io.c ++++ b/fs/ubifs/io.c +@@ -854,16 +854,42 @@ int ubifs_wbuf_write_nolock(struct ubifs + */ + n = aligned_len >> c->max_write_shift; + if (n) { +- n <<= c->max_write_shift; ++ int m = n - 1; ++ + dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, + wbuf->offs); +- err = ubifs_leb_write(c, wbuf->lnum, buf + written, +- wbuf->offs, n); ++ ++ if (m) { ++ /* '(n-1)<max_write_shift < len' is always true. */ ++ m <<= c->max_write_shift; ++ err = ubifs_leb_write(c, wbuf->lnum, buf + written, ++ wbuf->offs, m); ++ if (err) ++ goto out; ++ wbuf->offs += m; ++ aligned_len -= m; ++ len -= m; ++ written += m; ++ } ++ ++ /* ++ * The non-written len of buf may be less than 'n' because ++ * parameter 'len' is not 8 bytes aligned, so here we read ++ * min(len, n) bytes from buf. 
++ */ ++ n = 1 << c->max_write_shift; ++ memcpy(wbuf->buf, buf + written, min(len, n)); ++ if (n > len) { ++ ubifs_assert(c, n - len < 8); ++ ubifs_pad(c, wbuf->buf + len, n - len); ++ } ++ ++ err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n); + if (err) + goto out; + wbuf->offs += n; + aligned_len -= n; +- len -= n; ++ len -= min(len, n); + written += n; + } + diff --git a/queue-5.17/ubifs-fix-to-add-refcount-once-page-is-set-private.patch b/queue-5.17/ubifs-fix-to-add-refcount-once-page-is-set-private.patch new file mode 100644 index 00000000000..2b57de642c4 --- /dev/null +++ b/queue-5.17/ubifs-fix-to-add-refcount-once-page-is-set-private.patch @@ -0,0 +1,184 @@ +From 3b67db8a6ca83e6ff90b756d3da0c966f61cd37b Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Mon, 27 Dec 2021 11:22:41 +0800 +Subject: ubifs: Fix to add refcount once page is set private + +From: Zhihao Cheng + +commit 3b67db8a6ca83e6ff90b756d3da0c966f61cd37b upstream. + +MM defined the rule [1] very clearly that once page was set with PG_private +flag, we should increment the refcount in that page, also main flows like +pageout(), migrate_page() will assume there is one additional page +reference count if page_has_private() returns true. Otherwise, we may +get a BUG in page migration: + + page:0000000080d05b9d refcount:-1 mapcount:0 mapping:000000005f4d82a8 + index:0xe2 pfn:0x14c12 + aops:ubifs_file_address_operations [ubifs] ino:8f1 dentry name:"f30e" + flags: 0x1fffff80002405(locked|uptodate|owner_priv_1|private|node=0| + zone=1|lastcpupid=0x1fffff) + page dumped because: VM_BUG_ON_PAGE(page_count(page) != 0) + ------------[ cut here ]------------ + kernel BUG at include/linux/page_ref.h:184! 
+ invalid opcode: 0000 [#1] SMP + CPU: 3 PID: 38 Comm: kcompactd0 Not tainted 5.15.0-rc5 + RIP: 0010:migrate_page_move_mapping+0xac3/0xe70 + Call Trace: + ubifs_migrate_page+0x22/0xc0 [ubifs] + move_to_new_page+0xb4/0x600 + migrate_pages+0x1523/0x1cc0 + compact_zone+0x8c5/0x14b0 + kcompactd+0x2bc/0x560 + kthread+0x18c/0x1e0 + ret_from_fork+0x1f/0x30 + +Before the time, we should make clean a concept, what does refcount means +in page gotten from grab_cache_page_write_begin(). There are 2 situations: +Situation 1: refcount is 3, page is created by __page_cache_alloc. + TYPE_A - the write process is using this page + TYPE_B - page is assigned to one certain mapping by calling + __add_to_page_cache_locked() + TYPE_C - page is added into pagevec list corresponding current cpu by + calling lru_cache_add() +Situation 2: refcount is 2, page is gotten from the mapping's tree + TYPE_B - page has been assigned to one certain mapping + TYPE_A - the write process is using this page (by calling + page_cache_get_speculative()) +Filesystem releases one refcount by calling put_page() in xxx_write_end(), +the released refcount corresponds to TYPE_A (write task is using it). If +there are any processes using a page, page migration process will skip the +page by judging whether expected_page_refs() equals to page refcount. + +The BUG is caused by following process: + PA(cpu 0) kcompactd(cpu 1) + compact_zone +ubifs_write_begin + page_a = grab_cache_page_write_begin + add_to_page_cache_lru + lru_cache_add + pagevec_add // put page into cpu 0's pagevec + (refcnf = 3, for page creation process) +ubifs_write_end + SetPagePrivate(page_a) // doesn't increase page count ! + unlock_page(page_a) + put_page(page_a) // refcnt = 2 + [...] 
+ + PB(cpu 0) +filemap_read + filemap_get_pages + add_to_page_cache_lru + lru_cache_add + __pagevec_lru_add // traverse all pages in cpu 0's pagevec + __pagevec_lru_add_fn + SetPageLRU(page_a) + isolate_migratepages + isolate_migratepages_block + get_page_unless_zero(page_a) + // refcnt = 3 + list_add(page_a, from_list) + migrate_pages(from_list) + __unmap_and_move + move_to_new_page + ubifs_migrate_page(page_a) + migrate_page_move_mapping + expected_page_refs get 3 + (migration[1] + mapping[1] + private[1]) + release_pages + put_page_testzero(page_a) // refcnt = 3 + page_ref_freeze // refcnt = 0 + page_ref_dec_and_test(0 - 1 = -1) + page_ref_unfreeze + VM_BUG_ON_PAGE(-1 != 0, page) + +UBIFS doesn't increase the page refcount after setting private flag, which +leads to page migration task believes the page is not used by any other +processes, so the page is migrated. This causes concurrent accessing on +page refcount between put_page() called by other process(eg. read process +calls lru_cache_add) and page_ref_unfreeze() called by migration task. + +Actually zhangjun has tried to fix this problem [2] by recalculating page +refcnt in ubifs_migrate_page(). It's better to follow MM rules [1], because +just like Kirill suggested in [2], we need to check all users of +page_has_private() helper. Like f2fs does in [3], fix it by adding/deleting +refcount when setting/clearing private for a page. BTW, according to [4], +we set 'page->private' as 1 because ubifs just simply SetPagePrivate(). +And, [5] provided a common helper to set/clear page private, ubifs can +use this helper following the example of iomap, afs, btrfs, etc. + +Jump [6] to find a reproducer. 
+ +[1] https://lore.kernel.org/lkml/2b19b3c4-2bc4-15fa-15cc-27a13e5c7af1@aol.com +[2] https://www.spinics.net/lists/linux-mtd/msg04018.html +[3] http://lkml.iu.edu/hypermail/linux/kernel/1903.0/03313.html +[4] https://lore.kernel.org/linux-f2fs-devel/20210422154705.GO3596236@casper.infradead.org +[5] https://lore.kernel.org/all/20200517214718.468-1-guoqing.jiang@cloud.ionos.com +[6] https://bugzilla.kernel.org/show_bug.cgi?id=214961 + +Fixes: 1e51764a3c2ac0 ("UBIFS: add new flash file system") +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/file.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/fs/ubifs/file.c ++++ b/fs/ubifs/file.c +@@ -570,7 +570,7 @@ static int ubifs_write_end(struct file * + } + + if (!PagePrivate(page)) { +- SetPagePrivate(page); ++ attach_page_private(page, (void *)1); + atomic_long_inc(&c->dirty_pg_cnt); + __set_page_dirty_nobuffers(page); + } +@@ -947,7 +947,7 @@ static int do_writepage(struct page *pag + release_existing_page_budget(c); + + atomic_long_dec(&c->dirty_pg_cnt); +- ClearPagePrivate(page); ++ detach_page_private(page); + ClearPageChecked(page); + + kunmap(page); +@@ -1304,7 +1304,7 @@ static void ubifs_invalidatepage(struct + release_existing_page_budget(c); + + atomic_long_dec(&c->dirty_pg_cnt); +- ClearPagePrivate(page); ++ detach_page_private(page); + ClearPageChecked(page); + } + +@@ -1471,8 +1471,8 @@ static int ubifs_migrate_page(struct add + return rc; + + if (PagePrivate(page)) { +- ClearPagePrivate(page); +- SetPagePrivate(newpage); ++ detach_page_private(page); ++ attach_page_private(newpage, (void *)1); + } + + if (mode != MIGRATE_SYNC_NO_COPY) +@@ -1496,7 +1496,7 @@ static int ubifs_releasepage(struct page + return 0; + ubifs_assert(c, PagePrivate(page)); + ubifs_assert(c, 0); +- ClearPagePrivate(page); ++ detach_page_private(page); + ClearPageChecked(page); + return 1; + } +@@ -1567,7 +1567,7 @@ static vm_fault_t 
ubifs_vm_page_mkwrite( + else { + if (!PageChecked(page)) + ubifs_convert_page_budget(c); +- SetPagePrivate(page); ++ attach_page_private(page, (void *)1); + atomic_long_inc(&c->dirty_pg_cnt); + __set_page_dirty_nobuffers(page); + } diff --git a/queue-5.17/ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch b/queue-5.17/ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch new file mode 100644 index 00000000000..3a916a74d30 --- /dev/null +++ b/queue-5.17/ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch @@ -0,0 +1,157 @@ +From 60eb3b9c9f11206996f57cb89521824304b305ad Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Mon, 27 Dec 2021 11:22:37 +0800 +Subject: ubifs: Fix 'ui->dirty' race between do_tmpfile() and writeback work + +From: Zhihao Cheng + +commit 60eb3b9c9f11206996f57cb89521824304b305ad upstream. + +'ui->dirty' is not protected by 'ui_mutex' in function do_tmpfile() which +may race with ubifs_write_inode[wb_workfn] to access/update 'ui->dirty', +finally dirty space is released twice. 
+ + open(O_TMPFILE) wb_workfn +do_tmpfile + ubifs_budget_space(ino_req = { .dirtied_ino = 1}) + d_tmpfile // mark inode(tmpfile) dirty + ubifs_jnl_update // without holding tmpfile's ui_mutex + mark_inode_clean(ui) + if (ui->dirty) + ubifs_release_dirty_inode_budget(ui) // release first time + ubifs_write_inode + mutex_lock(&ui->ui_mutex) + ubifs_release_dirty_inode_budget(ui) + // release second time + mutex_unlock(&ui->ui_mutex) + ui->dirty = 0 + +Run generic/476 can reproduce following message easily +(See reproducer in [Link]): + + UBIFS error (ubi0:0 pid 2578): ubifs_assert_failed [ubifs]: UBIFS assert + failed: c->bi.dd_growth >= 0, in fs/ubifs/budget.c:554 + UBIFS warning (ubi0:0 pid 2578): ubifs_ro_mode [ubifs]: switched to + read-only mode, error -22 + Workqueue: writeback wb_workfn (flush-ubifs_0_0) + Call Trace: + ubifs_ro_mode+0x54/0x60 [ubifs] + ubifs_assert_failed+0x4b/0x80 [ubifs] + ubifs_release_budget+0x468/0x5a0 [ubifs] + ubifs_release_dirty_inode_budget+0x53/0x80 [ubifs] + ubifs_write_inode+0x121/0x1f0 [ubifs] + ... + wb_workfn+0x283/0x7b0 + +Fix it by holding tmpfile ubifs inode lock during ubifs_jnl_update(). +Similar problem exists in whiteout renaming, but previous fix("ubifs: +Rename whiteout atomically") has solved the problem. + +Fixes: 474b93704f32163 ("ubifs: Implement O_TMPFILE") +Link: https://bugzilla.kernel.org/show_bug.cgi?id=214765 +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/dir.c | 60 ++++++++++++++++++++++++++++----------------------------- + 1 file changed, 30 insertions(+), 30 deletions(-) + +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -397,6 +397,32 @@ out_free: + return ERR_PTR(err); + } + ++/** ++ * lock_2_inodes - a wrapper for locking two UBIFS inodes. ++ * @inode1: first inode ++ * @inode2: second inode ++ * ++ * We do not implement any tricks to guarantee strict lock ordering, because ++ * VFS has already done it for us on the @i_mutex. 
So this is just a simple ++ * wrapper function. ++ */ ++static void lock_2_inodes(struct inode *inode1, struct inode *inode2) ++{ ++ mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); ++ mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); ++} ++ ++/** ++ * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. ++ * @inode1: first inode ++ * @inode2: second inode ++ */ ++static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) ++{ ++ mutex_unlock(&ubifs_inode(inode2)->ui_mutex); ++ mutex_unlock(&ubifs_inode(inode1)->ui_mutex); ++} ++ + static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, + struct dentry *dentry, umode_t mode) + { +@@ -404,7 +430,7 @@ static int ubifs_tmpfile(struct user_nam + struct ubifs_info *c = dir->i_sb->s_fs_info; + struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1}; + struct ubifs_budget_req ino_req = { .dirtied_ino = 1 }; +- struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir); ++ struct ubifs_inode *ui; + int err, instantiated = 0; + struct fscrypt_name nm; + +@@ -452,18 +478,18 @@ static int ubifs_tmpfile(struct user_nam + instantiated = 1; + mutex_unlock(&ui->ui_mutex); + +- mutex_lock(&dir_ui->ui_mutex); ++ lock_2_inodes(dir, inode); + err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0); + if (err) + goto out_cancel; +- mutex_unlock(&dir_ui->ui_mutex); ++ unlock_2_inodes(dir, inode); + + ubifs_release_budget(c, &req); + + return 0; + + out_cancel: +- mutex_unlock(&dir_ui->ui_mutex); ++ unlock_2_inodes(dir, inode); + out_inode: + make_bad_inode(inode); + if (!instantiated) +@@ -690,32 +716,6 @@ static int ubifs_dir_release(struct inod + return 0; + } + +-/** +- * lock_2_inodes - a wrapper for locking two UBIFS inodes. +- * @inode1: first inode +- * @inode2: second inode +- * +- * We do not implement any tricks to guarantee strict lock ordering, because +- * VFS has already done it for us on the @i_mutex. So this is just a simple +- * wrapper function. 
+- */ +-static void lock_2_inodes(struct inode *inode1, struct inode *inode2) +-{ +- mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1); +- mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2); +-} +- +-/** +- * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes. +- * @inode1: first inode +- * @inode2: second inode +- */ +-static void unlock_2_inodes(struct inode *inode1, struct inode *inode2) +-{ +- mutex_unlock(&ubifs_inode(inode2)->ui_mutex); +- mutex_unlock(&ubifs_inode(inode1)->ui_mutex); +-} +- + static int ubifs_link(struct dentry *old_dentry, struct inode *dir, + struct dentry *dentry) + { diff --git a/queue-5.17/ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch b/queue-5.17/ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch new file mode 100644 index 00000000000..beaee3e84bd --- /dev/null +++ b/queue-5.17/ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch @@ -0,0 +1,66 @@ +From a6dab6607d4681d227905d5198710b575dbdb519 Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Mon, 27 Dec 2021 11:22:38 +0800 +Subject: ubifs: Rectify space amount budget for mkdir/tmpfile operations + +From: Zhihao Cheng + +commit a6dab6607d4681d227905d5198710b575dbdb519 upstream. + +UBIFS should make sure the flash has enough space to store dirty (Data +that is newer than disk) data (in memory), space budget is exactly +designed to do that. If space budget calculates less data than we need, +'make_reservation()' will do more work(return -ENOSPC if no free space +left, sometimes we can see "cannot reserve xxx bytes in jhead xxx, error +-28" in ubifs error messages) with ubifs inodes locked, which may affect +other syscalls. + +A simple way to decide how much space do we need when make a budget: +See how much space is needed by 'make_reservation()' in ubifs_jnl_xxx() +function according to corresponding operation. + +It's better to report ENOSPC in ubifs_budget_space(), as early as we can. 
+ +Fixes: 474b93704f32163 ("ubifs: Implement O_TMPFILE") +Fixes: 1e51764a3c2ac05 ("UBIFS: add new flash file system") +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/dir.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -428,15 +428,18 @@ static int ubifs_tmpfile(struct user_nam + { + struct inode *inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; +- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1}; ++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, ++ .dirtied_ino = 1}; + struct ubifs_budget_req ino_req = { .dirtied_ino = 1 }; + struct ubifs_inode *ui; + int err, instantiated = 0; + struct fscrypt_name nm; + + /* +- * Budget request settings: new dirty inode, new direntry, +- * budget for dirtied inode will be released via writeback. ++ * Budget request settings: new inode, new direntry, changing the ++ * parent directory inode. ++ * Allocate budget separately for new dirtied inode, the budget will ++ * be released via writeback. 
+ */ + + dbg_gen("dent '%pd', mode %#hx in dir ino %lu", +@@ -979,7 +982,8 @@ static int ubifs_mkdir(struct user_names + struct ubifs_inode *dir_ui = ubifs_inode(dir); + struct ubifs_info *c = dir->i_sb->s_fs_info; + int err, sz_change; +- struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 }; ++ struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1, ++ .dirtied_ino = 1}; + struct fscrypt_name nm; + + /* diff --git a/queue-5.17/ubifs-rename-whiteout-atomically.patch b/queue-5.17/ubifs-rename-whiteout-atomically.patch new file mode 100644 index 00000000000..0cd0cecf63d --- /dev/null +++ b/queue-5.17/ubifs-rename-whiteout-atomically.patch @@ -0,0 +1,426 @@ +From 278d9a243635f26c05ad95dcf9c5a593b9e04dc6 Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Mon, 27 Dec 2021 11:22:36 +0800 +Subject: ubifs: Rename whiteout atomically + +From: Zhihao Cheng + +commit 278d9a243635f26c05ad95dcf9c5a593b9e04dc6 upstream. + +Currently, rename whiteout has 3 steps: + 1. create tmpfile(which associates old dentry to tmpfile inode) for + whiteout, and store tmpfile to disk + 2. link whiteout, associate whiteout inode to old dentry again and + store old dentry, old inode, new dentry on disk + 3. writeback dirty whiteout inode to disk + +Suddenly power-cut or error occurring(eg. ENOSPC returned by budget, +memory allocation failure) during above steps may cause kinds of problems: + Problem 1: ENOSPC returned by whiteout space budget (before step 2), + old dentry will disappear after rename syscall, whiteout file + cannot be found either. + + ls dir // we get file, whiteout + rename(dir/file, dir/whiteout, RENAME_WHITEOUT) + ENOSPC = ubifs_budget_space(&wht_req) // return + ls dir // empty (no file, no whiteout) + Problem 2: Power-cut happens before step 3, whiteout inode with 'nlink=1' + is not stored on disk, whiteout dentry(old dentry) is written + on disk, whiteout file is lost on next mount (We get "dead + directory entry" after executing 'ls -l' on whiteout file).
+ +Now, we use following 3 steps to finish rename whiteout: + 1. create an in-mem inode with 'nlink = 1' as whiteout + 2. ubifs_jnl_rename (Write on disk to finish associating old dentry to + whiteout inode, associating new dentry with old inode) + 3. iput(whiteout) + +Rely writing in-mem inode on disk by ubifs_jnl_rename() to finish rename +whiteout, which avoids middle disk state caused by suddenly power-cut +and error occurring. + +Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT") +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/dir.c | 144 +++++++++++++++++++++++++++++++++-------------------- + fs/ubifs/journal.c | 52 ++++++++++++++++--- + 2 files changed, 136 insertions(+), 60 deletions(-) + +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -349,8 +349,56 @@ out_budg: + return err; + } + +-static int do_tmpfile(struct inode *dir, struct dentry *dentry, +- umode_t mode, struct inode **whiteout) ++static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry) ++{ ++ int err; ++ umode_t mode = S_IFCHR | WHITEOUT_MODE; ++ struct inode *inode; ++ struct ubifs_info *c = dir->i_sb->s_fs_info; ++ struct fscrypt_name nm; ++ ++ /* ++ * Create an inode('nlink = 1') for whiteout without updating journal, ++ * let ubifs_jnl_rename() store it on flash to complete rename whiteout ++ * atomically. ++ */ ++ ++ dbg_gen("dent '%pd', mode %#hx in dir ino %lu", ++ dentry, mode, dir->i_ino); ++ ++ err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm); ++ if (err) ++ return ERR_PTR(err); ++ ++ inode = ubifs_new_inode(c, dir, mode); ++ if (IS_ERR(inode)) { ++ err = PTR_ERR(inode); ++ goto out_free; ++ } ++ ++ init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); ++ ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); ++ ++ err = ubifs_init_security(dir, inode, &dentry->d_name); ++ if (err) ++ goto out_inode; ++ ++ /* The dir size is updated by do_rename. 
*/ ++ insert_inode_hash(inode); ++ ++ return inode; ++ ++out_inode: ++ make_bad_inode(inode); ++ iput(inode); ++out_free: ++ fscrypt_free_filename(&nm); ++ ubifs_err(c, "cannot create whiteout file, error %d", err); ++ return ERR_PTR(err); ++} ++ ++static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, ++ struct dentry *dentry, umode_t mode) + { + struct inode *inode; + struct ubifs_info *c = dir->i_sb->s_fs_info; +@@ -392,25 +440,13 @@ static int do_tmpfile(struct inode *dir, + } + ui = ubifs_inode(inode); + +- if (whiteout) { +- init_special_inode(inode, inode->i_mode, WHITEOUT_DEV); +- ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations); +- } +- + err = ubifs_init_security(dir, inode, &dentry->d_name); + if (err) + goto out_inode; + + mutex_lock(&ui->ui_mutex); + insert_inode_hash(inode); +- +- if (whiteout) { +- mark_inode_dirty(inode); +- drop_nlink(inode); +- *whiteout = inode; +- } else { +- d_tmpfile(dentry, inode); +- } ++ d_tmpfile(dentry, inode); + ubifs_assert(c, ui->dirty); + + instantiated = 1; +@@ -432,8 +468,6 @@ out_inode: + make_bad_inode(inode); + if (!instantiated) + iput(inode); +- else if (whiteout) +- iput(*whiteout); + out_budg: + ubifs_release_budget(c, &req); + if (!instantiated) +@@ -443,12 +477,6 @@ out_budg: + return err; + } + +-static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir, +- struct dentry *dentry, umode_t mode) +-{ +- return do_tmpfile(dir, dentry, mode, NULL); +-} +- + /** + * vfs_dent_type - get VFS directory entry type. 
+ * @type: UBIFS directory entry type +@@ -1266,17 +1294,19 @@ static int do_rename(struct inode *old_d + .dirtied_ino = 3 }; + struct ubifs_budget_req ino_req = { .dirtied_ino = 1, + .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) }; ++ struct ubifs_budget_req wht_req; + struct timespec64 time; + unsigned int saved_nlink; + struct fscrypt_name old_nm, new_nm; + + /* +- * Budget request settings: deletion direntry, new direntry, removing +- * the old inode, and changing old and new parent directory inodes. ++ * Budget request settings: ++ * req: deletion direntry, new direntry, removing the old inode, ++ * and changing old and new parent directory inodes. ++ * ++ * wht_req: new whiteout inode for RENAME_WHITEOUT. + * +- * However, this operation also marks the target inode as dirty and +- * does not write it, so we allocate budget for the target inode +- * separately. ++ * ino_req: marks the target inode as dirty and does not write it. + */ + + dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x", +@@ -1326,7 +1356,6 @@ static int do_rename(struct inode *old_d + + if (flags & RENAME_WHITEOUT) { + union ubifs_dev_desc *dev = NULL; +- struct ubifs_budget_req wht_req; + + dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS); + if (!dev) { +@@ -1334,24 +1363,26 @@ static int do_rename(struct inode *old_d + goto out_release; + } + +- err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout); +- if (err) { ++ /* ++ * The whiteout inode without dentry is pinned in memory, ++ * umount won't happen during rename process because we ++ * got parent dentry. 
++ */ ++ whiteout = create_whiteout(old_dir, old_dentry); ++ if (IS_ERR(whiteout)) { ++ err = PTR_ERR(whiteout); + kfree(dev); + goto out_release; + } + +- spin_lock(&whiteout->i_lock); +- whiteout->i_state |= I_LINKABLE; +- spin_unlock(&whiteout->i_lock); +- + whiteout_ui = ubifs_inode(whiteout); + whiteout_ui->data = dev; + whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0)); + ubifs_assert(c, !whiteout_ui->dirty); + + memset(&wht_req, 0, sizeof(struct ubifs_budget_req)); +- wht_req.dirtied_ino = 1; +- wht_req.dirtied_ino_d = ALIGN(whiteout_ui->data_len, 8); ++ wht_req.new_ino = 1; ++ wht_req.new_ino_d = ALIGN(whiteout_ui->data_len, 8); + /* + * To avoid deadlock between space budget (holds ui_mutex and + * waits wb work) and writeback work(waits ui_mutex), do space +@@ -1359,6 +1390,11 @@ static int do_rename(struct inode *old_d + */ + err = ubifs_budget_space(c, &wht_req); + if (err) { ++ /* ++ * Whiteout inode can not be written on flash by ++ * ubifs_jnl_write_inode(), because it's neither ++ * dirty nor zero-nlink. ++ */ + iput(whiteout); + goto out_release; + } +@@ -1433,17 +1469,11 @@ static int do_rename(struct inode *old_d + sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir); + if (unlink && IS_SYNC(new_inode)) + sync = 1; +- } +- +- if (whiteout) { +- inc_nlink(whiteout); +- mark_inode_dirty(whiteout); +- +- spin_lock(&whiteout->i_lock); +- whiteout->i_state &= ~I_LINKABLE; +- spin_unlock(&whiteout->i_lock); +- +- iput(whiteout); ++ /* ++ * S_SYNC flag of whiteout inherits from the old_dir, and we ++ * have already checked the old dir inode. So there is no need ++ * to check whiteout. 
++ */ + } + + err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir, +@@ -1454,6 +1484,11 @@ static int do_rename(struct inode *old_d + unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); + ubifs_release_budget(c, &req); + ++ if (whiteout) { ++ ubifs_release_budget(c, &wht_req); ++ iput(whiteout); ++ } ++ + mutex_lock(&old_inode_ui->ui_mutex); + release = old_inode_ui->dirty; + mark_inode_dirty_sync(old_inode); +@@ -1462,11 +1497,16 @@ static int do_rename(struct inode *old_d + if (release) + ubifs_release_budget(c, &ino_req); + if (IS_SYNC(old_inode)) +- err = old_inode->i_sb->s_op->write_inode(old_inode, NULL); ++ /* ++ * Rename finished here. Although old inode cannot be updated ++ * on flash, old ctime is not a big problem, don't return err ++ * code to userspace. ++ */ ++ old_inode->i_sb->s_op->write_inode(old_inode, NULL); + + fscrypt_free_filename(&old_nm); + fscrypt_free_filename(&new_nm); +- return err; ++ return 0; + + out_cancel: + if (unlink) { +@@ -1487,11 +1527,11 @@ out_cancel: + inc_nlink(old_dir); + } + } ++ unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); + if (whiteout) { +- drop_nlink(whiteout); ++ ubifs_release_budget(c, &wht_req); + iput(whiteout); + } +- unlock_4_inodes(old_dir, new_dir, new_inode, whiteout); + out_release: + ubifs_release_budget(c, &ino_req); + ubifs_release_budget(c, &req); +--- a/fs/ubifs/journal.c ++++ b/fs/ubifs/journal.c +@@ -1207,9 +1207,9 @@ out_free: + * @sync: non-zero if the write-buffer has to be synchronized + * + * This function implements the re-name operation which may involve writing up +- * to 4 inodes and 2 directory entries. It marks the written inodes as clean +- * and returns zero on success. In case of failure, a negative error code is +- * returned. ++ * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes) ++ * and 2 directory entries. It marks the written inodes as clean and returns ++ * zero on success. In case of failure, a negative error code is returned. 
+ */ + int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir, + const struct inode *old_inode, +@@ -1222,14 +1222,15 @@ int ubifs_jnl_rename(struct ubifs_info * + void *p; + union ubifs_key key; + struct ubifs_dent_node *dent, *dent2; +- int err, dlen1, dlen2, ilen, lnum, offs, len, orphan_added = 0; ++ int err, dlen1, dlen2, ilen, wlen, lnum, offs, len, orphan_added = 0; + int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ; + int last_reference = !!(new_inode && new_inode->i_nlink == 0); + int move = (old_dir != new_dir); +- struct ubifs_inode *new_ui; ++ struct ubifs_inode *new_ui, *whiteout_ui; + u8 hash_old_dir[UBIFS_HASH_ARR_SZ]; + u8 hash_new_dir[UBIFS_HASH_ARR_SZ]; + u8 hash_new_inode[UBIFS_HASH_ARR_SZ]; ++ u8 hash_whiteout_inode[UBIFS_HASH_ARR_SZ]; + u8 hash_dent1[UBIFS_HASH_ARR_SZ]; + u8 hash_dent2[UBIFS_HASH_ARR_SZ]; + +@@ -1249,9 +1250,20 @@ int ubifs_jnl_rename(struct ubifs_info * + } else + ilen = 0; + ++ if (whiteout) { ++ whiteout_ui = ubifs_inode(whiteout); ++ ubifs_assert(c, mutex_is_locked(&whiteout_ui->ui_mutex)); ++ ubifs_assert(c, whiteout->i_nlink == 1); ++ ubifs_assert(c, !whiteout_ui->dirty); ++ wlen = UBIFS_INO_NODE_SZ; ++ wlen += whiteout_ui->data_len; ++ } else ++ wlen = 0; ++ + aligned_dlen1 = ALIGN(dlen1, 8); + aligned_dlen2 = ALIGN(dlen2, 8); +- len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8); ++ len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ++ ALIGN(wlen, 8) + ALIGN(plen, 8); + if (move) + len += plen; + +@@ -1313,6 +1325,15 @@ int ubifs_jnl_rename(struct ubifs_info * + p += ALIGN(ilen, 8); + } + ++ if (whiteout) { ++ pack_inode(c, p, whiteout, 0); ++ err = ubifs_node_calc_hash(c, p, hash_whiteout_inode); ++ if (err) ++ goto out_release; ++ ++ p += ALIGN(wlen, 8); ++ } ++ + if (!move) { + pack_inode(c, p, old_dir, 1); + err = ubifs_node_calc_hash(c, p, hash_old_dir); +@@ -1352,6 +1373,9 @@ int ubifs_jnl_rename(struct ubifs_info * + if (new_inode) + 
ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, + new_inode->i_ino); ++ if (whiteout) ++ ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf, ++ whiteout->i_ino); + } + release_head(c, BASEHD); + +@@ -1368,8 +1392,6 @@ int ubifs_jnl_rename(struct ubifs_info * + err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm); + if (err) + goto out_ro; +- +- ubifs_delete_orphan(c, whiteout->i_ino); + } else { + err = ubifs_add_dirt(c, lnum, dlen2); + if (err) +@@ -1390,6 +1412,15 @@ int ubifs_jnl_rename(struct ubifs_info * + offs += ALIGN(ilen, 8); + } + ++ if (whiteout) { ++ ino_key_init(c, &key, whiteout->i_ino); ++ err = ubifs_tnc_add(c, &key, lnum, offs, wlen, ++ hash_whiteout_inode); ++ if (err) ++ goto out_ro; ++ offs += ALIGN(wlen, 8); ++ } ++ + ino_key_init(c, &key, old_dir->i_ino); + err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir); + if (err) +@@ -1410,6 +1441,11 @@ int ubifs_jnl_rename(struct ubifs_info * + new_ui->synced_i_size = new_ui->ui_size; + spin_unlock(&new_ui->ui_lock); + } ++ /* ++ * No need to mark whiteout inode clean. ++ * Whiteout doesn't have non-zero size, no need to update ++ * synced_i_size for whiteout_ui. ++ */ + mark_inode_clean(c, ubifs_inode(old_dir)); + if (move) + mark_inode_clean(c, ubifs_inode(new_dir)); diff --git a/queue-5.17/ubifs-rename_whiteout-correct-old_dir-size-computing.patch b/queue-5.17/ubifs-rename_whiteout-correct-old_dir-size-computing.patch new file mode 100644 index 00000000000..50012b57c25 --- /dev/null +++ b/queue-5.17/ubifs-rename_whiteout-correct-old_dir-size-computing.patch @@ -0,0 +1,35 @@ +From 705757274599e2e064dd3054aabc74e8af31a095 Mon Sep 17 00:00:00 2001 +From: Baokun Li +Date: Tue, 15 Feb 2022 12:07:36 +0800 +Subject: ubifs: rename_whiteout: correct old_dir size computing + +From: Baokun Li + +commit 705757274599e2e064dd3054aabc74e8af31a095 upstream. + +When renaming the whiteout file, the old whiteout file is not deleted. 
+Therefore, we add the old dentry size to the old dir like XFS. +Otherwise, an error may be reported due to `fscki->calc_sz != fscki->size` +in check_inodes. + +Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT") +Reported-by: Zhihao Cheng +Signed-off-by: Baokun Li +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/dir.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -1402,6 +1402,9 @@ static int do_rename(struct inode *old_d + iput(whiteout); + goto out_release; + } ++ ++ /* Add the old_dentry size to the old_dir size. */ ++ old_sz -= CALC_DENT_SIZE(fname_len(&old_nm)); + } + + lock_4_inodes(old_dir, new_dir, new_inode, whiteout); diff --git a/queue-5.17/ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch b/queue-5.17/ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch new file mode 100644 index 00000000000..193ac9a430d --- /dev/null +++ b/queue-5.17/ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch @@ -0,0 +1,71 @@ +From 40a8f0d5e7b3999f096570edab71c345da812e3e Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Mon, 27 Dec 2021 11:22:32 +0800 +Subject: ubifs: rename_whiteout: Fix double free for whiteout_ui->data + +From: Zhihao Cheng + +commit 40a8f0d5e7b3999f096570edab71c345da812e3e upstream. + +'whiteout_ui->data' will be freed twice if space budget fail for +rename whiteout operation as following process: + +rename_whiteout + dev = kmalloc + whiteout_ui->data = dev + kfree(whiteout_ui->data) // Free first time + iput(whiteout) + ubifs_free_inode + kfree(ui->data) // Double free!
+ +KASAN reports: +================================================================== +BUG: KASAN: double-free or invalid-free in ubifs_free_inode+0x4f/0x70 +Call Trace: + kfree+0x117/0x490 + ubifs_free_inode+0x4f/0x70 [ubifs] + i_callback+0x30/0x60 + rcu_do_batch+0x366/0xac0 + __do_softirq+0x133/0x57f + +Allocated by task 1506: + kmem_cache_alloc_trace+0x3c2/0x7a0 + do_rename+0x9b7/0x1150 [ubifs] + ubifs_rename+0x106/0x1f0 [ubifs] + do_syscall_64+0x35/0x80 + +Freed by task 1506: + kfree+0x117/0x490 + do_rename.cold+0x53/0x8a [ubifs] + ubifs_rename+0x106/0x1f0 [ubifs] + do_syscall_64+0x35/0x80 + +The buggy address belongs to the object at ffff88810238bed8 which +belongs to the cache kmalloc-8 of size 8 +================================================================== + +Let ubifs_free_inode() free 'whiteout_ui->data'. BTW, delete unused +assignment 'whiteout_ui->data_len = 0', process 'ubifs_evict_inode() +-> ubifs_jnl_delete_inode() -> ubifs_jnl_write_inode()' doesn't need it +(because 'inc_nlink(whiteout)' won't be executed by 'goto out_release', + and the nlink of whiteout inode is 0).
+ +Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT") +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/dir.c | 2 -- + 1 file changed, 2 deletions(-) + +--- a/fs/ubifs/dir.c ++++ b/fs/ubifs/dir.c +@@ -1425,8 +1425,6 @@ static int do_rename(struct inode *old_d + + err = ubifs_budget_space(c, &wht_req); + if (err) { +- kfree(whiteout_ui->data); +- whiteout_ui->data_len = 0; + iput(whiteout); + goto out_release; + } diff --git a/queue-5.17/ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch b/queue-5.17/ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch new file mode 100644 index 00000000000..c173db0f7fb --- /dev/null +++ b/queue-5.17/ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch @@ -0,0 +1,38 @@ +From 1b83ec057db16b4d0697dc21ef7a9743b6041f72 Mon Sep 17 00:00:00 2001 +From: Zhihao Cheng +Date: Mon, 27 Dec 2021 11:22:39 +0800 +Subject: ubifs: setflags: Make dirtied_ino_d 8 bytes aligned + +From: Zhihao Cheng + +commit 1b83ec057db16b4d0697dc21ef7a9743b6041f72 upstream. + +Make 'ui->data_len' aligned with 8 bytes before it is assigned to +dirtied_ino_d. Since 8871d84c8f8b0c6b("ubifs: convert to fileattr") +applied, 'setflags()' only affects regular files and directories, only +xattr inode, symlink inode and special inode(pipe/char_dev/block_dev) +have non-zero 'ui->data_len' field, so assertion +'!(req->dirtied_ino_d & 7)' cannot fail in ubifs_budget_space(). +To avoid assertion fails in future evolution(eg. setflags can operate +special inodes), it's better to make dirtied_ino_d 8 bytes aligned, +after all aligned size is still zero for regular files.
+ +Fixes: 1e51764a3c2ac05a ("UBIFS: add new flash file system") +Signed-off-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Greg Kroah-Hartman +--- + fs/ubifs/ioctl.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/fs/ubifs/ioctl.c ++++ b/fs/ubifs/ioctl.c +@@ -108,7 +108,7 @@ static int setflags(struct inode *inode, + struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; + struct ubifs_budget_req req = { .dirtied_ino = 1, +- .dirtied_ino_d = ui->data_len }; ++ .dirtied_ino_d = ALIGN(ui->data_len, 8) }; + + err = ubifs_budget_space(c, &req); + if (err)