]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 3 Apr 2022 11:42:58 +0000 (13:42 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 3 Apr 2022 11:42:58 +0000 (13:42 +0200)
added patches:
nvme-allow-duplicate-nsids-for-private-namespaces.patch
nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch
ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch
ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch
ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch
ubifs-fix-to-add-refcount-once-page-is-set-private.patch
ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch
ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch
ubifs-rename-whiteout-atomically.patch
ubifs-rename_whiteout-correct-old_dir-size-computing.patch
ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch
ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch

13 files changed:
queue-5.15/nvme-allow-duplicate-nsids-for-private-namespaces.patch [new file with mode: 0644]
queue-5.15/nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch [new file with mode: 0644]
queue-5.15/series
queue-5.15/ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch [new file with mode: 0644]
queue-5.15/ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch [new file with mode: 0644]
queue-5.15/ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch [new file with mode: 0644]
queue-5.15/ubifs-fix-to-add-refcount-once-page-is-set-private.patch [new file with mode: 0644]
queue-5.15/ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch [new file with mode: 0644]
queue-5.15/ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch [new file with mode: 0644]
queue-5.15/ubifs-rename-whiteout-atomically.patch [new file with mode: 0644]
queue-5.15/ubifs-rename_whiteout-correct-old_dir-size-computing.patch [new file with mode: 0644]
queue-5.15/ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch [new file with mode: 0644]
queue-5.15/ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch [new file with mode: 0644]

diff --git a/queue-5.15/nvme-allow-duplicate-nsids-for-private-namespaces.patch b/queue-5.15/nvme-allow-duplicate-nsids-for-private-namespaces.patch
new file mode 100644 (file)
index 0000000..aef94cf
--- /dev/null
@@ -0,0 +1,127 @@
+From 5974ea7ce0f9a5987fc8cf5e08ad6e3e70bb542e Mon Sep 17 00:00:00 2001
+From: Sungup Moon <sungup.moon@samsung.com>
+Date: Mon, 14 Mar 2022 20:05:45 +0900
+Subject: nvme: allow duplicate NSIDs for private namespaces
+
+From: Sungup Moon <sungup.moon@samsung.com>
+
+commit 5974ea7ce0f9a5987fc8cf5e08ad6e3e70bb542e upstream.
+
+An NVMe subsystem with multiple controllers can have private namespaces
+that use the same NSID under some conditions:
+
+ "If Namespace Management, ANA Reporting, or NVM Sets are supported, the
+  NSIDs shall be unique within the NVM subsystem. If the Namespace
+  Management, ANA Reporting, and NVM Sets are not supported, then NSIDs:
+   a) for shared namespace shall be unique; and
+   b) for private namespace are not required to be unique."
+
+Reference: Section 6.1.6 NSID and Namespace Usage; NVM Express 1.4c spec.
+
+Make sure this specific setup is supported in Linux.
+
+Fixes: 9ad1927a3bc2 ("nvme: always search for namespace head")
+Signed-off-by: Sungup Moon <sungup.moon@samsung.com>
+[hch: refactored and fixed the controller vs subsystem based naming
+      conflict]
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/nvme/host/core.c      |   15 ++++++++++-----
+ drivers/nvme/host/multipath.c |    7 ++++---
+ drivers/nvme/host/nvme.h      |   19 +++++++++++++++++++
+ include/linux/nvme.h          |    1 +
+ 4 files changed, 34 insertions(+), 8 deletions(-)
+
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -3510,15 +3510,20 @@ static const struct attribute_group *nvm
+       NULL,
+ };
+-static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys,
++static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl,
+               unsigned nsid)
+ {
+       struct nvme_ns_head *h;
+-      lockdep_assert_held(&subsys->lock);
++      lockdep_assert_held(&ctrl->subsys->lock);
+-      list_for_each_entry(h, &subsys->nsheads, entry) {
+-              if (h->ns_id != nsid)
++      list_for_each_entry(h, &ctrl->subsys->nsheads, entry) {
++              /*
++               * Private namespaces can share NSIDs under some conditions.
++               * In that case we can't use the same ns_head for namespaces
++               * with the same NSID.
++               */
++              if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h))
+                       continue;
+               if (!list_empty(&h->list) && nvme_tryget_ns_head(h))
+                       return h;
+@@ -3686,7 +3691,7 @@ static int nvme_init_ns_head(struct nvme
+       int ret = 0;
+       mutex_lock(&ctrl->subsys->lock);
+-      head = nvme_find_ns_head(ctrl->subsys, nsid);
++      head = nvme_find_ns_head(ctrl, nsid);
+       if (!head) {
+               head = nvme_alloc_ns_head(ctrl, nsid, ids);
+               if (IS_ERR(head)) {
+--- a/drivers/nvme/host/multipath.c
++++ b/drivers/nvme/host/multipath.c
+@@ -462,10 +462,11 @@ int nvme_mpath_alloc_disk(struct nvme_ct
+       /*
+        * Add a multipath node if the subsystems supports multiple controllers.
+-       * We also do this for private namespaces as the namespace sharing data could
+-       * change after a rescan.
++       * We also do this for private namespaces as the namespace sharing flag
++       * could change after a rescan.
+        */
+-      if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
++      if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
++          !nvme_is_unique_nsid(ctrl, head) || !multipath)
+               return 0;
+       head->disk = blk_alloc_disk(ctrl->numa_node);
+--- a/drivers/nvme/host/nvme.h
++++ b/drivers/nvme/host/nvme.h
+@@ -693,6 +693,25 @@ static inline bool nvme_check_ready(stru
+               return true;
+       return __nvme_check_ready(ctrl, rq, queue_live);
+ }
++
++/*
++ * NSID shall be unique for all shared namespaces, or if at least one of the
++ * following conditions is met:
++ *   1. Namespace Management is supported by the controller
++ *   2. ANA is supported by the controller
++ *   3. NVM Set are supported by the controller
++ *
++ * In other case, private namespace are not required to report a unique NSID.
++ */
++static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl,
++              struct nvme_ns_head *head)
++{
++      return head->shared ||
++              (ctrl->oacs & NVME_CTRL_OACS_NS_MNGT_SUPP) ||
++              (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) ||
++              (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS);
++}
++
+ int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+               void *buf, unsigned bufflen);
+ int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
+--- a/include/linux/nvme.h
++++ b/include/linux/nvme.h
+@@ -322,6 +322,7 @@ enum {
+       NVME_CTRL_ONCS_TIMESTAMP                = 1 << 6,
+       NVME_CTRL_VWC_PRESENT                   = 1 << 0,
+       NVME_CTRL_OACS_SEC_SUPP                 = 1 << 0,
++      NVME_CTRL_OACS_NS_MNGT_SUPP             = 1 << 3,
+       NVME_CTRL_OACS_DIRECTIVES               = 1 << 5,
+       NVME_CTRL_OACS_DBBUF_SUPP               = 1 << 8,
+       NVME_CTRL_LPA_CMD_EFFECTS_LOG           = 1 << 1,
diff --git a/queue-5.15/nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch b/queue-5.15/nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch
new file mode 100644 (file)
index 0000000..65b3ecc
--- /dev/null
@@ -0,0 +1,66 @@
+From 726be2c72efc0a64c206e854b8996ad3ab9c7507 Mon Sep 17 00:00:00 2001
+From: Pankaj Raghav <p.raghav@samsung.com>
+Date: Tue, 22 Mar 2022 10:20:48 +0100
+Subject: nvme: fix the read-only state for zoned namespaces with unsupposed features
+
+From: Pankaj Raghav <p.raghav@samsung.com>
+
+commit 726be2c72efc0a64c206e854b8996ad3ab9c7507 upstream.
+
+commit 2f4c9ba23b88 ("nvme: export zoned namespaces without Zone Append
+support read-only") marks zoned namespaces without append support
+read-only.  It does so by setting NVME_NS_FORCE_RO in ns->flags in
+nvme_update_zone_info and checking for that flag later in
+nvme_update_disk_info to mark the disk as read-only.
+
+But commit 73d90386b559 ("nvme: cleanup zone information initialization")
+rearranged nvme_update_disk_info to be called before
+nvme_update_zone_info and thus not marking the disk as read-only.
+The call order cannot be just reverted because nvme_update_zone_info sets
+certain queue parameters such as zone_write_granularity that depend on the
+prior call to nvme_update_disk_info.
+
+Remove the call to set_disk_ro in nvme_update_disk_info and call
+set_disk_ro after nvme_update_zone_info and nvme_update_disk_info to set
+the permission for ZNS drives correctly. The same applies to the
+multipath disk path.
+
+Fixes: 73d90386b559 ("nvme: cleanup zone information initialization")
+Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/nvme/host/core.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/nvme/host/core.c
++++ b/drivers/nvme/host/core.c
+@@ -1838,9 +1838,6 @@ static void nvme_update_disk_info(struct
+       nvme_config_discard(disk, ns);
+       blk_queue_max_write_zeroes_sectors(disk->queue,
+                                          ns->ctrl->max_zeroes_sectors);
+-
+-      set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
+-              test_bit(NVME_NS_FORCE_RO, &ns->flags));
+ }
+ static inline bool nvme_first_scan(struct gendisk *disk)
+@@ -1901,6 +1898,8 @@ static int nvme_update_ns_info(struct nv
+                       goto out_unfreeze;
+       }
++      set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) ||
++              test_bit(NVME_NS_FORCE_RO, &ns->flags));
+       set_bit(NVME_NS_READY, &ns->flags);
+       blk_mq_unfreeze_queue(ns->disk->queue);
+@@ -1913,6 +1912,9 @@ static int nvme_update_ns_info(struct nv
+       if (nvme_ns_head_multipath(ns->head)) {
+               blk_mq_freeze_queue(ns->head->disk->queue);
+               nvme_update_disk_info(ns->head->disk, ns, id);
++              set_disk_ro(ns->head->disk,
++                          (id->nsattr & NVME_NS_ATTR_RO) ||
++                                  test_bit(NVME_NS_FORCE_RO, &ns->flags));
+               nvme_mpath_revalidate_paths(ns);
+               blk_stack_limits(&ns->head->disk->queue->limits,
+                                &ns->queue->limits, 0);
index b9ba8b2b02d9e82e421f927e4c10cabb4c6064fe..fe522b5ad38f2af65c50240eabf55facb438d8e1 100644 (file)
@@ -814,3 +814,15 @@ kvm-x86-forbid-vmm-to-set-synic-stimer-msrs-when-synic-wasn-t-activated.patch
 kvm-prevent-module-exit-until-all-vms-are-freed.patch
 kvm-x86-fix-sending-pv-ipi.patch
 kvm-svm-fix-panic-on-out-of-bounds-guest-irq.patch
+ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch
+ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch
+ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch
+ubifs-rename-whiteout-atomically.patch
+ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch
+ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch
+ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch
+ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch
+ubifs-fix-to-add-refcount-once-page-is-set-private.patch
+ubifs-rename_whiteout-correct-old_dir-size-computing.patch
+nvme-allow-duplicate-nsids-for-private-namespaces.patch
+nvme-fix-the-read-only-state-for-zoned-namespaces-with-unsupposed-features.patch
diff --git a/queue-5.15/ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch b/queue-5.15/ubifs-add-missing-iput-if-do_tmpfile-failed-in-rename-whiteout.patch
new file mode 100644 (file)
index 0000000..a862dc5
--- /dev/null
@@ -0,0 +1,35 @@
+From 716b4573026bcbfa7b58ed19fe15554bac66b082 Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Mon, 27 Dec 2021 11:22:35 +0800
+Subject: ubifs: Add missing iput if do_tmpfile() failed in rename whiteout
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 716b4573026bcbfa7b58ed19fe15554bac66b082 upstream.
+
+whiteout inode should be put when do_tmpfile() failed if inode has been
+initialized. Otherwise we will get following warning during umount:
+  UBIFS error (ubi0:0 pid 1494): ubifs_assert_failed [ubifs]: UBIFS
+  assert failed: c->bi.dd_growth == 0, in fs/ubifs/super.c:1930
+  VFS: Busy inodes after unmount of ubifs. Self-destruct in 5 seconds.
+
+Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Suggested-by: Sascha Hauer <s.hauer@pengutronix.de>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/dir.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -432,6 +432,8 @@ out_inode:
+       make_bad_inode(inode);
+       if (!instantiated)
+               iput(inode);
++      else if (whiteout)
++              iput(*whiteout);
+ out_budg:
+       ubifs_release_budget(c, &req);
+       if (!instantiated)
diff --git a/queue-5.15/ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch b/queue-5.15/ubifs-fix-deadlock-in-concurrent-rename-whiteout-and-inode-writeback.patch
new file mode 100644 (file)
index 0000000..63599b8
--- /dev/null
@@ -0,0 +1,118 @@
+From afd427048047e8efdedab30e8888044e2be5aa9c Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Mon, 27 Dec 2021 11:22:33 +0800
+Subject: ubifs: Fix deadlock in concurrent rename whiteout and inode writeback
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit afd427048047e8efdedab30e8888044e2be5aa9c upstream.
+
+Following hung tasks:
+[   77.028764] task:kworker/u8:4    state:D stack:    0 pid:  132
+[   77.028820] Call Trace:
+[   77.029027]  schedule+0x8c/0x1b0
+[   77.029067]  mutex_lock+0x50/0x60
+[   77.029074]  ubifs_write_inode+0x68/0x1f0 [ubifs]
+[   77.029117]  __writeback_single_inode+0x43c/0x570
+[   77.029128]  writeback_sb_inodes+0x259/0x740
+[   77.029148]  wb_writeback+0x107/0x4d0
+[   77.029163]  wb_workfn+0x162/0x7b0
+
+[   92.390442] task:aa              state:D stack:    0 pid: 1506
+[   92.390448] Call Trace:
+[   92.390458]  schedule+0x8c/0x1b0
+[   92.390461]  wb_wait_for_completion+0x82/0xd0
+[   92.390469]  __writeback_inodes_sb_nr+0xb2/0x110
+[   92.390472]  writeback_inodes_sb_nr+0x14/0x20
+[   92.390476]  ubifs_budget_space+0x705/0xdd0 [ubifs]
+[   92.390503]  do_rename.cold+0x7f/0x187 [ubifs]
+[   92.390549]  ubifs_rename+0x8b/0x180 [ubifs]
+[   92.390571]  vfs_rename+0xdb2/0x1170
+[   92.390580]  do_renameat2+0x554/0x770
+
+, are caused by concurrent rename whiteout and inode writeback processes:
+       rename_whiteout(Thread 1)               wb_workfn(Thread2)
+ubifs_rename
+  do_rename
+    lock_4_inodes (Hold ui_mutex)
+    ubifs_budget_space
+      make_free_space
+        shrink_liability
+         __writeback_inodes_sb_nr
+           bdi_split_work_to_wbs (Queue new wb work)
+                                             wb_do_writeback(wb work)
+                                               __writeback_single_inode
+                                                 ubifs_write_inode
+                                                   LOCK(ui_mutex)
+                                                          ↑
+             wb_wait_for_completion (Wait wb work) <-- deadlock!
+
+Reproducer (Detail program in [Link]):
+  1. SYS_renameat2("/mp/dir/file", "/mp/dir/whiteout", RENAME_WHITEOUT)
+  2. Consume out of space before kernel(mdelay) doing budget for whiteout
+
+Fix it by doing whiteout space budget before locking ubifs inodes.
+BTW, it also fixes wrong goto tag 'out_release' in whiteout budget
+error handling path(It should at least recover dir i_size and unlock
+4 ubifs inodes).
+
+Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=214733
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/dir.c |   25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -1324,6 +1324,7 @@ static int do_rename(struct inode *old_d
+       if (flags & RENAME_WHITEOUT) {
+               union ubifs_dev_desc *dev = NULL;
++              struct ubifs_budget_req wht_req;
+               dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
+               if (!dev) {
+@@ -1345,6 +1346,20 @@ static int do_rename(struct inode *old_d
+               whiteout_ui->data = dev;
+               whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0));
+               ubifs_assert(c, !whiteout_ui->dirty);
++
++              memset(&wht_req, 0, sizeof(struct ubifs_budget_req));
++              wht_req.dirtied_ino = 1;
++              wht_req.dirtied_ino_d = ALIGN(whiteout_ui->data_len, 8);
++              /*
++               * To avoid deadlock between space budget (holds ui_mutex and
++               * waits wb work) and writeback work(waits ui_mutex), do space
++               * budget before ubifs inodes locked.
++               */
++              err = ubifs_budget_space(c, &wht_req);
++              if (err) {
++                      iput(whiteout);
++                      goto out_release;
++              }
+       }
+       lock_4_inodes(old_dir, new_dir, new_inode, whiteout);
+@@ -1419,16 +1434,6 @@ static int do_rename(struct inode *old_d
+       }
+       if (whiteout) {
+-              struct ubifs_budget_req wht_req = { .dirtied_ino = 1,
+-                              .dirtied_ino_d = \
+-                              ALIGN(ubifs_inode(whiteout)->data_len, 8) };
+-
+-              err = ubifs_budget_space(c, &wht_req);
+-              if (err) {
+-                      iput(whiteout);
+-                      goto out_release;
+-              }
+-
+               inc_nlink(whiteout);
+               mark_inode_dirty(whiteout);
diff --git a/queue-5.15/ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch b/queue-5.15/ubifs-fix-read-out-of-bounds-in-ubifs_wbuf_write_nolock.patch
new file mode 100644 (file)
index 0000000..4970533
--- /dev/null
@@ -0,0 +1,109 @@
+From 4f2262a334641e05f645364d5ade1f565c85f20b Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Mon, 27 Dec 2021 11:22:40 +0800
+Subject: ubifs: Fix read out-of-bounds in ubifs_wbuf_write_nolock()
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 4f2262a334641e05f645364d5ade1f565c85f20b upstream.
+
+Function ubifs_wbuf_write_nolock() may access buf out of bounds in
+following process:
+
+ubifs_wbuf_write_nolock():
+  aligned_len = ALIGN(len, 8);   // Assume len = 4089, aligned_len = 4096
+  if (aligned_len <= wbuf->avail) ... // Not satisfy
+  if (wbuf->used) {
+    ubifs_leb_write()  // Fill some data in avail wbuf
+    len -= wbuf->avail;   // len is still not 8-bytes aligned
+    aligned_len -= wbuf->avail;
+  }
+  n = aligned_len >> c->max_write_shift;
+  if (n) {
+    n <<= c->max_write_shift;
+    err = ubifs_leb_write(c, wbuf->lnum, buf + written,
+                          wbuf->offs, n);
+    // n > len, read out of bounds less than 8(n-len) bytes
+  }
+
+, which can be caught by KASAN:
+  =========================================================
+  BUG: KASAN: slab-out-of-bounds in ecc_sw_hamming_calculate+0x1dc/0x7d0
+  Read of size 4 at addr ffff888105594ff8 by task kworker/u8:4/128
+  Workqueue: writeback wb_workfn (flush-ubifs_0_0)
+  Call Trace:
+    kasan_report.cold+0x81/0x165
+    nand_write_page_swecc+0xa9/0x160
+    ubifs_leb_write+0xf2/0x1b0 [ubifs]
+    ubifs_wbuf_write_nolock+0x421/0x12c0 [ubifs]
+    write_head+0xdc/0x1c0 [ubifs]
+    ubifs_jnl_write_inode+0x627/0x960 [ubifs]
+    wb_workfn+0x8af/0xb80
+
+Function ubifs_wbuf_write_nolock() accepts that parameter 'len' is not 8
+bytes aligned, the 'len' represents the true length of buf (which is
+allocated in 'ubifs_jnl_xxx', eg. ubifs_jnl_write_inode), so
+ubifs_wbuf_write_nolock() must handle the length read from 'buf' carefully
+to write leb safely.
+
+Fetch a reproducer in [Link].
+
+Fixes: 1e51764a3c2ac0 ("UBIFS: add new flash file system")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=214785
+Reported-by: Chengsong Ke <kechengsong@huawei.com>
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/io.c |   34 ++++++++++++++++++++++++++++++----
+ 1 file changed, 30 insertions(+), 4 deletions(-)
+
+--- a/fs/ubifs/io.c
++++ b/fs/ubifs/io.c
+@@ -833,16 +833,42 @@ int ubifs_wbuf_write_nolock(struct ubifs
+        */
+       n = aligned_len >> c->max_write_shift;
+       if (n) {
+-              n <<= c->max_write_shift;
++              int m = n - 1;
++
+               dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum,
+                      wbuf->offs);
+-              err = ubifs_leb_write(c, wbuf->lnum, buf + written,
+-                                    wbuf->offs, n);
++
++              if (m) {
++                      /* '(n-1)<<c->max_write_shift < len' is always true. */
++                      m <<= c->max_write_shift;
++                      err = ubifs_leb_write(c, wbuf->lnum, buf + written,
++                                            wbuf->offs, m);
++                      if (err)
++                              goto out;
++                      wbuf->offs += m;
++                      aligned_len -= m;
++                      len -= m;
++                      written += m;
++              }
++
++              /*
++               * The non-written len of buf may be less than 'n' because
++               * parameter 'len' is not 8 bytes aligned, so here we read
++               * min(len, n) bytes from buf.
++               */
++              n = 1 << c->max_write_shift;
++              memcpy(wbuf->buf, buf + written, min(len, n));
++              if (n > len) {
++                      ubifs_assert(c, n - len < 8);
++                      ubifs_pad(c, wbuf->buf + len, n - len);
++              }
++
++              err = ubifs_leb_write(c, wbuf->lnum, wbuf->buf, wbuf->offs, n);
+               if (err)
+                       goto out;
+               wbuf->offs += n;
+               aligned_len -= n;
+-              len -= n;
++              len -= min(len, n);
+               written += n;
+       }
diff --git a/queue-5.15/ubifs-fix-to-add-refcount-once-page-is-set-private.patch b/queue-5.15/ubifs-fix-to-add-refcount-once-page-is-set-private.patch
new file mode 100644 (file)
index 0000000..2b57de6
--- /dev/null
@@ -0,0 +1,184 @@
+From 3b67db8a6ca83e6ff90b756d3da0c966f61cd37b Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Mon, 27 Dec 2021 11:22:41 +0800
+Subject: ubifs: Fix to add refcount once page is set private
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 3b67db8a6ca83e6ff90b756d3da0c966f61cd37b upstream.
+
+MM defined the rule [1] very clearly that once page was set with PG_private
+flag, we should increment the refcount in that page, also main flows like
+pageout(), migrate_page() will assume there is one additional page
+reference count if page_has_private() returns true. Otherwise, we may
+get a BUG in page migration:
+
+  page:0000000080d05b9d refcount:-1 mapcount:0 mapping:000000005f4d82a8
+  index:0xe2 pfn:0x14c12
+  aops:ubifs_file_address_operations [ubifs] ino:8f1 dentry name:"f30e"
+  flags: 0x1fffff80002405(locked|uptodate|owner_priv_1|private|node=0|
+  zone=1|lastcpupid=0x1fffff)
+  page dumped because: VM_BUG_ON_PAGE(page_count(page) != 0)
+  ------------[ cut here ]------------
+  kernel BUG at include/linux/page_ref.h:184!
+  invalid opcode: 0000 [#1] SMP
+  CPU: 3 PID: 38 Comm: kcompactd0 Not tainted 5.15.0-rc5
+  RIP: 0010:migrate_page_move_mapping+0xac3/0xe70
+  Call Trace:
+    ubifs_migrate_page+0x22/0xc0 [ubifs]
+    move_to_new_page+0xb4/0x600
+    migrate_pages+0x1523/0x1cc0
+    compact_zone+0x8c5/0x14b0
+    kcompactd+0x2bc/0x560
+    kthread+0x18c/0x1e0
+    ret_from_fork+0x1f/0x30
+
+First, we should clarify one concept: what the refcount means for a page
+obtained from grab_cache_page_write_begin(). There are 2 situations:
+Situation 1: refcount is 3, page is created by __page_cache_alloc.
+  TYPE_A - the write process is using this page
+  TYPE_B - page is assigned to one certain mapping by calling
+          __add_to_page_cache_locked()
+  TYPE_C - page is added into pagevec list corresponding current cpu by
+          calling lru_cache_add()
+Situation 2: refcount is 2, page is gotten from the mapping's tree
+  TYPE_B - page has been assigned to one certain mapping
+  TYPE_A - the write process is using this page (by calling
+          page_cache_get_speculative())
+Filesystem releases one refcount by calling put_page() in xxx_write_end(),
+the released refcount corresponds to TYPE_A (write task is using it). If
+there are any processes using a page, page migration process will skip the
+page by judging whether expected_page_refs() equals to page refcount.
+
+The BUG is caused by following process:
+    PA(cpu 0)                           kcompactd(cpu 1)
+                               compact_zone
+ubifs_write_begin
+  page_a = grab_cache_page_write_begin
+    add_to_page_cache_lru
+      lru_cache_add
+        pagevec_add // put page into cpu 0's pagevec
+  (refcnt = 3, for page creation process)
+ubifs_write_end
+  SetPagePrivate(page_a) // doesn't increase page count !
+  unlock_page(page_a)
+  put_page(page_a)  // refcnt = 2
+                               [...]
+
+    PB(cpu 0)
+filemap_read
+  filemap_get_pages
+    add_to_page_cache_lru
+      lru_cache_add
+        __pagevec_lru_add // traverse all pages in cpu 0's pagevec
+         __pagevec_lru_add_fn
+           SetPageLRU(page_a)
+                               isolate_migratepages
+                                  isolate_migratepages_block
+                                   get_page_unless_zero(page_a)
+                                   // refcnt = 3
+                                      list_add(page_a, from_list)
+                               migrate_pages(from_list)
+                                 __unmap_and_move
+                                   move_to_new_page
+                                     ubifs_migrate_page(page_a)
+                                       migrate_page_move_mapping
+                                         expected_page_refs get 3
+                                  (migration[1] + mapping[1] + private[1])
+        release_pages
+          put_page_testzero(page_a) // refcnt = 3
+                                          page_ref_freeze  // refcnt = 0
+            page_ref_dec_and_test(0 - 1 = -1)
+                                          page_ref_unfreeze
+                                            VM_BUG_ON_PAGE(-1 != 0, page)
+
+UBIFS doesn't increase the page refcount after setting the private flag,
+which leads the page migration task to believe the page is not used by any
+other process, so the page is migrated. This causes concurrent access to the
+page refcount between put_page() called by another process (e.g. a reader
+calling lru_cache_add) and page_ref_unfreeze() called by the migration task.
+
+Actually zhangjun has tried to fix this problem [2] by recalculating page
+refcnt in ubifs_migrate_page(). It's better to follow MM rules [1], because
+just like Kirill suggested in [2], we need to check all users of
+page_has_private() helper. Like f2fs does in [3], fix it by adding/deleting
+refcount when setting/clearing private for a page. BTW, according to [4],
+we set 'page->private' as 1 because ubifs just simply SetPagePrivate().
+And, [5] provided a common helper to set/clear page private, ubifs can
+use this helper following the example of iomap, afs, btrfs, etc.
+
+Jump [6] to find a reproducer.
+
+[1] https://lore.kernel.org/lkml/2b19b3c4-2bc4-15fa-15cc-27a13e5c7af1@aol.com
+[2] https://www.spinics.net/lists/linux-mtd/msg04018.html
+[3] http://lkml.iu.edu/hypermail/linux/kernel/1903.0/03313.html
+[4] https://lore.kernel.org/linux-f2fs-devel/20210422154705.GO3596236@casper.infradead.org
+[5] https://lore.kernel.org/all/20200517214718.468-1-guoqing.jiang@cloud.ionos.com
+[6] https://bugzilla.kernel.org/show_bug.cgi?id=214961
+
+Fixes: 1e51764a3c2ac0 ("UBIFS: add new flash file system")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/file.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/fs/ubifs/file.c
++++ b/fs/ubifs/file.c
+@@ -570,7 +570,7 @@ static int ubifs_write_end(struct file *
+       }
+       if (!PagePrivate(page)) {
+-              SetPagePrivate(page);
++              attach_page_private(page, (void *)1);
+               atomic_long_inc(&c->dirty_pg_cnt);
+               __set_page_dirty_nobuffers(page);
+       }
+@@ -947,7 +947,7 @@ static int do_writepage(struct page *pag
+               release_existing_page_budget(c);
+       atomic_long_dec(&c->dirty_pg_cnt);
+-      ClearPagePrivate(page);
++      detach_page_private(page);
+       ClearPageChecked(page);
+       kunmap(page);
+@@ -1304,7 +1304,7 @@ static void ubifs_invalidatepage(struct
+               release_existing_page_budget(c);
+       atomic_long_dec(&c->dirty_pg_cnt);
+-      ClearPagePrivate(page);
++      detach_page_private(page);
+       ClearPageChecked(page);
+ }
+@@ -1471,8 +1471,8 @@ static int ubifs_migrate_page(struct add
+               return rc;
+       if (PagePrivate(page)) {
+-              ClearPagePrivate(page);
+-              SetPagePrivate(newpage);
++              detach_page_private(page);
++              attach_page_private(newpage, (void *)1);
+       }
+       if (mode != MIGRATE_SYNC_NO_COPY)
+@@ -1496,7 +1496,7 @@ static int ubifs_releasepage(struct page
+               return 0;
+       ubifs_assert(c, PagePrivate(page));
+       ubifs_assert(c, 0);
+-      ClearPagePrivate(page);
++      detach_page_private(page);
+       ClearPageChecked(page);
+       return 1;
+ }
+@@ -1567,7 +1567,7 @@ static vm_fault_t ubifs_vm_page_mkwrite(
+       else {
+               if (!PageChecked(page))
+                       ubifs_convert_page_budget(c);
+-              SetPagePrivate(page);
++              attach_page_private(page, (void *)1);
+               atomic_long_inc(&c->dirty_pg_cnt);
+               __set_page_dirty_nobuffers(page);
+       }
diff --git a/queue-5.15/ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch b/queue-5.15/ubifs-fix-ui-dirty-race-between-do_tmpfile-and-writeback-work.patch
new file mode 100644 (file)
index 0000000..3a916a7
--- /dev/null
@@ -0,0 +1,157 @@
+From 60eb3b9c9f11206996f57cb89521824304b305ad Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Mon, 27 Dec 2021 11:22:37 +0800
+Subject: ubifs: Fix 'ui->dirty' race between do_tmpfile() and writeback work
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 60eb3b9c9f11206996f57cb89521824304b305ad upstream.
+
+'ui->dirty' is not protected by 'ui_mutex' in function do_tmpfile() which
+may race with ubifs_write_inode[wb_workfn] to access/update 'ui->dirty',
+finally dirty space is released twice.
+
+       open(O_TMPFILE)                wb_workfn
+do_tmpfile
+  ubifs_budget_space(ino_req = { .dirtied_ino = 1})
+  d_tmpfile // mark inode(tmpfile) dirty
+  ubifs_jnl_update // without holding tmpfile's ui_mutex
+    mark_inode_clean(ui)
+      if (ui->dirty)
+        ubifs_release_dirty_inode_budget(ui)  // release first time
+                                   ubifs_write_inode
+                                    mutex_lock(&ui->ui_mutex)
+                                     ubifs_release_dirty_inode_budget(ui)
+                                    // release second time
+                                    mutex_unlock(&ui->ui_mutex)
+      ui->dirty = 0
+
+Run generic/476 can reproduce following message easily
+(See reproducer in [Link]):
+
+  UBIFS error (ubi0:0 pid 2578): ubifs_assert_failed [ubifs]: UBIFS assert
+  failed: c->bi.dd_growth >= 0, in fs/ubifs/budget.c:554
+  UBIFS warning (ubi0:0 pid 2578): ubifs_ro_mode [ubifs]: switched to
+  read-only mode, error -22
+  Workqueue: writeback wb_workfn (flush-ubifs_0_0)
+  Call Trace:
+    ubifs_ro_mode+0x54/0x60 [ubifs]
+    ubifs_assert_failed+0x4b/0x80 [ubifs]
+    ubifs_release_budget+0x468/0x5a0 [ubifs]
+    ubifs_release_dirty_inode_budget+0x53/0x80 [ubifs]
+    ubifs_write_inode+0x121/0x1f0 [ubifs]
+    ...
+    wb_workfn+0x283/0x7b0
+
+Fix it by holding tmpfile ubifs inode lock during ubifs_jnl_update().
+Similar problem exists in whiteout renaming, but previous fix("ubifs:
+Rename whiteout atomically") has solved the problem.
+
+Fixes: 474b93704f32163 ("ubifs: Implement O_TMPFILE")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=214765
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/dir.c |   60 ++++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 30 insertions(+), 30 deletions(-)
+
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -397,6 +397,32 @@ out_free:
+       return ERR_PTR(err);
+ }
++/**
++ * lock_2_inodes - a wrapper for locking two UBIFS inodes.
++ * @inode1: first inode
++ * @inode2: second inode
++ *
++ * We do not implement any tricks to guarantee strict lock ordering, because
++ * VFS has already done it for us on the @i_mutex. So this is just a simple
++ * wrapper function.
++ */
++static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
++{
++      mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
++      mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
++}
++
++/**
++ * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes.
++ * @inode1: first inode
++ * @inode2: second inode
++ */
++static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
++{
++      mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
++      mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
++}
++
+ static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, umode_t mode)
+ {
+@@ -404,7 +430,7 @@ static int ubifs_tmpfile(struct user_nam
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1};
+       struct ubifs_budget_req ino_req = { .dirtied_ino = 1 };
+-      struct ubifs_inode *ui, *dir_ui = ubifs_inode(dir);
++      struct ubifs_inode *ui;
+       int err, instantiated = 0;
+       struct fscrypt_name nm;
+@@ -452,18 +478,18 @@ static int ubifs_tmpfile(struct user_nam
+       instantiated = 1;
+       mutex_unlock(&ui->ui_mutex);
+-      mutex_lock(&dir_ui->ui_mutex);
++      lock_2_inodes(dir, inode);
+       err = ubifs_jnl_update(c, dir, &nm, inode, 1, 0);
+       if (err)
+               goto out_cancel;
+-      mutex_unlock(&dir_ui->ui_mutex);
++      unlock_2_inodes(dir, inode);
+       ubifs_release_budget(c, &req);
+       return 0;
+ out_cancel:
+-      mutex_unlock(&dir_ui->ui_mutex);
++      unlock_2_inodes(dir, inode);
+ out_inode:
+       make_bad_inode(inode);
+       if (!instantiated)
+@@ -690,32 +716,6 @@ static int ubifs_dir_release(struct inod
+       return 0;
+ }
+-/**
+- * lock_2_inodes - a wrapper for locking two UBIFS inodes.
+- * @inode1: first inode
+- * @inode2: second inode
+- *
+- * We do not implement any tricks to guarantee strict lock ordering, because
+- * VFS has already done it for us on the @i_mutex. So this is just a simple
+- * wrapper function.
+- */
+-static void lock_2_inodes(struct inode *inode1, struct inode *inode2)
+-{
+-      mutex_lock_nested(&ubifs_inode(inode1)->ui_mutex, WB_MUTEX_1);
+-      mutex_lock_nested(&ubifs_inode(inode2)->ui_mutex, WB_MUTEX_2);
+-}
+-
+-/**
+- * unlock_2_inodes - a wrapper for unlocking two UBIFS inodes.
+- * @inode1: first inode
+- * @inode2: second inode
+- */
+-static void unlock_2_inodes(struct inode *inode1, struct inode *inode2)
+-{
+-      mutex_unlock(&ubifs_inode(inode2)->ui_mutex);
+-      mutex_unlock(&ubifs_inode(inode1)->ui_mutex);
+-}
+-
+ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
+                     struct dentry *dentry)
+ {
diff --git a/queue-5.15/ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch b/queue-5.15/ubifs-rectify-space-amount-budget-for-mkdir-tmpfile-operations.patch
new file mode 100644 (file)
index 0000000..beaee3e
--- /dev/null
@@ -0,0 +1,66 @@
+From a6dab6607d4681d227905d5198710b575dbdb519 Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Mon, 27 Dec 2021 11:22:38 +0800
+Subject: ubifs: Rectify space amount budget for mkdir/tmpfile operations
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit a6dab6607d4681d227905d5198710b575dbdb519 upstream.
+
+UBIFS should make sure the flash has enough space to store dirty (Data
+that is newer than disk) data (in memory), space budget is exactly
+designed to do that. If space budget calculates less data than we need,
+'make_reservation()' will do more work(return -ENOSPC if no free space
+left, sometimes we can see "cannot reserve xxx bytes in jhead xxx, error
+-28" in ubifs error messages) with ubifs inodes locked, which may affect
+other syscalls.
+
+A simple way to decide how much space do we need when make a budget:
+See how much space is needed by 'make_reservation()' in ubifs_jnl_xxx()
+function according to corresponding operation.
+
+It's better to report ENOSPC in ubifs_budget_space(), as early as we can.
+
+Fixes: 474b93704f32163 ("ubifs: Implement O_TMPFILE")
+Fixes: 1e51764a3c2ac05 ("UBIFS: add new flash file system")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/dir.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -428,15 +428,18 @@ static int ubifs_tmpfile(struct user_nam
+ {
+       struct inode *inode;
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+-      struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1};
++      struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
++                                      .dirtied_ino = 1};
+       struct ubifs_budget_req ino_req = { .dirtied_ino = 1 };
+       struct ubifs_inode *ui;
+       int err, instantiated = 0;
+       struct fscrypt_name nm;
+       /*
+-       * Budget request settings: new dirty inode, new direntry,
+-       * budget for dirtied inode will be released via writeback.
++       * Budget request settings: new inode, new direntry, changing the
++       * parent directory inode.
++       * Allocate budget separately for new dirtied inode, the budget will
++       * be released via writeback.
+        */
+       dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
+@@ -979,7 +982,8 @@ static int ubifs_mkdir(struct user_names
+       struct ubifs_inode *dir_ui = ubifs_inode(dir);
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+       int err, sz_change;
+-      struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1 };
++      struct ubifs_budget_req req = { .new_ino = 1, .new_dent = 1,
++                                      .dirtied_ino = 1};
+       struct fscrypt_name nm;
+       /*
diff --git a/queue-5.15/ubifs-rename-whiteout-atomically.patch b/queue-5.15/ubifs-rename-whiteout-atomically.patch
new file mode 100644 (file)
index 0000000..0cd0cec
--- /dev/null
@@ -0,0 +1,426 @@
+From 278d9a243635f26c05ad95dcf9c5a593b9e04dc6 Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Mon, 27 Dec 2021 11:22:36 +0800
+Subject: ubifs: Rename whiteout atomically
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 278d9a243635f26c05ad95dcf9c5a593b9e04dc6 upstream.
+
+Currently, rename whiteout has 3 steps:
+  1. create tmpfile(which associates old dentry to tmpfile inode) for
+     whiteout, and store tmpfile to disk
+  2. link whiteout, associate whiteout inode to old dentry again and
+     store old dentry, old inode, new dentry on disk
+  3. writeback dirty whiteout inode to disk
+
+Suddenly power-cut or error occurring(eg. ENOSPC returned by budget,
+memory allocation failure) during above steps may cause kinds of problems:
+  Problem 1: ENOSPC returned by whiteout space budget (before step 2),
+            old dentry will disappear after rename syscall, whiteout file
+            cannot be found either.
+
+            ls dir  // we get file, whiteout
+            rename(dir/file, dir/whiteout, RENAME_WHITEOUT)
+            ENOSPC = ubifs_budget_space(&wht_req) // return
+            ls dir  // empty (no file, no whiteout)
+  Problem 2: Power-cut happens before step 3, whiteout inode with 'nlink=1'
+            is not stored on disk, whiteout dentry(old dentry) is written
+            on disk, whiteout file is lost on next mount (We get "dead
+            directory entry" after executing 'ls -l' on whiteout file).
+
+Now, we use following 3 steps to finish rename whiteout:
+  1. create an in-mem inode with 'nlink = 1' as whiteout
+  2. ubifs_jnl_rename (Write on disk to finish associating old dentry to
+     whiteout inode, associating new dentry with old inode)
+  3. iput(whiteout)
+
+Rely writing in-mem inode on disk by ubifs_jnl_rename() to finish rename
+whiteout, which avoids middle disk state caused by suddenly power-cut
+and error occurring.
+
+Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/dir.c     |  144 +++++++++++++++++++++++++++++++++--------------------
+ fs/ubifs/journal.c |   52 ++++++++++++++++---
+ 2 files changed, 136 insertions(+), 60 deletions(-)
+
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -349,8 +349,56 @@ out_budg:
+       return err;
+ }
+-static int do_tmpfile(struct inode *dir, struct dentry *dentry,
+-                    umode_t mode, struct inode **whiteout)
++static struct inode *create_whiteout(struct inode *dir, struct dentry *dentry)
++{
++      int err;
++      umode_t mode = S_IFCHR | WHITEOUT_MODE;
++      struct inode *inode;
++      struct ubifs_info *c = dir->i_sb->s_fs_info;
++      struct fscrypt_name nm;
++
++      /*
++       * Create an inode('nlink = 1') for whiteout without updating journal,
++       * let ubifs_jnl_rename() store it on flash to complete rename whiteout
++       * atomically.
++       */
++
++      dbg_gen("dent '%pd', mode %#hx in dir ino %lu",
++              dentry, mode, dir->i_ino);
++
++      err = fscrypt_setup_filename(dir, &dentry->d_name, 0, &nm);
++      if (err)
++              return ERR_PTR(err);
++
++      inode = ubifs_new_inode(c, dir, mode);
++      if (IS_ERR(inode)) {
++              err = PTR_ERR(inode);
++              goto out_free;
++      }
++
++      init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
++      ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations);
++
++      err = ubifs_init_security(dir, inode, &dentry->d_name);
++      if (err)
++              goto out_inode;
++
++      /* The dir size is updated by do_rename. */
++      insert_inode_hash(inode);
++
++      return inode;
++
++out_inode:
++      make_bad_inode(inode);
++      iput(inode);
++out_free:
++      fscrypt_free_filename(&nm);
++      ubifs_err(c, "cannot create whiteout file, error %d", err);
++      return ERR_PTR(err);
++}
++
++static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
++                       struct dentry *dentry, umode_t mode)
+ {
+       struct inode *inode;
+       struct ubifs_info *c = dir->i_sb->s_fs_info;
+@@ -392,25 +440,13 @@ static int do_tmpfile(struct inode *dir,
+       }
+       ui = ubifs_inode(inode);
+-      if (whiteout) {
+-              init_special_inode(inode, inode->i_mode, WHITEOUT_DEV);
+-              ubifs_assert(c, inode->i_op == &ubifs_file_inode_operations);
+-      }
+-
+       err = ubifs_init_security(dir, inode, &dentry->d_name);
+       if (err)
+               goto out_inode;
+       mutex_lock(&ui->ui_mutex);
+       insert_inode_hash(inode);
+-
+-      if (whiteout) {
+-              mark_inode_dirty(inode);
+-              drop_nlink(inode);
+-              *whiteout = inode;
+-      } else {
+-              d_tmpfile(dentry, inode);
+-      }
++      d_tmpfile(dentry, inode);
+       ubifs_assert(c, ui->dirty);
+       instantiated = 1;
+@@ -432,8 +468,6 @@ out_inode:
+       make_bad_inode(inode);
+       if (!instantiated)
+               iput(inode);
+-      else if (whiteout)
+-              iput(*whiteout);
+ out_budg:
+       ubifs_release_budget(c, &req);
+       if (!instantiated)
+@@ -443,12 +477,6 @@ out_budg:
+       return err;
+ }
+-static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+-                       struct dentry *dentry, umode_t mode)
+-{
+-      return do_tmpfile(dir, dentry, mode, NULL);
+-}
+-
+ /**
+  * vfs_dent_type - get VFS directory entry type.
+  * @type: UBIFS directory entry type
+@@ -1266,17 +1294,19 @@ static int do_rename(struct inode *old_d
+                                       .dirtied_ino = 3 };
+       struct ubifs_budget_req ino_req = { .dirtied_ino = 1,
+                       .dirtied_ino_d = ALIGN(old_inode_ui->data_len, 8) };
++      struct ubifs_budget_req wht_req;
+       struct timespec64 time;
+       unsigned int saved_nlink;
+       struct fscrypt_name old_nm, new_nm;
+       /*
+-       * Budget request settings: deletion direntry, new direntry, removing
+-       * the old inode, and changing old and new parent directory inodes.
++       * Budget request settings:
++       *   req: deletion direntry, new direntry, removing the old inode,
++       *   and changing old and new parent directory inodes.
++       *
++       *   wht_req: new whiteout inode for RENAME_WHITEOUT.
+        *
+-       * However, this operation also marks the target inode as dirty and
+-       * does not write it, so we allocate budget for the target inode
+-       * separately.
++       *   ino_req: marks the target inode as dirty and does not write it.
+        */
+       dbg_gen("dent '%pd' ino %lu in dir ino %lu to dent '%pd' in dir ino %lu flags 0x%x",
+@@ -1326,7 +1356,6 @@ static int do_rename(struct inode *old_d
+       if (flags & RENAME_WHITEOUT) {
+               union ubifs_dev_desc *dev = NULL;
+-              struct ubifs_budget_req wht_req;
+               dev = kmalloc(sizeof(union ubifs_dev_desc), GFP_NOFS);
+               if (!dev) {
+@@ -1334,24 +1363,26 @@ static int do_rename(struct inode *old_d
+                       goto out_release;
+               }
+-              err = do_tmpfile(old_dir, old_dentry, S_IFCHR | WHITEOUT_MODE, &whiteout);
+-              if (err) {
++              /*
++               * The whiteout inode without dentry is pinned in memory,
++               * umount won't happen during rename process because we
++               * got parent dentry.
++               */
++              whiteout = create_whiteout(old_dir, old_dentry);
++              if (IS_ERR(whiteout)) {
++                      err = PTR_ERR(whiteout);
+                       kfree(dev);
+                       goto out_release;
+               }
+-              spin_lock(&whiteout->i_lock);
+-              whiteout->i_state |= I_LINKABLE;
+-              spin_unlock(&whiteout->i_lock);
+-
+               whiteout_ui = ubifs_inode(whiteout);
+               whiteout_ui->data = dev;
+               whiteout_ui->data_len = ubifs_encode_dev(dev, MKDEV(0, 0));
+               ubifs_assert(c, !whiteout_ui->dirty);
+               memset(&wht_req, 0, sizeof(struct ubifs_budget_req));
+-              wht_req.dirtied_ino = 1;
+-              wht_req.dirtied_ino_d = ALIGN(whiteout_ui->data_len, 8);
++              wht_req.new_ino = 1;
++              wht_req.new_ino_d = ALIGN(whiteout_ui->data_len, 8);
+               /*
+                * To avoid deadlock between space budget (holds ui_mutex and
+                * waits wb work) and writeback work(waits ui_mutex), do space
+@@ -1359,6 +1390,11 @@ static int do_rename(struct inode *old_d
+                */
+               err = ubifs_budget_space(c, &wht_req);
+               if (err) {
++                      /*
++                       * Whiteout inode can not be written on flash by
++                       * ubifs_jnl_write_inode(), because it's neither
++                       * dirty nor zero-nlink.
++                       */
+                       iput(whiteout);
+                       goto out_release;
+               }
+@@ -1433,17 +1469,11 @@ static int do_rename(struct inode *old_d
+               sync = IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir);
+               if (unlink && IS_SYNC(new_inode))
+                       sync = 1;
+-      }
+-
+-      if (whiteout) {
+-              inc_nlink(whiteout);
+-              mark_inode_dirty(whiteout);
+-
+-              spin_lock(&whiteout->i_lock);
+-              whiteout->i_state &= ~I_LINKABLE;
+-              spin_unlock(&whiteout->i_lock);
+-
+-              iput(whiteout);
++              /*
++               * S_SYNC flag of whiteout inherits from the old_dir, and we
++               * have already checked the old dir inode. So there is no need
++               * to check whiteout.
++               */
+       }
+       err = ubifs_jnl_rename(c, old_dir, old_inode, &old_nm, new_dir,
+@@ -1454,6 +1484,11 @@ static int do_rename(struct inode *old_d
+       unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
+       ubifs_release_budget(c, &req);
++      if (whiteout) {
++              ubifs_release_budget(c, &wht_req);
++              iput(whiteout);
++      }
++
+       mutex_lock(&old_inode_ui->ui_mutex);
+       release = old_inode_ui->dirty;
+       mark_inode_dirty_sync(old_inode);
+@@ -1462,11 +1497,16 @@ static int do_rename(struct inode *old_d
+       if (release)
+               ubifs_release_budget(c, &ino_req);
+       if (IS_SYNC(old_inode))
+-              err = old_inode->i_sb->s_op->write_inode(old_inode, NULL);
++              /*
++               * Rename finished here. Although old inode cannot be updated
++               * on flash, old ctime is not a big problem, don't return err
++               * code to userspace.
++               */
++              old_inode->i_sb->s_op->write_inode(old_inode, NULL);
+       fscrypt_free_filename(&old_nm);
+       fscrypt_free_filename(&new_nm);
+-      return err;
++      return 0;
+ out_cancel:
+       if (unlink) {
+@@ -1487,11 +1527,11 @@ out_cancel:
+                               inc_nlink(old_dir);
+               }
+       }
++      unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
+       if (whiteout) {
+-              drop_nlink(whiteout);
++              ubifs_release_budget(c, &wht_req);
+               iput(whiteout);
+       }
+-      unlock_4_inodes(old_dir, new_dir, new_inode, whiteout);
+ out_release:
+       ubifs_release_budget(c, &ino_req);
+       ubifs_release_budget(c, &req);
+--- a/fs/ubifs/journal.c
++++ b/fs/ubifs/journal.c
+@@ -1207,9 +1207,9 @@ out_free:
+  * @sync: non-zero if the write-buffer has to be synchronized
+  *
+  * This function implements the re-name operation which may involve writing up
+- * to 4 inodes and 2 directory entries. It marks the written inodes as clean
+- * and returns zero on success. In case of failure, a negative error code is
+- * returned.
++ * to 4 inodes(new inode, whiteout inode, old and new parent directory inodes)
++ * and 2 directory entries. It marks the written inodes as clean and returns
++ * zero on success. In case of failure, a negative error code is returned.
+  */
+ int ubifs_jnl_rename(struct ubifs_info *c, const struct inode *old_dir,
+                    const struct inode *old_inode,
+@@ -1222,14 +1222,15 @@ int ubifs_jnl_rename(struct ubifs_info *
+       void *p;
+       union ubifs_key key;
+       struct ubifs_dent_node *dent, *dent2;
+-      int err, dlen1, dlen2, ilen, lnum, offs, len, orphan_added = 0;
++      int err, dlen1, dlen2, ilen, wlen, lnum, offs, len, orphan_added = 0;
+       int aligned_dlen1, aligned_dlen2, plen = UBIFS_INO_NODE_SZ;
+       int last_reference = !!(new_inode && new_inode->i_nlink == 0);
+       int move = (old_dir != new_dir);
+-      struct ubifs_inode *new_ui;
++      struct ubifs_inode *new_ui, *whiteout_ui;
+       u8 hash_old_dir[UBIFS_HASH_ARR_SZ];
+       u8 hash_new_dir[UBIFS_HASH_ARR_SZ];
+       u8 hash_new_inode[UBIFS_HASH_ARR_SZ];
++      u8 hash_whiteout_inode[UBIFS_HASH_ARR_SZ];
+       u8 hash_dent1[UBIFS_HASH_ARR_SZ];
+       u8 hash_dent2[UBIFS_HASH_ARR_SZ];
+@@ -1249,9 +1250,20 @@ int ubifs_jnl_rename(struct ubifs_info *
+       } else
+               ilen = 0;
++      if (whiteout) {
++              whiteout_ui = ubifs_inode(whiteout);
++              ubifs_assert(c, mutex_is_locked(&whiteout_ui->ui_mutex));
++              ubifs_assert(c, whiteout->i_nlink == 1);
++              ubifs_assert(c, !whiteout_ui->dirty);
++              wlen = UBIFS_INO_NODE_SZ;
++              wlen += whiteout_ui->data_len;
++      } else
++              wlen = 0;
++
+       aligned_dlen1 = ALIGN(dlen1, 8);
+       aligned_dlen2 = ALIGN(dlen2, 8);
+-      len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) + ALIGN(plen, 8);
++      len = aligned_dlen1 + aligned_dlen2 + ALIGN(ilen, 8) +
++            ALIGN(wlen, 8) + ALIGN(plen, 8);
+       if (move)
+               len += plen;
+@@ -1313,6 +1325,15 @@ int ubifs_jnl_rename(struct ubifs_info *
+               p += ALIGN(ilen, 8);
+       }
++      if (whiteout) {
++              pack_inode(c, p, whiteout, 0);
++              err = ubifs_node_calc_hash(c, p, hash_whiteout_inode);
++              if (err)
++                      goto out_release;
++
++              p += ALIGN(wlen, 8);
++      }
++
+       if (!move) {
+               pack_inode(c, p, old_dir, 1);
+               err = ubifs_node_calc_hash(c, p, hash_old_dir);
+@@ -1352,6 +1373,9 @@ int ubifs_jnl_rename(struct ubifs_info *
+               if (new_inode)
+                       ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
+                                                 new_inode->i_ino);
++              if (whiteout)
++                      ubifs_wbuf_add_ino_nolock(&c->jheads[BASEHD].wbuf,
++                                                whiteout->i_ino);
+       }
+       release_head(c, BASEHD);
+@@ -1368,8 +1392,6 @@ int ubifs_jnl_rename(struct ubifs_info *
+               err = ubifs_tnc_add_nm(c, &key, lnum, offs, dlen2, hash_dent2, old_nm);
+               if (err)
+                       goto out_ro;
+-
+-              ubifs_delete_orphan(c, whiteout->i_ino);
+       } else {
+               err = ubifs_add_dirt(c, lnum, dlen2);
+               if (err)
+@@ -1390,6 +1412,15 @@ int ubifs_jnl_rename(struct ubifs_info *
+               offs += ALIGN(ilen, 8);
+       }
++      if (whiteout) {
++              ino_key_init(c, &key, whiteout->i_ino);
++              err = ubifs_tnc_add(c, &key, lnum, offs, wlen,
++                                  hash_whiteout_inode);
++              if (err)
++                      goto out_ro;
++              offs += ALIGN(wlen, 8);
++      }
++
+       ino_key_init(c, &key, old_dir->i_ino);
+       err = ubifs_tnc_add(c, &key, lnum, offs, plen, hash_old_dir);
+       if (err)
+@@ -1410,6 +1441,11 @@ int ubifs_jnl_rename(struct ubifs_info *
+               new_ui->synced_i_size = new_ui->ui_size;
+               spin_unlock(&new_ui->ui_lock);
+       }
++      /*
++       * No need to mark whiteout inode clean.
++       * Whiteout doesn't have non-zero size, no need to update
++       * synced_i_size for whiteout_ui.
++       */
+       mark_inode_clean(c, ubifs_inode(old_dir));
+       if (move)
+               mark_inode_clean(c, ubifs_inode(new_dir));
diff --git a/queue-5.15/ubifs-rename_whiteout-correct-old_dir-size-computing.patch b/queue-5.15/ubifs-rename_whiteout-correct-old_dir-size-computing.patch
new file mode 100644 (file)
index 0000000..50012b5
--- /dev/null
@@ -0,0 +1,35 @@
+From 705757274599e2e064dd3054aabc74e8af31a095 Mon Sep 17 00:00:00 2001
+From: Baokun Li <libaokun1@huawei.com>
+Date: Tue, 15 Feb 2022 12:07:36 +0800
+Subject: ubifs: rename_whiteout: correct old_dir size computing
+
+From: Baokun Li <libaokun1@huawei.com>
+
+commit 705757274599e2e064dd3054aabc74e8af31a095 upstream.
+
+When renaming the whiteout file, the old whiteout file is not deleted.
+Therefore, we add the old dentry size to the old dir like XFS.
+Otherwise, an error may be reported due to `fscki->calc_sz != fscki->size`
+in check_inodes.
+
+Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT")
+Reported-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/dir.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -1402,6 +1402,9 @@ static int do_rename(struct inode *old_d
+                       iput(whiteout);
+                       goto out_release;
+               }
++
++              /* Add the old_dentry size to the old_dir size. */
++              old_sz -= CALC_DENT_SIZE(fname_len(&old_nm));
+       }
+       lock_4_inodes(old_dir, new_dir, new_inode, whiteout);
diff --git a/queue-5.15/ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch b/queue-5.15/ubifs-rename_whiteout-fix-double-free-for-whiteout_ui-data.patch
new file mode 100644 (file)
index 0000000..193ac9a
--- /dev/null
@@ -0,0 +1,71 @@
+From 40a8f0d5e7b3999f096570edab71c345da812e3e Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Mon, 27 Dec 2021 11:22:32 +0800
+Subject: ubifs: rename_whiteout: Fix double free for whiteout_ui->data
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 40a8f0d5e7b3999f096570edab71c345da812e3e upstream.
+
+'whiteout_ui->data' will be freed twice if space budget fail for
+rename whiteout operation as following process:
+
+rename_whiteout
+  dev = kmalloc
+  whiteout_ui->data = dev
+  kfree(whiteout_ui->data)  // Free first time
+  iput(whiteout)
+    ubifs_free_inode
+      kfree(ui->data)      // Double free!
+
+KASAN reports:
+==================================================================
+BUG: KASAN: double-free or invalid-free in ubifs_free_inode+0x4f/0x70
+Call Trace:
+  kfree+0x117/0x490
+  ubifs_free_inode+0x4f/0x70 [ubifs]
+  i_callback+0x30/0x60
+  rcu_do_batch+0x366/0xac0
+  __do_softirq+0x133/0x57f
+
+Allocated by task 1506:
+  kmem_cache_alloc_trace+0x3c2/0x7a0
+  do_rename+0x9b7/0x1150 [ubifs]
+  ubifs_rename+0x106/0x1f0 [ubifs]
+  do_syscall_64+0x35/0x80
+
+Freed by task 1506:
+  kfree+0x117/0x490
+  do_rename.cold+0x53/0x8a [ubifs]
+  ubifs_rename+0x106/0x1f0 [ubifs]
+  do_syscall_64+0x35/0x80
+
+The buggy address belongs to the object at ffff88810238bed8 which
+belongs to the cache kmalloc-8 of size 8
+==================================================================
+
+Let ubifs_free_inode() free 'whiteout_ui->data'. BTW, delete unused
+assignment 'whiteout_ui->data_len = 0', process 'ubifs_evict_inode()
+-> ubifs_jnl_delete_inode() -> ubifs_jnl_write_inode()' doesn't need it
+(because 'inc_nlink(whiteout)' won't be executed by 'goto out_release',
+ and the nlink of whiteout inode is 0).
+
+Fixes: 9e0a1fff8db56ea ("ubifs: Implement RENAME_WHITEOUT")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/dir.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/fs/ubifs/dir.c
++++ b/fs/ubifs/dir.c
+@@ -1425,8 +1425,6 @@ static int do_rename(struct inode *old_d
+               err = ubifs_budget_space(c, &wht_req);
+               if (err) {
+-                      kfree(whiteout_ui->data);
+-                      whiteout_ui->data_len = 0;
+                       iput(whiteout);
+                       goto out_release;
+               }
diff --git a/queue-5.15/ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch b/queue-5.15/ubifs-setflags-make-dirtied_ino_d-8-bytes-aligned.patch
new file mode 100644 (file)
index 0000000..c173db0
--- /dev/null
@@ -0,0 +1,38 @@
+From 1b83ec057db16b4d0697dc21ef7a9743b6041f72 Mon Sep 17 00:00:00 2001
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+Date: Mon, 27 Dec 2021 11:22:39 +0800
+Subject: ubifs: setflags: Make dirtied_ino_d 8 bytes aligned
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+commit 1b83ec057db16b4d0697dc21ef7a9743b6041f72 upstream.
+
+Make 'ui->data_len' aligned with 8 bytes before it is assigned to
+dirtied_ino_d. Since 8871d84c8f8b0c6b("ubifs: convert to fileattr")
+applied, 'setflags()' only affects regular files and directories, only
+xattr inode, symlink inode and special inode(pipe/char_dev/block_dev)
+have non-zero 'ui->data_len' field, so assertion
+'!(req->dirtied_ino_d & 7)' cannot fail in ubifs_budget_space().
+To avoid assertion fails in future evolution(eg. setflags can operate
+special inodes), it's better to make dirtied_ino_d 8 bytes aligned,
+after all aligned size is still zero for regular files.
+
+Fixes: 1e51764a3c2ac05a ("UBIFS: add new flash file system")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ubifs/ioctl.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/ubifs/ioctl.c
++++ b/fs/ubifs/ioctl.c
+@@ -108,7 +108,7 @@ static int setflags(struct inode *inode,
+       struct ubifs_inode *ui = ubifs_inode(inode);
+       struct ubifs_info *c = inode->i_sb->s_fs_info;
+       struct ubifs_budget_req req = { .dirtied_ino = 1,
+-                                      .dirtied_ino_d = ui->data_len };
++                      .dirtied_ino_d = ALIGN(ui->data_len, 8) };
+       err = ubifs_budget_space(c, &req);
+       if (err)