]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 24 Sep 2022 10:12:28 +0000 (12:12 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 24 Sep 2022 10:12:28 +0000 (12:12 +0200)
added patches:
interconnect-qcom-icc-rpmh-add-bcms-to-commit-list-in-pre_aggregate.patch
kvm-sev-add-cache-flush-to-solve-sev-cache-incoherency-issues.patch
xfs-fix-up-non-directory-creation-in-sgid-directories.patch
xfs-reorder-iunlink-remove-operation-in-xfs_ifree.patch
xfs-validate-inode-fork-size-against-fork-format.patch

queue-5.10/interconnect-qcom-icc-rpmh-add-bcms-to-commit-list-in-pre_aggregate.patch [new file with mode: 0644]
queue-5.10/kvm-sev-add-cache-flush-to-solve-sev-cache-incoherency-issues.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/xfs-fix-up-non-directory-creation-in-sgid-directories.patch [new file with mode: 0644]
queue-5.10/xfs-reorder-iunlink-remove-operation-in-xfs_ifree.patch [new file with mode: 0644]
queue-5.10/xfs-validate-inode-fork-size-against-fork-format.patch [new file with mode: 0644]

diff --git a/queue-5.10/interconnect-qcom-icc-rpmh-add-bcms-to-commit-list-in-pre_aggregate.patch b/queue-5.10/interconnect-qcom-icc-rpmh-add-bcms-to-commit-list-in-pre_aggregate.patch
new file mode 100644 (file)
index 0000000..5cf2a69
--- /dev/null
@@ -0,0 +1,95 @@
+From b95b668eaaa2574e8ee72f143c52075e9955177e Mon Sep 17 00:00:00 2001
+From: Mike Tipton <mdtipton@codeaurora.org>
+Date: Thu, 25 Nov 2021 19:47:51 +0200
+Subject: interconnect: qcom: icc-rpmh: Add BCMs to commit list in pre_aggregate
+
+From: Mike Tipton <mdtipton@codeaurora.org>
+
+commit b95b668eaaa2574e8ee72f143c52075e9955177e upstream.
+
+We're only adding BCMs to the commit list in aggregate(), but there are
+cases where pre_aggregate() is called without subsequently calling
+aggregate(). In particular, in icc_sync_state() when a node with initial
+BW has zero requests. Since BCMs aren't added to the commit list in
+these cases, we don't actually send the zero BW request to HW. So the
+resources remain on unnecessarily.
+
+Add BCMs to the commit list in pre_aggregate() instead, which is always
+called even when there are no requests.
+
+Signed-off-by: Mike Tipton <mdtipton@codeaurora.org>
+[georgi: remove icc_sync_state for platforms with incomplete support]
+Link: https://lore.kernel.org/r/20211125174751.25317-1-djakov@kernel.org
+Signed-off-by: Georgi Djakov <djakov@kernel.org>
+[dianders: dropped sm8350.c which isn't present in 5.10]
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Acked-by: Alex Elder <elder@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/interconnect/qcom/icc-rpmh.c |   10 +++++-----
+ drivers/interconnect/qcom/sm8150.c   |    1 -
+ drivers/interconnect/qcom/sm8250.c   |    1 -
+ 3 files changed, 5 insertions(+), 7 deletions(-)
+
+--- a/drivers/interconnect/qcom/icc-rpmh.c
++++ b/drivers/interconnect/qcom/icc-rpmh.c
+@@ -20,13 +20,18 @@ void qcom_icc_pre_aggregate(struct icc_n
+ {
+       size_t i;
+       struct qcom_icc_node *qn;
++      struct qcom_icc_provider *qp;
+       qn = node->data;
++      qp = to_qcom_provider(node->provider);
+       for (i = 0; i < QCOM_ICC_NUM_BUCKETS; i++) {
+               qn->sum_avg[i] = 0;
+               qn->max_peak[i] = 0;
+       }
++
++      for (i = 0; i < qn->num_bcms; i++)
++              qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
+ }
+ EXPORT_SYMBOL_GPL(qcom_icc_pre_aggregate);
+@@ -44,10 +49,8 @@ int qcom_icc_aggregate(struct icc_node *
+ {
+       size_t i;
+       struct qcom_icc_node *qn;
+-      struct qcom_icc_provider *qp;
+       qn = node->data;
+-      qp = to_qcom_provider(node->provider);
+       if (!tag)
+               tag = QCOM_ICC_TAG_ALWAYS;
+@@ -67,9 +70,6 @@ int qcom_icc_aggregate(struct icc_node *
+       *agg_avg += avg_bw;
+       *agg_peak = max_t(u32, *agg_peak, peak_bw);
+-      for (i = 0; i < qn->num_bcms; i++)
+-              qcom_icc_bcm_voter_add(qp->voter, qn->bcms[i]);
+-
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(qcom_icc_aggregate);
+--- a/drivers/interconnect/qcom/sm8150.c
++++ b/drivers/interconnect/qcom/sm8150.c
+@@ -627,7 +627,6 @@ static struct platform_driver qnoc_drive
+       .driver = {
+               .name = "qnoc-sm8150",
+               .of_match_table = qnoc_of_match,
+-              .sync_state = icc_sync_state,
+       },
+ };
+ module_platform_driver(qnoc_driver);
+--- a/drivers/interconnect/qcom/sm8250.c
++++ b/drivers/interconnect/qcom/sm8250.c
+@@ -643,7 +643,6 @@ static struct platform_driver qnoc_drive
+       .driver = {
+               .name = "qnoc-sm8250",
+               .of_match_table = qnoc_of_match,
+-              .sync_state = icc_sync_state,
+       },
+ };
+ module_platform_driver(qnoc_driver);
diff --git a/queue-5.10/kvm-sev-add-cache-flush-to-solve-sev-cache-incoherency-issues.patch b/queue-5.10/kvm-sev-add-cache-flush-to-solve-sev-cache-incoherency-issues.patch
new file mode 100644 (file)
index 0000000..56bfe02
--- /dev/null
@@ -0,0 +1,190 @@
+From 683412ccf61294d727ead4a73d97397396e69a6b Mon Sep 17 00:00:00 2001
+From: Mingwei Zhang <mizhang@google.com>
+Date: Thu, 21 Apr 2022 03:14:07 +0000
+Subject: KVM: SEV: add cache flush to solve SEV cache incoherency issues
+
+From: Mingwei Zhang <mizhang@google.com>
+
+commit 683412ccf61294d727ead4a73d97397396e69a6b upstream.
+
+Flush the CPU caches when memory is reclaimed from an SEV guest (where
+reclaim also includes it being unmapped from KVM's memslots).  Due to lack
+of coherency for SEV encrypted memory, failure to flush results in silent
+data corruption if userspace is malicious/broken and doesn't ensure SEV
+guest memory is properly pinned and unpinned.
+
+Cache coherency is not enforced across the VM boundary in SEV (AMD APM
+vol.2 Section 15.34.7). Confidential cachelines, generated by confidential
+VM guests have to be explicitly flushed on the host side. If a memory page
+containing dirty confidential cachelines was released by VM and reallocated
+to another user, the cachelines may corrupt the new user at a later time.
+
+KVM takes a shortcut by assuming all confidential memory remains pinned
+until the end of VM lifetime. Therefore, KVM does not flush cache at
+mmu_notifier invalidation events. Because of this incorrect assumption and
+the lack of cache flushing, malicious userspace can crash the host kernel
+by creating a malicious VM that continuously allocates/releases unpinned
+confidential memory pages while the VM is running.
+
+Add cache flush operations to mmu_notifier operations to ensure that any
+physical memory leaving the guest VM is flushed. In particular, hook
+mmu_notifier_invalidate_range_start and mmu_notifier_release events and
+flush cache accordingly. The hook is called after releasing the mmu lock to
+avoid contention with other vCPUs.
+
+Cc: stable@vger.kernel.org
+Suggested-by: Sean Christopherson <seanjc@google.com>
+Reported-by: Mingwei Zhang <mizhang@google.com>
+Signed-off-by: Mingwei Zhang <mizhang@google.com>
+Message-Id: <20220421031407.2516575-4-mizhang@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+[OP: applied kvm_arch_guest_memory_reclaimed() calls in kvm_set_memslot() and
+kvm_mmu_notifier_invalidate_range_start();
+OP: adjusted kvm_arch_guest_memory_reclaimed() to not use static_call_cond()]
+Signed-off-by: Ovidiu Panait <ovidiu.panait@windriver.com>
+Reviewed-by: Liam Merwick <liam.merwick@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/kvm_host.h |    1 +
+ arch/x86/kvm/svm/sev.c          |    8 ++++++++
+ arch/x86/kvm/svm/svm.c          |    1 +
+ arch/x86/kvm/svm/svm.h          |    2 ++
+ arch/x86/kvm/x86.c              |    6 ++++++
+ include/linux/kvm_host.h        |    2 ++
+ virt/kvm/kvm_main.c             |   16 ++++++++++++++--
+ 7 files changed, 34 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1275,6 +1275,7 @@ struct kvm_x86_ops {
+       int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
+       int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+       int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
++      void (*guest_memory_reclaimed)(struct kvm *kvm);
+       int (*get_msr_feature)(struct kvm_msr_entry *entry);
+--- a/arch/x86/kvm/svm/sev.c
++++ b/arch/x86/kvm/svm/sev.c
+@@ -1177,6 +1177,14 @@ void sev_hardware_teardown(void)
+       sev_flush_asids();
+ }
++void sev_guest_memory_reclaimed(struct kvm *kvm)
++{
++      if (!sev_guest(kvm))
++              return;
++
++      wbinvd_on_all_cpus();
++}
++
+ void pre_sev_run(struct vcpu_svm *svm, int cpu)
+ {
+       struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -4325,6 +4325,7 @@ static struct kvm_x86_ops svm_x86_ops __
+       .mem_enc_op = svm_mem_enc_op,
+       .mem_enc_reg_region = svm_register_enc_region,
+       .mem_enc_unreg_region = svm_unregister_enc_region,
++      .guest_memory_reclaimed = sev_guest_memory_reclaimed,
+       .can_emulate_instruction = svm_can_emulate_instruction,
+--- a/arch/x86/kvm/svm/svm.h
++++ b/arch/x86/kvm/svm/svm.h
+@@ -491,6 +491,8 @@ int svm_register_enc_region(struct kvm *
+                           struct kvm_enc_region *range);
+ int svm_unregister_enc_region(struct kvm *kvm,
+                             struct kvm_enc_region *range);
++void sev_guest_memory_reclaimed(struct kvm *kvm);
++
+ void pre_sev_run(struct vcpu_svm *svm, int cpu);
+ int __init sev_hardware_setup(void);
+ void sev_hardware_teardown(void);
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -8875,6 +8875,12 @@ void kvm_arch_mmu_notifier_invalidate_ra
+               kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD);
+ }
++void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
++{
++      if (kvm_x86_ops.guest_memory_reclaimed)
++              kvm_x86_ops.guest_memory_reclaimed(kvm);
++}
++
+ void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu)
+ {
+       if (!lapic_in_kernel(vcpu))
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -1489,6 +1489,8 @@ static inline long kvm_arch_vcpu_async_i
+ void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm,
+                                           unsigned long start, unsigned long end);
++void kvm_arch_guest_memory_reclaimed(struct kvm *kvm);
++
+ #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE
+ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu);
+ #else
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -159,6 +159,10 @@ __weak void kvm_arch_mmu_notifier_invali
+ {
+ }
++__weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm)
++{
++}
++
+ bool kvm_is_zone_device_pfn(kvm_pfn_t pfn)
+ {
+       /*
+@@ -340,6 +344,12 @@ void kvm_reload_remote_mmus(struct kvm *
+       kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ }
++static void kvm_flush_shadow_all(struct kvm *kvm)
++{
++      kvm_arch_flush_shadow_all(kvm);
++      kvm_arch_guest_memory_reclaimed(kvm);
++}
++
+ #ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
+ static inline void *mmu_memory_cache_alloc_obj(struct kvm_mmu_memory_cache *mc,
+                                              gfp_t gfp_flags)
+@@ -489,6 +499,7 @@ static int kvm_mmu_notifier_invalidate_r
+               kvm_flush_remote_tlbs(kvm);
+       spin_unlock(&kvm->mmu_lock);
++      kvm_arch_guest_memory_reclaimed(kvm);
+       srcu_read_unlock(&kvm->srcu, idx);
+       return 0;
+@@ -592,7 +603,7 @@ static void kvm_mmu_notifier_release(str
+       int idx;
+       idx = srcu_read_lock(&kvm->srcu);
+-      kvm_arch_flush_shadow_all(kvm);
++      kvm_flush_shadow_all(kvm);
+       srcu_read_unlock(&kvm->srcu, idx);
+ }
+@@ -896,7 +907,7 @@ static void kvm_destroy_vm(struct kvm *k
+ #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
+       mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
+ #else
+-      kvm_arch_flush_shadow_all(kvm);
++      kvm_flush_shadow_all(kvm);
+ #endif
+       kvm_arch_destroy_vm(kvm);
+       kvm_destroy_devices(kvm);
+@@ -1238,6 +1249,7 @@ static int kvm_set_memslot(struct kvm *k
+                *      - kvm_is_visible_gfn (mmu_check_root)
+                */
+               kvm_arch_flush_shadow_memslot(kvm, slot);
++              kvm_arch_guest_memory_reclaimed(kvm);
+       }
+       r = kvm_arch_prepare_memory_region(kvm, new, mem, change);
index 88de97392634ec7788990f84d5b12310b3ed2202..787cfb2e11eeccad94ffe0a486b5e50312d441d3 100644 (file)
@@ -59,3 +59,8 @@ gpiolib-cdev-set-lineevent_state-irq-after-irq-register-successfully.patch
 riscv-fix-a-nasty-sigreturn-bug.patch
 can-flexcan-flexcan_mailbox_read-fix-return-value-for-drop-true.patch
 mm-slub-fix-to-return-errno-if-kmalloc-fails.patch
+kvm-sev-add-cache-flush-to-solve-sev-cache-incoherency-issues.patch
+interconnect-qcom-icc-rpmh-add-bcms-to-commit-list-in-pre_aggregate.patch
+xfs-fix-up-non-directory-creation-in-sgid-directories.patch
+xfs-reorder-iunlink-remove-operation-in-xfs_ifree.patch
+xfs-validate-inode-fork-size-against-fork-format.patch
diff --git a/queue-5.10/xfs-fix-up-non-directory-creation-in-sgid-directories.patch b/queue-5.10/xfs-fix-up-non-directory-creation-in-sgid-directories.patch
new file mode 100644 (file)
index 0000000..6db4cd0
--- /dev/null
@@ -0,0 +1,65 @@
+From 01ea173e103edd5ec41acec65b9261b87e123fc2 Mon Sep 17 00:00:00 2001
+From: Christoph Hellwig <hch@lst.de>
+Date: Fri, 22 Jan 2021 16:48:18 -0800
+Subject: xfs: fix up non-directory creation in SGID directories
+
+From: Christoph Hellwig <hch@lst.de>
+
+commit 01ea173e103edd5ec41acec65b9261b87e123fc2 upstream.
+
+XFS always inherits the SGID bit if it is set on the parent inode, while
+the generic inode_init_owner does not do this in a few cases where it can
+create a possible security problem, see commit 0fa3ecd87848
+("Fix up non-directory creation in SGID directories") for details.
+
+Switch XFS to use the generic helper for the normal path to fix this,
+just keeping the simple field inheritance open coded for the non-sgid
+case with the bsdgrpid mount option.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Reported-by: Christian Brauner <christian.brauner@ubuntu.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode.c |   14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -802,6 +802,7 @@ xfs_ialloc(
+       xfs_buf_t       **ialloc_context,
+       xfs_inode_t     **ipp)
+ {
++      struct inode    *dir = pip ? VFS_I(pip) : NULL;
+       struct xfs_mount *mp = tp->t_mountp;
+       xfs_ino_t       ino;
+       xfs_inode_t     *ip;
+@@ -847,18 +848,17 @@ xfs_ialloc(
+               return error;
+       ASSERT(ip != NULL);
+       inode = VFS_I(ip);
+-      inode->i_mode = mode;
+       set_nlink(inode, nlink);
+-      inode->i_uid = current_fsuid();
+       inode->i_rdev = rdev;
+       ip->i_d.di_projid = prid;
+-      if (pip && XFS_INHERIT_GID(pip)) {
+-              inode->i_gid = VFS_I(pip)->i_gid;
+-              if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode))
+-                      inode->i_mode |= S_ISGID;
++      if (dir && !(dir->i_mode & S_ISGID) &&
++          (mp->m_flags & XFS_MOUNT_GRPID)) {
++              inode->i_uid = current_fsuid();
++              inode->i_gid = dir->i_gid;
++              inode->i_mode = mode;
+       } else {
+-              inode->i_gid = current_fsgid();
++              inode_init_owner(inode, dir, mode);
+       }
+       /*
diff --git a/queue-5.10/xfs-reorder-iunlink-remove-operation-in-xfs_ifree.patch b/queue-5.10/xfs-reorder-iunlink-remove-operation-in-xfs_ifree.patch
new file mode 100644 (file)
index 0000000..6925251
--- /dev/null
@@ -0,0 +1,92 @@
+From foo@baz Sat Sep 24 11:38:56 AM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Thu, 22 Sep 2022 18:47:27 +0300
+Subject: xfs: reorder iunlink remove operation in xfs_ifree
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, "Darrick J . Wong" <djwong@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Chandan Babu R <chandan.babu@oracle.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Dave Chinner <dchinner@redhat.com>, Frank Hofmann <fhofmann@cloudflare.com>, "Darrick J . Wong" <darrick.wong@oracle.com>, Dave Chinner <david@fromorbit.com>
+Message-ID: <20220922154728.97402-2-amir73il@gmail.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 9a5280b312e2e7898b6397b2ca3cfd03f67d7be1 upstream.
+
+[backport for 5.10.y]
+
+The O_TMPFILE creation implementation creates a specific order of
+operations for inode allocation/freeing and unlinked list
+modification. Currently both are serialised by the AGI, so the order
+doesn't strictly matter as long as they are both in the same
+transaction.
+
+However, if we want to move the unlinked list insertions largely out
+from under the AGI lock, then we have to be concerned about the
+order in which we do unlinked list modification operations.
+O_TMPFILE creation tells us this order is inode allocation/free,
+then unlinked list modification.
+
+Change xfs_ifree() to use this same ordering on unlinked list
+removal. This way we always guarantee that when we enter the
+iunlinked list removal code from this path, we already have the AGI
+locked and we don't have to worry about lock nesting AGI reads
+inside unlink list locks because it's already locked and attached to
+the transaction.
+
+We can do this safely as the inode freeing and unlinked list removal
+are done in the same transaction and hence are atomic operations
+with respect to log recovery.
+
+Reported-by: Frank Hofmann <fhofmann@cloudflare.com>
+Fixes: 298f7bec503f ("xfs: pin inode backing buffer to the inode log item")
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/xfs_inode.c |   22 ++++++++++++----------
+ 1 file changed, 12 insertions(+), 10 deletions(-)
+
+--- a/fs/xfs/xfs_inode.c
++++ b/fs/xfs/xfs_inode.c
+@@ -2669,14 +2669,13 @@ xfs_ifree_cluster(
+ }
+ /*
+- * This is called to return an inode to the inode free list.
+- * The inode should already be truncated to 0 length and have
+- * no pages associated with it.  This routine also assumes that
+- * the inode is already a part of the transaction.
++ * This is called to return an inode to the inode free list.  The inode should
++ * already be truncated to 0 length and have no pages associated with it.  This
++ * routine also assumes that the inode is already a part of the transaction.
+  *
+- * The on-disk copy of the inode will have been added to the list
+- * of unlinked inodes in the AGI. We need to remove the inode from
+- * that list atomically with respect to freeing it here.
++ * The on-disk copy of the inode will have been added to the list of unlinked
++ * inodes in the AGI. We need to remove the inode from that list atomically with
++ * respect to freeing it here.
+  */
+ int
+ xfs_ifree(
+@@ -2694,13 +2693,16 @@ xfs_ifree(
+       ASSERT(ip->i_d.di_nblocks == 0);
+       /*
+-       * Pull the on-disk inode from the AGI unlinked list.
++       * Free the inode first so that we guarantee that the AGI lock is going
++       * to be taken before we remove the inode from the unlinked list. This
++       * makes the AGI lock -> unlinked list modification order the same as
++       * used in O_TMPFILE creation.
+        */
+-      error = xfs_iunlink_remove(tp, ip);
++      error = xfs_difree(tp, ip->i_ino, &xic);
+       if (error)
+               return error;
+-      error = xfs_difree(tp, ip->i_ino, &xic);
++      error = xfs_iunlink_remove(tp, ip);
+       if (error)
+               return error;
diff --git a/queue-5.10/xfs-validate-inode-fork-size-against-fork-format.patch b/queue-5.10/xfs-validate-inode-fork-size-against-fork-format.patch
new file mode 100644 (file)
index 0000000..3ab298c
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Sat Sep 24 11:38:56 AM CEST 2022
+From: Amir Goldstein <amir73il@gmail.com>
+Date: Thu, 22 Sep 2022 18:47:28 +0300
+Subject: xfs: validate inode fork size against fork format
+To: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Sasha Levin <sashal@kernel.org>, "Darrick J . Wong" <djwong@kernel.org>, Leah Rumancik <leah.rumancik@gmail.com>, Chandan Babu R <chandan.babu@oracle.com>, linux-xfs@vger.kernel.org, stable@vger.kernel.org, Dave Chinner <dchinner@redhat.com>, Christoph Hellwig <hch@lst.de>, Dave Chinner <david@fromorbit.com>
+Message-ID: <20220922154728.97402-3-amir73il@gmail.com>
+
+From: Dave Chinner <dchinner@redhat.com>
+
+commit 1eb70f54c445fcbb25817841e774adb3d912f3e8 upstream.
+
+[backport for 5.10.y]
+
+xfs_repair catches fork size/format mismatches, but the in-kernel
+verifier doesn't, leading to null pointer failures when attempting
+to perform operations on the fork. This can occur in the
+xfs_dir_is_empty() where the in-memory fork format does not match
+the size and so the fork data pointer is accessed incorrectly.
+
+Note: this causes new failures in xfs/348 which is testing mode vs
+ftype mismatches. We now detect a regular file that has been changed
+to a directory or symlink mode as being corrupt because the data
+fork is for a symlink or directory should be in local form when
+there are only 3 bytes of data in the data fork. Hence the inode
+verify for the regular file now fires w/ -EFSCORRUPTED because
+the inode fork format does not match the format the corrupted mode
+says it should be in.
+
+Signed-off-by: Dave Chinner <dchinner@redhat.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Dave Chinner <david@fromorbit.com>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Acked-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/xfs/libxfs/xfs_inode_buf.c |   35 ++++++++++++++++++++++++++---------
+ 1 file changed, 26 insertions(+), 9 deletions(-)
+
+--- a/fs/xfs/libxfs/xfs_inode_buf.c
++++ b/fs/xfs/libxfs/xfs_inode_buf.c
+@@ -358,19 +358,36 @@ xfs_dinode_verify_fork(
+       int                     whichfork)
+ {
+       uint32_t                di_nextents = XFS_DFORK_NEXTENTS(dip, whichfork);
++      mode_t                  mode = be16_to_cpu(dip->di_mode);
++      uint32_t                fork_size = XFS_DFORK_SIZE(dip, mp, whichfork);
++      uint32_t                fork_format = XFS_DFORK_FORMAT(dip, whichfork);
+-      switch (XFS_DFORK_FORMAT(dip, whichfork)) {
++      /*
++       * For fork types that can contain local data, check that the fork
++       * format matches the size of local data contained within the fork.
++       *
++       * For all types, check that when the size says the fork should be in extent
++       * or btree format, the inode isn't claiming it is in local format.
++       */
++      if (whichfork == XFS_DATA_FORK) {
++              if (S_ISDIR(mode) || S_ISLNK(mode)) {
++                      if (be64_to_cpu(dip->di_size) <= fork_size &&
++                          fork_format != XFS_DINODE_FMT_LOCAL)
++                              return __this_address;
++              }
++
++              if (be64_to_cpu(dip->di_size) > fork_size &&
++                  fork_format == XFS_DINODE_FMT_LOCAL)
++                      return __this_address;
++      }
++
++      switch (fork_format) {
+       case XFS_DINODE_FMT_LOCAL:
+               /*
+-               * no local regular files yet
++               * No local regular files yet.
+                */
+-              if (whichfork == XFS_DATA_FORK) {
+-                      if (S_ISREG(be16_to_cpu(dip->di_mode)))
+-                              return __this_address;
+-                      if (be64_to_cpu(dip->di_size) >
+-                                      XFS_DFORK_SIZE(dip, mp, whichfork))
+-                              return __this_address;
+-              }
++              if (S_ISREG(mode) && whichfork == XFS_DATA_FORK)
++                      return __this_address;
+               if (di_nextents)
+                       return __this_address;
+               break;