ntfs: skip extent mft records in writeback to prevent deadlock

author Hyunchul Lee <hyc.lee@gmail.com>

Thu, 21 May 2026 05:37:03 +0000 (14:37 +0900)

committer Namjae Jeon <linkinjeon@kernel.org>

Fri, 5 Jun 2026 15:19:47 +0000 (00:19 +0900)
author Hyunchul Lee <hyc.lee@gmail.com>
Thu, 21 May 2026 05:37:03 +0000 (14:37 +0900)
committer Namjae Jeon <linkinjeon@kernel.org>
Fri, 5 Jun 2026 15:19:47 +0000 (00:19 +0900)
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c

index a7d10ee41b34476f569f30b66f6a0d436e410fa9..a5019e80951b8438f796041d46e3775a7e846683 100644 (file)
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -743,23 +743,6 @@ static int ntfs_test_inode_wb(struct inode *vi, u64 ino, void *data)
   *
   * If the mft record is not a FILE record or it is a base mft record, we can
   * safely write it and return 'true'.
- *
- * We now know the mft record is an extent mft record.  We check if the inode
- * corresponding to its base mft record is in icache. If it is not, we cannot
- * safely determine the state of the extent inode, so we return 'false'.
- *
- * We now have the base inode for the extent mft record.  We check if it has an
- * ntfs inode for the extent mft record attached. If not, it is safe to write
- * the extent mft record and we return 'true'.
- *
- * If the extent inode is attached, we check if it is dirty. If so, we return
- * 'false' (letting the standard write_inode path handle it).
- *
- * If it is not dirty, we attempt to lock the extent mft record. If the lock
- * was already taken, it is not safe to write and we return 'false'.
- *
- * If we manage to obtain the lock we have exclusive access to the extent mft
- * record. We set @locked_ni to the now locked ntfs inode and return 'true'.
   */
  static bool ntfs_may_write_mft_record(struct ntfs_volume *vol, const u64 mft_no,
                 const struct mft_record *m, struct ntfs_inode **locked_ni,
@@ -768,8 +751,7 @@ static bool ntfs_may_write_mft_record(struct ntfs_volume *vol, const u64 mft_no,
         struct super_block *sb = vol->sb;
         struct inode *mft_vi = vol->mft_ino;
         struct inode *vi;
-       struct ntfs_inode *ni, *eni, **extent_nis;
-       int i;
+       struct ntfs_inode *ni;
         struct ntfs_attr na = {0};
  
         ntfs_debug("Entering for inode 0x%llx.", mft_no);
@@ -849,100 +831,10 @@ static bool ntfs_may_write_mft_record(struct ntfs_volume *vol, const u64 mft_no,
                                 mft_no);
                 return true;
         }
-       /*
-        * This is an extent mft record.  Check if the inode corresponding to
-        * its base mft record is in icache and obtain a reference to it if it
-        * is.
-        */
-       na.mft_no = MREF_LE(m->base_mft_record);
-       na.state = 0;
-       ntfs_debug("Mft record 0x%llx is an extent record.  Looking for base inode 0x%llx in icache.",
-                       mft_no, na.mft_no);
-       if (!na.mft_no) {
-               /* Balance the below iput(). */
-               vi = igrab(mft_vi);
-               WARN_ON(vi != mft_vi);
-       } else {
-               vi = find_inode_nowait(sb, na.mft_no, ntfs_test_inode_wb, &na);
-               if (na.state == NI_BeingDeleted || na.state == NI_BeingCreated)
-                       return false;
-       }
  
-       if (!vi)
-               return false;
-       ntfs_debug("Base inode 0x%llx is in icache.", na.mft_no);
-       /*
-        * The base inode is in icache.  Check if it has the extent inode
-        * corresponding to this extent mft record attached.
-        */
-       ni = NTFS_I(vi);
-       mutex_lock(&ni->extent_lock);
-       if (ni->nr_extents <= 0) {
-               /*
-                * The base inode has no attached extent inodes, write this
-                * extent mft record.
-                */
-               mutex_unlock(&ni->extent_lock);
-               *ref_vi = vi;
-               ntfs_debug("Base inode 0x%llx has no attached extent inodes, write the extent record.",
-                               na.mft_no);
-               return true;
-       }
-       /* Iterate over the attached extent inodes. */
-       extent_nis = ni->ext.extent_ntfs_inos;
-       for (eni = NULL, i = 0; i < ni->nr_extents; ++i) {
-               if (mft_no == extent_nis[i]->mft_no) {
-                       /*
-                        * Found the extent inode corresponding to this extent
-                        * mft record.
-                        */
-                       eni = extent_nis[i];
-                       break;
-               }
-       }
-       /*
-        * If the extent inode was not attached to the base inode, write this
-        * extent mft record.
-        */
-       if (!eni) {
-               mutex_unlock(&ni->extent_lock);
-               *ref_vi = vi;
-               ntfs_debug("Extent inode 0x%llx is not attached to its base inode 0x%llx, write the extent record.",
-                               mft_no, na.mft_no);
-               return true;
-       }
-       ntfs_debug("Extent inode 0x%llx is attached to its base inode 0x%llx.",
-                       mft_no, na.mft_no);
-       /* Take a reference to the extent ntfs inode. */
-       atomic_inc(&eni->count);
-       mutex_unlock(&ni->extent_lock);
-
-       /* if extent inode is dirty, write_inode will write it */
-       if (NInoDirty(eni)) {
-               atomic_dec(&eni->count);
-               *ref_vi = vi;
-               return false;
-       }
-
-       /*
-        * Found the extent inode coresponding to this extent mft record.
-        * Try to take the mft record lock.
-        */
-       if (unlikely(!mutex_trylock(&eni->mrec_lock))) {
-               atomic_dec(&eni->count);
-               *ref_vi = vi;
-               ntfs_debug("Extent mft record 0x%llx is already locked, do not write it.",
-                               mft_no);
-               return false;
-       }
-       ntfs_debug("Managed to lock extent mft record 0x%llx, write it.",
-                       mft_no);
-       /*
-        * The write has to occur while we hold the mft record lock so return
-        * the locked extent ntfs inode.
-        */
-       *locked_ni = eni;
-       return true;
+       ntfs_debug("Mft record 0x%llx is an extent record, skip it.",
+                  mft_no);
+       return false;
  }
  
  static const char *es = "  Leaving inconsistent metadata.  Unmount and run chkdsk.";
@@ -2791,19 +2683,6 @@ static int ntfs_write_mft_block(struct folio *folio, struct writeback_control *w
                         unsigned int mft_record_off = 0;
                         s64 vcn_off = vcn;
  
-                       /*
-                        * Skip $MFT extent mft records and let them being written
-                        * by writeback to avioid deadlocks. the $MFT runlist
-                        * lock must be taken before $MFT extent mrec_lock is taken.
-                        */
-                       if (tni && tni->nr_extents < 0 &&
-                               tni->ext.base_ntfs_ino == NTFS_I(vol->mft_ino)) {
-                               mutex_unlock(&tni->mrec_lock);
-                               atomic_dec(&tni->count);
-                               iput(vol->mft_ino);
-                               continue;
-                       }
-
                         /*
                          * The record should be written.  If a locked ntfs
                          * inode was returned, add it to the array of locked
author	Hyunchul Lee <hyc.lee@gmail.com>
	Thu, 21 May 2026 05:37:03 +0000 (14:37 +0900)
committer	Namjae Jeon <linkinjeon@kernel.org>
	Fri, 5 Jun 2026 15:19:47 +0000 (00:19 +0900)