]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 4.9
author: Sasha Levin <sashal@kernel.org>
Thu, 23 Apr 2020 18:44:44 +0000 (14:44 -0400)
committer: Sasha Levin <sashal@kernel.org>
Thu, 23 Apr 2020 18:44:44 +0000 (14:44 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
queue-4.9/ext4-fix-extent_status-fragmentation-for-plain-files.patch [new file with mode: 0644]
queue-4.9/series

diff --git a/queue-4.9/ext4-fix-extent_status-fragmentation-for-plain-files.patch b/queue-4.9/ext4-fix-extent_status-fragmentation-for-plain-files.patch
new file mode 100644 (file)
index 0000000..7a1f3d6
--- /dev/null
@@ -0,0 +1,122 @@
+From c61885c6d2299b795586963cd782fe461caf8a08 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Nov 2019 12:25:02 +0000
+Subject: ext4: fix extent_status fragmentation for plain files
+
+From: Dmitry Monakhov <dmonakhov@gmail.com>
+
+[ Upstream commit 4068664e3cd2312610ceac05b74c4cf1853b8325 ]
+
+Extents are cached in read_extent_tree_block(); as a result, extents
+are not cached for inodes with depth == 0 when we try to find the
+extent using ext4_find_extent().  The result of the lookup is cached
+in ext4_map_blocks() but is only a subset of the extent on disk.  As a
+result, the contents of extents status cache can get very badly
+fragmented for certain workloads, such as a random 4k read workload.
+
+File size of /mnt/test is 33554432 (8192 blocks of 4096 bytes)
+ ext:     logical_offset:        physical_offset: length:   expected: flags:
+   0:        0..    8191:      40960..     49151:   8192:             last,eof
+
+$ perf record -e 'ext4:ext4_es_*' /root/bin/fio --name=t --direct=0 --rw=randread --bs=4k --filesize=32M --size=32M --filename=/mnt/test
+$ perf script | grep ext4_es_insert_extent | head -n 10
+             fio   131 [000]    13.975421:           ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [494/1) mapped 41454 status W
+             fio   131 [000]    13.975939:           ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [6064/1) mapped 47024 status W
+             fio   131 [000]    13.976467:           ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [6907/1) mapped 47867 status W
+             fio   131 [000]    13.976937:           ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [3850/1) mapped 44810 status W
+             fio   131 [000]    13.977440:           ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [3292/1) mapped 44252 status W
+             fio   131 [000]    13.977931:           ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [6882/1) mapped 47842 status W
+             fio   131 [000]    13.978376:           ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [3117/1) mapped 44077 status W
+             fio   131 [000]    13.978957:           ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [2896/1) mapped 43856 status W
+             fio   131 [000]    13.979474:           ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [7479/1) mapped 48439 status W
+
+Fix this by caching the extents for inodes with depth == 0 in
+ext4_find_extent().
+
+[ Renamed ext4_es_cache_extents() to ext4_cache_extents() since this
+  newly added function is not in extents_cache.c, and to avoid
+  potential visual confusion with ext4_es_cache_extent().  -TYT ]
+
+Signed-off-by: Dmitry Monakhov <dmonakhov@gmail.com>
+Link: https://lore.kernel.org/r/20191106122502.19986-1-dmonakhov@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/extents.c | 47 +++++++++++++++++++++++++++--------------------
+ 1 file changed, 27 insertions(+), 20 deletions(-)
+
+diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
+index 999d2a54297dc..5e51bf1193283 100644
+--- a/fs/ext4/extents.c
++++ b/fs/ext4/extents.c
+@@ -510,6 +510,30 @@ int ext4_ext_check_inode(struct inode *inode)
+       return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0);
+ }
++static void ext4_cache_extents(struct inode *inode,
++                             struct ext4_extent_header *eh)
++{
++      struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
++      ext4_lblk_t prev = 0;
++      int i;
++
++      for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
++              unsigned int status = EXTENT_STATUS_WRITTEN;
++              ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
++              int len = ext4_ext_get_actual_len(ex);
++
++              if (prev && (prev != lblk))
++                      ext4_es_cache_extent(inode, prev, lblk - prev, ~0,
++                                           EXTENT_STATUS_HOLE);
++
++              if (ext4_ext_is_unwritten(ex))
++                      status = EXTENT_STATUS_UNWRITTEN;
++              ext4_es_cache_extent(inode, lblk, len,
++                                   ext4_ext_pblock(ex), status);
++              prev = lblk + len;
++      }
++}
++
+ static struct buffer_head *
+ __read_extent_tree_block(const char *function, unsigned int line,
+                        struct inode *inode, ext4_fsblk_t pblk, int depth,
+@@ -540,26 +564,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
+        */
+       if (!(flags & EXT4_EX_NOCACHE) && depth == 0) {
+               struct ext4_extent_header *eh = ext_block_hdr(bh);
+-              struct ext4_extent *ex = EXT_FIRST_EXTENT(eh);
+-              ext4_lblk_t prev = 0;
+-              int i;
+-
+-              for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) {
+-                      unsigned int status = EXTENT_STATUS_WRITTEN;
+-                      ext4_lblk_t lblk = le32_to_cpu(ex->ee_block);
+-                      int len = ext4_ext_get_actual_len(ex);
+-
+-                      if (prev && (prev != lblk))
+-                              ext4_es_cache_extent(inode, prev,
+-                                                   lblk - prev, ~0,
+-                                                   EXTENT_STATUS_HOLE);
+-
+-                      if (ext4_ext_is_unwritten(ex))
+-                              status = EXTENT_STATUS_UNWRITTEN;
+-                      ext4_es_cache_extent(inode, lblk, len,
+-                                           ext4_ext_pblock(ex), status);
+-                      prev = lblk + len;
+-              }
++              ext4_cache_extents(inode, eh);
+       }
+       return bh;
+ errout:
+@@ -907,6 +912,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block,
+       path[0].p_bh = NULL;
+       i = depth;
++      if (!(flags & EXT4_EX_NOCACHE) && depth == 0)
++              ext4_cache_extents(inode, eh);
+       /* walk through the tree */
+       while (i) {
+               ext_debug("depth %d: num %d, max %d\n",
+-- 
+2.20.1
+
index c8430981d4949bdbd945b3c8e836ac95d513f6ce..60e2c69a14082684b8c59ebdb7c7cb69c1341733 100644 (file)
@@ -122,3 +122,4 @@ mtd-phram-fix-a-double-free-issue-in-error-path.patch
 x86-cpu-add-native-cpuid-variants-returning-a-single-datum.patch
 x86-microcode-intel-replace-sync_core-with-native_cpuid_reg-eax.patch
 x86-vdso-fix-lsl-operand-order.patch
+ext4-fix-extent_status-fragmentation-for-plain-files.patch