From ba0e31276b20625f8f50b0b504fe30745c5cf6d0 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Thu, 23 Apr 2020 14:44:44 -0400 Subject: [PATCH] Fixes for 4.14 Signed-off-by: Sasha Levin --- ...status-fragmentation-for-plain-files.patch | 122 ++++++++++++++++++ queue-4.14/series | 1 + 2 files changed, 123 insertions(+) create mode 100644 queue-4.14/ext4-fix-extent_status-fragmentation-for-plain-files.patch diff --git a/queue-4.14/ext4-fix-extent_status-fragmentation-for-plain-files.patch b/queue-4.14/ext4-fix-extent_status-fragmentation-for-plain-files.patch new file mode 100644 index 00000000000..40acf77416f --- /dev/null +++ b/queue-4.14/ext4-fix-extent_status-fragmentation-for-plain-files.patch @@ -0,0 +1,122 @@ +From a5cdb45e6d63a0515da470b1c9a42f41866d3dce Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Nov 2019 12:25:02 +0000 +Subject: ext4: fix extent_status fragmentation for plain files + +From: Dmitry Monakhov + +[ Upstream commit 4068664e3cd2312610ceac05b74c4cf1853b8325 ] + +Extents are cached in read_extent_tree_block(); as a result, extents +are not cached for inodes with depth == 0 when we try to find the +extent using ext4_find_extent(). The result of the lookup is cached +in ext4_map_blocks() but is only a subset of the extent on disk. As a +result, the contents of extents status cache can get very badly +fragmented for certain workloads, such as a random 4k read workload. + +File size of /mnt/test is 33554432 (8192 blocks of 4096 bytes) + ext: logical_offset: physical_offset: length: expected: flags: + 0: 0.. 8191: 40960.. 49151: 8192: last,eof + +$ perf record -e 'ext4:ext4_es_*' /root/bin/fio --name=t --direct=0 --rw=randread --bs=4k --filesize=32M --size=32M --filename=/mnt/test +$ perf script | grep ext4_es_insert_extent | head -n 10 + fio 131 [000] 13.975421: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [494/1) mapped 41454 status W + fio 131 [000] 13.975939: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [6064/1) mapped 47024 status W + fio 131 [000] 13.976467: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [6907/1) mapped 47867 status W + fio 131 [000] 13.976937: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [3850/1) mapped 44810 status W + fio 131 [000] 13.977440: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [3292/1) mapped 44252 status W + fio 131 [000] 13.977931: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [6882/1) mapped 47842 status W + fio 131 [000] 13.978376: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [3117/1) mapped 44077 status W + fio 131 [000] 13.978957: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [2896/1) mapped 43856 status W + fio 131 [000] 13.979474: ext4:ext4_es_insert_extent: dev 253,0 ino 12 es [7479/1) mapped 48439 status W + +Fix this by caching the extents for inodes with depth == 0 in +ext4_find_extent(). + +[ Renamed ext4_es_cache_extents() to ext4_cache_extents() since this + newly added function is not in extents_cache.c, and to avoid + potential visual confusion with ext4_es_cache_extent(). -TYT ] + +Signed-off-by: Dmitry Monakhov +Link: https://lore.kernel.org/r/20191106122502.19986-1-dmonakhov@gmail.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/extents.c | 47 +++++++++++++++++++++++++++-------------------- + 1 file changed, 27 insertions(+), 20 deletions(-) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index fa6ae9014e8f6..34b79825efdc3 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -510,6 +510,30 @@ int ext4_ext_check_inode(struct inode *inode) + return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode), 0); + } + ++static void ext4_cache_extents(struct inode *inode, ++ struct ext4_extent_header *eh) ++{ ++ struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); ++ ext4_lblk_t prev = 0; ++ int i; ++ ++ for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { ++ unsigned int status = EXTENT_STATUS_WRITTEN; ++ ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); ++ int len = ext4_ext_get_actual_len(ex); ++ ++ if (prev && (prev != lblk)) ++ ext4_es_cache_extent(inode, prev, lblk - prev, ~0, ++ EXTENT_STATUS_HOLE); ++ ++ if (ext4_ext_is_unwritten(ex)) ++ status = EXTENT_STATUS_UNWRITTEN; ++ ext4_es_cache_extent(inode, lblk, len, ++ ext4_ext_pblock(ex), status); ++ prev = lblk + len; ++ } ++} ++ + static struct buffer_head * + __read_extent_tree_block(const char *function, unsigned int line, + struct inode *inode, ext4_fsblk_t pblk, int depth, +@@ -540,26 +564,7 @@ __read_extent_tree_block(const char *function, unsigned int line, + */ + if (!(flags & EXT4_EX_NOCACHE) && depth == 0) { + struct ext4_extent_header *eh = ext_block_hdr(bh); +- struct ext4_extent *ex = EXT_FIRST_EXTENT(eh); +- ext4_lblk_t prev = 0; +- int i; +- +- for (i = le16_to_cpu(eh->eh_entries); i > 0; i--, ex++) { +- unsigned int status = EXTENT_STATUS_WRITTEN; +- ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); +- int len = ext4_ext_get_actual_len(ex); +- +- if (prev && (prev != lblk)) +- ext4_es_cache_extent(inode, prev, +- lblk - prev, ~0, +- EXTENT_STATUS_HOLE); +- +- if (ext4_ext_is_unwritten(ex)) +- status = EXTENT_STATUS_UNWRITTEN; +- ext4_es_cache_extent(inode, lblk, len, +- ext4_ext_pblock(ex), status); +- prev = lblk + len; +- } ++ ext4_cache_extents(inode, eh); + } + return bh; + errout: +@@ -907,6 +912,8 @@ ext4_find_extent(struct inode *inode, ext4_lblk_t block, + path[0].p_bh = NULL; + + i = depth; ++ if (!(flags & EXT4_EX_NOCACHE) && depth == 0) ++ ext4_cache_extents(inode, eh); + /* walk through the tree */ + while (i) { + ext_debug("depth %d: num %d, max %d\n", +-- +2.20.1 + diff --git a/queue-4.14/series b/queue-4.14/series index 41c50bcb7c3..e7f3b6896f3 100644 --- a/queue-4.14/series +++ b/queue-4.14/series @@ -196,3 +196,4 @@ mtd-lpddr-fix-a-double-free-in-probe.patch mtd-phram-fix-a-double-free-issue-in-error-path.patch keys-use-individual-pages-in-big_key-for-crypto-buffers.patch keys-don-t-write-out-to-userspace-while-holding-key-semaphore.patch +ext4-fix-extent_status-fragmentation-for-plain-files.patch -- 2.47.3