From a847b7650910b931e0e4c8db8af14370470ef385 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sun, 14 Sep 2025 10:02:16 +0200 Subject: [PATCH] 6.6-stable patches added patches: btrfs-fix-corruption-reading-compressed-range-when-block-size-is-smaller-than-page-size.patch btrfs-use-readahead_expand-on-compressed-extents.patch mm-damon-lru_sort-avoid-divide-by-zero-in-damon_lru_sort_apply_parameters.patch mm-damon-reclaim-avoid-divide-by-zero-in-damon_reclaim_apply_parameters.patch mm-damon-sysfs-fix-use-after-free-in-state_show.patch mm-khugepaged-convert-hpage_collapse_scan_pmd-to-use-folios.patch mm-khugepaged-fix-the-address-passed-to-notifier-on-testing-young.patch --- ...block-size-is-smaller-than-page-size.patch | 250 ++++++++++++++++++ ...adahead_expand-on-compressed-extents.patch | 203 ++++++++++++++ ...o-in-damon_lru_sort_apply_parameters.patch | 55 ++++ ...ro-in-damon_reclaim_apply_parameters.patch | 40 +++ ...sfs-fix-use-after-free-in-state_show.patch | 70 +++++ ...page_collapse_scan_pmd-to-use-folios.patch | 103 ++++++++ ...-passed-to-notifier-on-testing-young.patch | 53 ++++ queue-6.6/series | 7 + 8 files changed, 781 insertions(+) create mode 100644 queue-6.6/btrfs-fix-corruption-reading-compressed-range-when-block-size-is-smaller-than-page-size.patch create mode 100644 queue-6.6/btrfs-use-readahead_expand-on-compressed-extents.patch create mode 100644 queue-6.6/mm-damon-lru_sort-avoid-divide-by-zero-in-damon_lru_sort_apply_parameters.patch create mode 100644 queue-6.6/mm-damon-reclaim-avoid-divide-by-zero-in-damon_reclaim_apply_parameters.patch create mode 100644 queue-6.6/mm-damon-sysfs-fix-use-after-free-in-state_show.patch create mode 100644 queue-6.6/mm-khugepaged-convert-hpage_collapse_scan_pmd-to-use-folios.patch create mode 100644 queue-6.6/mm-khugepaged-fix-the-address-passed-to-notifier-on-testing-young.patch diff --git a/queue-6.6/btrfs-fix-corruption-reading-compressed-range-when-block-size-is-smaller-than-page-size.patch 
b/queue-6.6/btrfs-fix-corruption-reading-compressed-range-when-block-size-is-smaller-than-page-size.patch new file mode 100644 index 0000000000..cbade1fbaa --- /dev/null +++ b/queue-6.6/btrfs-fix-corruption-reading-compressed-range-when-block-size-is-smaller-than-page-size.patch @@ -0,0 +1,250 @@ +From stable+bounces-179556-greg=kroah.com@vger.kernel.org Sun Sep 14 06:02:28 2025 +From: Sasha Levin +Date: Sun, 14 Sep 2025 00:01:57 -0400 +Subject: btrfs: fix corruption reading compressed range when block size is smaller than page size +To: stable@vger.kernel.org +Cc: Qu Wenruo , Filipe Manana , David Sterba , Sasha Levin +Message-ID: <20250914040157.1958299-2-sashal@kernel.org> + +From: Qu Wenruo + +[ Upstream commit 9786531399a679fc2f4630d2c0a186205282ab2f ] + +[BUG] +With 64K page size (aarch64 with 64K page size config) and 4K btrfs +block size, the following workload can easily lead to a corrupted read: + + mkfs.btrfs -f -s 4k $dev > /dev/null + mount -o compress $dev $mnt + xfs_io -f -c "pwrite -S 0xff 0 64k" $mnt/base > /dev/null + echo "correct result:" + od -Ad -t x1 $mnt/base + xfs_io -f -c "reflink $mnt/base 32k 0 32k" \ + -c "reflink $mnt/base 0 32k 32k" \ + -c "pwrite -S 0xff 60k 4k" $mnt/new > /dev/null + echo "incorrect result:" + od -Ad -t x1 $mnt/new + umount $mnt + +This shows the following result: + +correct result: +0000000 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +* +0065536 +incorrect result: +0000000 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +* +0032768 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +* +0061440 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff +* +0065536 + +Notice the zero in the range [32K, 60K), which is incorrect. + +[CAUSE] +With extra trace printk, it shows the following events during od: +(some unrelated info removed like CPU and context) + + od-3457 btrfs_do_readpage: enter r/i=5/258 folio=0(65536) prev_em_start=0000000000000000 + +The "r/i" is indicating the root and inode number. 
In our case the file +"new" uses ino 258 from the fs tree (root 5). + +Notice here that the @prev_em_start pointer is NULL. This means that +btrfs_do_readpage() is called from btrfs_read_folio(), not from +btrfs_readahead(). + + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=0 got em start=0 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=4096 got em start=0 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=8192 got em start=0 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=12288 got em start=0 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=16384 got em start=0 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=20480 got em start=0 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=24576 got em start=0 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=28672 got em start=0 len=32768 + +The 32K of blocks above will be read from the first half of the +compressed data extent. + + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=32768 got em start=32768 len=32768 + +Note that there is no btrfs_submit_compressed_read() call here, which is +incorrect. +Although both extent maps at 0 and 32K are pointing to the same compressed +data, their offsets are different, thus they cannot be merged into the same +read. + +So this means the compressed data read merge check is doing something +wrong.
+ + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=36864 got em start=32768 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=40960 got em start=32768 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=45056 got em start=32768 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=49152 got em start=32768 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=53248 got em start=32768 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=57344 got em start=32768 len=32768 + od-3457 btrfs_do_readpage: r/i=5/258 folio=0(65536) cur=61440 skip uptodate + od-3457 btrfs_submit_compressed_read: cb orig_bio: file off=0 len=61440 + +The function btrfs_submit_compressed_read() is only called at the end of +folio read. The compressed bio will only have an extent map of range [0, +32K), but the original bio passed in is for the whole 64K folio. + +This will cause the decompression part to only fill the first 32K, +leaving the rest untouched (aka, filled with zero). + +This incorrect compressed read merge leads to the above data corruption. + +There were similar problems that happened in the past, commit 808f80b46790 +("Btrfs: update fix for read corruption of compressed and shared +extents") is doing pretty much the same fix for readahead. + +But that's back to 2015, where btrfs still only supports bs (block size) +== ps (page size) cases. +This means btrfs_do_readpage() only needs to handle a folio which +contains exactly one block. + +Only btrfs_readahead() can lead to a read covering multiple blocks. +Thus only btrfs_readahead() passes a non-NULL @prev_em_start pointer. + +With v5.15 kernel btrfs introduced bs < ps support. This breaks the above +assumption that a folio can only contain one block. + +Now btrfs_read_folio() can also read multiple blocks in one go. 
+But btrfs_read_folio() doesn't pass a @prev_em_start pointer, thus the +existing bio force submission check will never be triggered. + +In theory, this can also happen for btrfs with large folios, but since +large folios are still experimental, we don't need to bother with it, thus only +bs < ps support is affected for now. + +[FIX] +Instead of passing @prev_em_start to do the proper compressed extent +check, introduce one new member, btrfs_bio_ctrl::last_em_start, so that +the existing bio force submission logic will always be triggered. + +CC: stable@vger.kernel.org # 5.15+ +Reviewed-by: Filipe Manana +Signed-off-by: Qu Wenruo +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 46 ++++++++++++++++++++++++++++++++-------------- + 1 file changed, 32 insertions(+), 14 deletions(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -104,6 +104,24 @@ struct btrfs_bio_ctrl { + btrfs_bio_end_io_t end_io_func; + struct writeback_control *wbc; + struct readahead_control *ractl; ++ ++ /* ++ * The start offset of the last used extent map by a read operation. ++ * ++ * This is for proper compressed read merge. ++ * U64_MAX means we are starting the read and have made no progress yet. ++ * ++ * The current btrfs_bio_is_contig() only uses disk_bytenr as ++ * the condition to check if the read can be merged with previous ++ * bio, which is not correct. E.g. two file extents pointing to the ++ * same extent but with different offset. ++ * ++ * So here we need to do extra checks to only merge reads that are ++ * covered by the same extent map. ++ * Just extent_map::start will be enough, as they are unique ++ * inside the same inode.
++ */ ++ u64 last_em_start; + }; + + static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl) +@@ -978,7 +996,7 @@ static void btrfs_readahead_expand(struc + * return 0 on success, otherwise return error + */ + static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached, +- struct btrfs_bio_ctrl *bio_ctrl, u64 *prev_em_start) ++ struct btrfs_bio_ctrl *bio_ctrl) + { + struct inode *inode = page->mapping->host; + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); +@@ -1095,12 +1113,11 @@ static int btrfs_do_readpage(struct page + * non-optimal behavior (submitting 2 bios for the same extent). + */ + if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) && +- prev_em_start && *prev_em_start != (u64)-1 && +- *prev_em_start != em->start) ++ bio_ctrl->last_em_start != (u64)-1 && ++ bio_ctrl->last_em_start != em->start) + force_bio_submit = true; + +- if (prev_em_start) +- *prev_em_start = em->start; ++ bio_ctrl->last_em_start = em->start; + + free_extent_map(em); + em = NULL; +@@ -1146,12 +1163,15 @@ int btrfs_read_folio(struct file *file, + struct btrfs_inode *inode = BTRFS_I(page->mapping->host); + u64 start = page_offset(page); + u64 end = start + PAGE_SIZE - 1; +- struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ }; ++ struct btrfs_bio_ctrl bio_ctrl = { ++ .opf = REQ_OP_READ, ++ .last_em_start = (u64)-1, ++ }; + int ret; + + btrfs_lock_and_flush_ordered_range(inode, start, end, NULL); + +- ret = btrfs_do_readpage(page, NULL, &bio_ctrl, NULL); ++ ret = btrfs_do_readpage(page, NULL, &bio_ctrl); + /* + * If btrfs_do_readpage() failed we will want to submit the assembled + * bio to do the cleanup. 
+@@ -1163,8 +1183,7 @@ int btrfs_read_folio(struct file *file, + static inline void contiguous_readpages(struct page *pages[], int nr_pages, + u64 start, u64 end, + struct extent_map **em_cached, +- struct btrfs_bio_ctrl *bio_ctrl, +- u64 *prev_em_start) ++ struct btrfs_bio_ctrl *bio_ctrl) + { + struct btrfs_inode *inode = BTRFS_I(pages[0]->mapping->host); + int index; +@@ -1172,8 +1191,7 @@ static inline void contiguous_readpages( + btrfs_lock_and_flush_ordered_range(inode, start, end, NULL); + + for (index = 0; index < nr_pages; index++) { +- btrfs_do_readpage(pages[index], em_cached, bio_ctrl, +- prev_em_start); ++ btrfs_do_readpage(pages[index], em_cached, bio_ctrl); + put_page(pages[index]); + } + } +@@ -2255,11 +2273,11 @@ void extent_readahead(struct readahead_c + { + struct btrfs_bio_ctrl bio_ctrl = { + .opf = REQ_OP_READ | REQ_RAHEAD, +- .ractl = rac ++ .ractl = rac, ++ .last_em_start = (u64)-1, + }; + struct page *pagepool[16]; + struct extent_map *em_cached = NULL; +- u64 prev_em_start = (u64)-1; + int nr; + + while ((nr = readahead_page_batch(rac, pagepool))) { +@@ -2267,7 +2285,7 @@ void extent_readahead(struct readahead_c + u64 contig_end = contig_start + readahead_batch_length(rac) - 1; + + contiguous_readpages(pagepool, nr, contig_start, contig_end, +- &em_cached, &bio_ctrl, &prev_em_start); ++ &em_cached, &bio_ctrl); + } + + if (em_cached) diff --git a/queue-6.6/btrfs-use-readahead_expand-on-compressed-extents.patch b/queue-6.6/btrfs-use-readahead_expand-on-compressed-extents.patch new file mode 100644 index 0000000000..2dd5270ab4 --- /dev/null +++ b/queue-6.6/btrfs-use-readahead_expand-on-compressed-extents.patch @@ -0,0 +1,203 @@ +From stable+bounces-179555-greg=kroah.com@vger.kernel.org Sun Sep 14 06:02:22 2025 +From: Sasha Levin +Date: Sun, 14 Sep 2025 00:01:56 -0400 +Subject: btrfs: use readahead_expand() on compressed extents +To: stable@vger.kernel.org +Cc: Boris Burkov , Dimitrios Apostolou , Filipe Manana , David Sterba , Sasha Levin 
+Message-ID: <20250914040157.1958299-1-sashal@kernel.org> + +From: Boris Burkov + +[ Upstream commit 9e9ff875e4174be939371667d2cc81244e31232f ] + +We recently received a report of poor performance doing sequential +buffered reads of a file with compressed extents. With bs=128k, a naive +sequential dd ran as fast on a compressed file as on an uncompressed +(1.2GB/s on my reproducing system) while with bs<32k, this performance +tanked down to ~300MB/s. + +i.e., slow: + + dd if=some-compressed-file of=/dev/null bs=4k count=X + +vs fast: + + dd if=some-compressed-file of=/dev/null bs=128k count=Y + +The cause of this slowness is overhead to do with looking up extent_maps +to enable readahead pre-caching on compressed extents +(add_ra_bio_pages()), as well as some overhead in the generic VFS +readahead code we hit more in the slow case. Notably, the main +difference between the two read sizes is that in the large sized request +case, we call btrfs_readahead() relatively rarely while in the smaller +request we call it for every compressed extent. So the fast case stays +in the btrfs readahead loop: + + while ((folio = readahead_folio(rac)) != NULL) + btrfs_do_readpage(folio, &em_cached, &bio_ctrl, &prev_em_start); + +where the slower one breaks out of that loop every time. This results in +calling add_ra_bio_pages a lot, doing lots of extent_map lookups, +extent_map locking, etc. + +This happens because although add_ra_bio_pages() does add the +appropriate un-compressed file pages to the cache, it does not +communicate back to the ractl in any way. To solve this, we should be +using readahead_expand() to signal to readahead to expand the readahead +window. + +This change passes the readahead_control into the btrfs_bio_ctrl and in +the case of compressed reads sets the expansion to the size of the +extent_map we already looked up anyway. It skips the subpage case as +that one already doesn't do add_ra_bio_pages(). 
+ +With this change, whether we use bs=4k or bs=128k, btrfs expands the +readahead window up to the largest compressed extent we have seen so far +(in the trivial example: 128k) and the call stacks of the two modes look +identical. Notably, we barely call add_ra_bio_pages at all. And the +performance becomes identical as well. So this change certainly "fixes" +this performance problem. + +Of course, it does seem to beg a few questions: + +1. Will this waste too much page cache with a too large ra window? +2. Will this somehow cause bugs prevented by the more thoughtful + checking in add_ra_bio_pages? +3. Should we delete add_ra_bio_pages? + +My stabs at some answers: + +1. Hard to say. See attempts at generic performance testing below. Is + there a "readahead_shrink" we should be using? Should we expand more + slowly, by half the remaining em size each time? +2. I don't think so. Since the new behavior is indistinguishable from + reading the file with a larger read size passed in, I don't see why + one would be safe but not the other. +3. Probably! I tested that and it was fine in fstests, and it seems like + the pages would get re-used just as well in the readahead case. + However, it is possible some reads that use page cache but not + btrfs_readahead() could suffer. I will investigate this further as a + follow up. 
+ +I tested the performance implications of this change in 3 ways (using +compress-force=zstd:3 for compression): + +Directly test the affected workload of small sequential reads on a +compressed file (improved from ~250MB/s to ~1.2GB/s) + +==========for-next========== + dd /mnt/lol/non-cmpr 4k + 1048576+0 records in + 1048576+0 records out + 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 6.02983 s, 712 MB/s + dd /mnt/lol/non-cmpr 128k + 32768+0 records in + 32768+0 records out + 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 5.92403 s, 725 MB/s + dd /mnt/lol/cmpr 4k + 1048576+0 records in + 1048576+0 records out + 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 17.8832 s, 240 MB/s + dd /mnt/lol/cmpr 128k + 32768+0 records in + 32768+0 records out + 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 3.71001 s, 1.2 GB/s + +==========ra-expand========== + dd /mnt/lol/non-cmpr 4k + 1048576+0 records in + 1048576+0 records out + 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 6.09001 s, 705 MB/s + dd /mnt/lol/non-cmpr 128k + 32768+0 records in + 32768+0 records out + 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 6.07664 s, 707 MB/s + dd /mnt/lol/cmpr 4k + 1048576+0 records in + 1048576+0 records out + 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 3.79531 s, 1.1 GB/s + dd /mnt/lol/cmpr 128k + 32768+0 records in + 32768+0 records out + 4294967296 bytes (4.3 GB, 4.0 GiB) copied, 3.69533 s, 1.2 GB/s + +Built the linux kernel from clean (no change) + +Ran fsperf. Mostly neutral results with some improvements and +regressions here and there. 
+ +Reported-by: Dimitrios Apostolou +Link: https://lore.kernel.org/linux-btrfs/34601559-6c16-6ccc-1793-20a97ca0dbba@gmx.net/ +Reviewed-by: Filipe Manana +Signed-off-by: Boris Burkov +Signed-off-by: David Sterba +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/extent_io.c | 34 +++++++++++++++++++++++++++++++++- + 1 file changed, 33 insertions(+), 1 deletion(-) + +--- a/fs/btrfs/extent_io.c ++++ b/fs/btrfs/extent_io.c +@@ -103,6 +103,7 @@ struct btrfs_bio_ctrl { + blk_opf_t opf; + btrfs_bio_end_io_t end_io_func; + struct writeback_control *wbc; ++ struct readahead_control *ractl; + }; + + static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl) +@@ -952,6 +953,23 @@ __get_extent_map(struct inode *inode, st + } + return em; + } ++ ++static void btrfs_readahead_expand(struct readahead_control *ractl, ++ const struct extent_map *em) ++{ ++ const u64 ra_pos = readahead_pos(ractl); ++ const u64 ra_end = ra_pos + readahead_length(ractl); ++ const u64 em_end = em->start + em->ram_bytes; ++ ++ /* No expansion for holes and inline extents. */ ++ if (em->block_start > EXTENT_MAP_LAST_BYTE) ++ return; ++ ++ ASSERT(em_end >= ra_pos); ++ if (em_end > ra_end) ++ readahead_expand(ractl, ra_pos, em_end - ra_pos); ++} ++ + /* + * basic readpage implementation. Locked extent state structs are inserted + * into the tree that are removed when the IO is done (by the end_io +@@ -1023,6 +1041,17 @@ static int btrfs_do_readpage(struct page + + iosize = min(extent_map_end(em) - cur, end - cur + 1); + iosize = ALIGN(iosize, blocksize); ++ ++ /* ++ * Only expand readahead for extents which are already creating ++ * the pages anyway in add_ra_bio_pages, which is compressed ++ * extents in the non subpage case. 
++ */ ++ if (bio_ctrl->ractl && ++ !btrfs_is_subpage(fs_info, page) && ++ compress_type != BTRFS_COMPRESS_NONE) ++ btrfs_readahead_expand(bio_ctrl->ractl, em); ++ + if (compress_type != BTRFS_COMPRESS_NONE) + disk_bytenr = em->block_start; + else +@@ -2224,7 +2253,10 @@ int extent_writepages(struct address_spa + + void extent_readahead(struct readahead_control *rac) + { +- struct btrfs_bio_ctrl bio_ctrl = { .opf = REQ_OP_READ | REQ_RAHEAD }; ++ struct btrfs_bio_ctrl bio_ctrl = { ++ .opf = REQ_OP_READ | REQ_RAHEAD, ++ .ractl = rac ++ }; + struct page *pagepool[16]; + struct extent_map *em_cached = NULL; + u64 prev_em_start = (u64)-1; diff --git a/queue-6.6/mm-damon-lru_sort-avoid-divide-by-zero-in-damon_lru_sort_apply_parameters.patch b/queue-6.6/mm-damon-lru_sort-avoid-divide-by-zero-in-damon_lru_sort_apply_parameters.patch new file mode 100644 index 0000000000..9fe526ab51 --- /dev/null +++ b/queue-6.6/mm-damon-lru_sort-avoid-divide-by-zero-in-damon_lru_sort_apply_parameters.patch @@ -0,0 +1,55 @@ +From 711f19dfd783ffb37ca4324388b9c4cb87e71363 Mon Sep 17 00:00:00 2001 +From: Quanmin Yan +Date: Wed, 27 Aug 2025 19:58:57 +0800 +Subject: mm/damon/lru_sort: avoid divide-by-zero in damon_lru_sort_apply_parameters() + +From: Quanmin Yan + +commit 711f19dfd783ffb37ca4324388b9c4cb87e71363 upstream. + +Patch series "mm/damon: avoid divide-by-zero in DAMON module's parameters +application". + +DAMON's RECLAIM and LRU_SORT modules perform no validation on +user-configured parameters during application, which may lead to +division-by-zero errors. + +Avoid the divide-by-zero by adding validation checks when DAMON modules +attempt to apply the parameters. + + +This patch (of 2): + +During the calculation of 'hot_thres' and 'cold_thres', either +'sample_interval' or 'aggr_interval' is used as the divisor, which may +lead to division-by-zero errors. Fix it by directly returning -EINVAL +when such a case occurs. 
Additionally, since 'aggr_interval' is already +required to be set no smaller than 'sample_interval' in damon_set_attrs(), +only the case where 'sample_interval' is zero needs to be checked. + +Link: https://lkml.kernel.org/r/20250827115858.1186261-2-yanquanmin1@huawei.com +Fixes: 40e983cca927 ("mm/damon: introduce DAMON-based LRU-lists Sorting") +Signed-off-by: Quanmin Yan +Reviewed-by: SeongJae Park +Cc: Kefeng Wang +Cc: ze zuo +Cc: [6.0+] +Signed-off-by: Andrew Morton +Signed-off-by: SeongJae Park +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/lru_sort.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/damon/lru_sort.c ++++ b/mm/damon/lru_sort.c +@@ -203,6 +203,9 @@ static int damon_lru_sort_apply_paramete + unsigned int hot_thres, cold_thres; + int err = 0; + ++ if (!damon_lru_sort_mon_attrs.sample_interval) ++ return -EINVAL; ++ + err = damon_set_attrs(ctx, &damon_lru_sort_mon_attrs); + if (err) + return err; diff --git a/queue-6.6/mm-damon-reclaim-avoid-divide-by-zero-in-damon_reclaim_apply_parameters.patch b/queue-6.6/mm-damon-reclaim-avoid-divide-by-zero-in-damon_reclaim_apply_parameters.patch new file mode 100644 index 0000000000..5d729aea36 --- /dev/null +++ b/queue-6.6/mm-damon-reclaim-avoid-divide-by-zero-in-damon_reclaim_apply_parameters.patch @@ -0,0 +1,40 @@ +From e6b543ca9806d7bced863f43020e016ee996c057 Mon Sep 17 00:00:00 2001 +From: Quanmin Yan +Date: Wed, 27 Aug 2025 19:58:58 +0800 +Subject: mm/damon/reclaim: avoid divide-by-zero in damon_reclaim_apply_parameters() + +From: Quanmin Yan + +commit e6b543ca9806d7bced863f43020e016ee996c057 upstream. + +When creating a new scheme of DAMON_RECLAIM, the calculation of +'min_age_region' uses 'aggr_interval' as the divisor, which may lead to +division-by-zero errors. Fix it by directly returning -EINVAL when such a +case occurs. 
+ +Link: https://lkml.kernel.org/r/20250827115858.1186261-3-yanquanmin1@huawei.com +Fixes: f5a79d7c0c87 ("mm/damon: introduce struct damos_access_pattern") +Signed-off-by: Quanmin Yan +Reviewed-by: SeongJae Park +Cc: Kefeng Wang +Cc: ze zuo +Cc: [6.1+] +Signed-off-by: Andrew Morton +Signed-off-by: SeongJae Park +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/reclaim.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/mm/damon/reclaim.c ++++ b/mm/damon/reclaim.c +@@ -167,6 +167,9 @@ static int damon_reclaim_apply_parameter + struct damos_filter *filter; + int err = 0; + ++ if (!damon_reclaim_mon_attrs.aggr_interval) ++ return -EINVAL; ++ + err = damon_set_attrs(ctx, &damon_reclaim_mon_attrs); + if (err) + return err; diff --git a/queue-6.6/mm-damon-sysfs-fix-use-after-free-in-state_show.patch b/queue-6.6/mm-damon-sysfs-fix-use-after-free-in-state_show.patch new file mode 100644 index 0000000000..6ce742012d --- /dev/null +++ b/queue-6.6/mm-damon-sysfs-fix-use-after-free-in-state_show.patch @@ -0,0 +1,70 @@ +From 3260a3f0828e06f5f13fac69fb1999a6d60d9cff Mon Sep 17 00:00:00 2001 +From: Stanislav Fort +Date: Fri, 5 Sep 2025 13:10:46 +0300 +Subject: mm/damon/sysfs: fix use-after-free in state_show() + +From: Stanislav Fort + +commit 3260a3f0828e06f5f13fac69fb1999a6d60d9cff upstream. + +state_show() reads kdamond->damon_ctx without holding damon_sysfs_lock. +This allows a use-after-free race: + +CPU 0 CPU 1 +----- ----- +state_show() damon_sysfs_turn_damon_on() +ctx = kdamond->damon_ctx; mutex_lock(&damon_sysfs_lock); + damon_destroy_ctx(kdamond->damon_ctx); + kdamond->damon_ctx = NULL; + mutex_unlock(&damon_sysfs_lock); +damon_is_running(ctx); /* ctx is freed */ +mutex_lock(&ctx->kdamond_lock); /* UAF */ + +(The race can also occur with damon_sysfs_kdamonds_rm_dirs() and +damon_sysfs_kdamond_release(), which free or replace the context under +damon_sysfs_lock.) 
+ +Fix by taking damon_sysfs_lock before dereferencing the context, mirroring +the locking used in pid_show(). + +The bug has existed since state_show() first accessed kdamond->damon_ctx. + +Link: https://lkml.kernel.org/r/20250905101046.2288-1-disclosure@aisle.com +Fixes: a61ea561c871 ("mm/damon/sysfs: link DAMON for virtual address spaces monitoring") +Signed-off-by: Stanislav Fort +Reported-by: Stanislav Fort +Reviewed-by: SeongJae Park +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: SeongJae Park +Signed-off-by: Greg Kroah-Hartman +--- + mm/damon/sysfs.c | 14 +++++++++----- + 1 file changed, 9 insertions(+), 5 deletions(-) + +--- a/mm/damon/sysfs.c ++++ b/mm/damon/sysfs.c +@@ -1055,14 +1055,18 @@ static ssize_t state_show(struct kobject + { + struct damon_sysfs_kdamond *kdamond = container_of(kobj, + struct damon_sysfs_kdamond, kobj); +- struct damon_ctx *ctx = kdamond->damon_ctx; +- bool running; ++ struct damon_ctx *ctx; ++ bool running = false; + +- if (!ctx) +- running = false; +- else ++ if (!mutex_trylock(&damon_sysfs_lock)) ++ return -EBUSY; ++ ++ ctx = kdamond->damon_ctx; ++ if (ctx) + running = damon_sysfs_ctx_running(ctx); + ++ mutex_unlock(&damon_sysfs_lock); ++ + return sysfs_emit(buf, "%s\n", running ? 
+ damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_ON] : + damon_sysfs_cmd_strs[DAMON_SYSFS_CMD_OFF]); diff --git a/queue-6.6/mm-khugepaged-convert-hpage_collapse_scan_pmd-to-use-folios.patch b/queue-6.6/mm-khugepaged-convert-hpage_collapse_scan_pmd-to-use-folios.patch new file mode 100644 index 0000000000..c5598b5dff --- /dev/null +++ b/queue-6.6/mm-khugepaged-convert-hpage_collapse_scan_pmd-to-use-folios.patch @@ -0,0 +1,103 @@ +From stable+bounces-179515-greg=kroah.com@vger.kernel.org Sat Sep 13 20:59:20 2025 +From: Sasha Levin +Date: Sat, 13 Sep 2025 14:59:11 -0400 +Subject: mm/khugepaged: convert hpage_collapse_scan_pmd() to use folios +To: stable@vger.kernel.org +Cc: "Vishal Moola (Oracle)" , Rik van Riel , Yang Shi , Kefeng Wang , "Matthew Wilcox (Oracle)" , Andrew Morton , Sasha Levin +Message-ID: <20250913185912.1514325-1-sashal@kernel.org> + +From: "Vishal Moola (Oracle)" + +[ Upstream commit 5c07ebb372d66423e508ecfb8e00324f8797f072 ] + +Replaces 5 calls to compound_head(), and removes 1385 bytes of kernel +text. 
+ +Link: https://lkml.kernel.org/r/20231020183331.10770-3-vishal.moola@gmail.com +Signed-off-by: Vishal Moola (Oracle) +Reviewed-by: Rik van Riel +Reviewed-by: Yang Shi +Cc: Kefeng Wang +Cc: Matthew Wilcox (Oracle) +Signed-off-by: Andrew Morton +Stable-dep-of: 394bfac1c7f7 ("mm/khugepaged: fix the address passed to notifier on testing young") +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + mm/khugepaged.c | 20 ++++++++++---------- + 1 file changed, 10 insertions(+), 10 deletions(-) + +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -1240,6 +1240,7 @@ static int hpage_collapse_scan_pmd(struc + int result = SCAN_FAIL, referenced = 0; + int none_or_zero = 0, shared = 0; + struct page *page = NULL; ++ struct folio *folio = NULL; + unsigned long _address; + spinlock_t *ptl; + int node = NUMA_NO_NODE, unmapped = 0; +@@ -1326,29 +1327,28 @@ static int hpage_collapse_scan_pmd(struc + } + } + +- page = compound_head(page); +- ++ folio = page_folio(page); + /* + * Record which node the original page is from and save this + * information to cc->node_load[]. + * Khugepaged will allocate hugepage from the node has the max + * hit record. + */ +- node = page_to_nid(page); ++ node = folio_nid(folio); + if (hpage_collapse_scan_abort(node, cc)) { + result = SCAN_SCAN_ABORT; + goto out_unmap; + } + cc->node_load[node]++; +- if (!PageLRU(page)) { ++ if (!folio_test_lru(folio)) { + result = SCAN_PAGE_LRU; + goto out_unmap; + } +- if (PageLocked(page)) { ++ if (folio_test_locked(folio)) { + result = SCAN_PAGE_LOCK; + goto out_unmap; + } +- if (!PageAnon(page)) { ++ if (!folio_test_anon(folio)) { + result = SCAN_PAGE_ANON; + goto out_unmap; + } +@@ -1363,7 +1363,7 @@ static int hpage_collapse_scan_pmd(struc + * has excessive GUP pins (i.e. 512). Anyway the same check + * will be done again later the risk seems low. 
+ */ +- if (!is_refcount_suitable(page)) { ++ if (!is_refcount_suitable(&folio->page)) { + result = SCAN_PAGE_COUNT; + goto out_unmap; + } +@@ -1373,8 +1373,8 @@ static int hpage_collapse_scan_pmd(struc + * enough young pte to justify collapsing the page + */ + if (cc->is_khugepaged && +- (pte_young(pteval) || page_is_young(page) || +- PageReferenced(page) || mmu_notifier_test_young(vma->vm_mm, ++ (pte_young(pteval) || folio_test_young(folio) || ++ folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm, + address))) + referenced++; + } +@@ -1396,7 +1396,7 @@ out_unmap: + *mmap_locked = false; + } + out: +- trace_mm_khugepaged_scan_pmd(mm, page, writable, referenced, ++ trace_mm_khugepaged_scan_pmd(mm, &folio->page, writable, referenced, + none_or_zero, result, unmapped); + return result; + } diff --git a/queue-6.6/mm-khugepaged-fix-the-address-passed-to-notifier-on-testing-young.patch b/queue-6.6/mm-khugepaged-fix-the-address-passed-to-notifier-on-testing-young.patch new file mode 100644 index 0000000000..e355c39f63 --- /dev/null +++ b/queue-6.6/mm-khugepaged-fix-the-address-passed-to-notifier-on-testing-young.patch @@ -0,0 +1,53 @@ +From stable+bounces-179516-greg=kroah.com@vger.kernel.org Sat Sep 13 20:59:22 2025 +From: Sasha Levin +Date: Sat, 13 Sep 2025 14:59:12 -0400 +Subject: mm/khugepaged: fix the address passed to notifier on testing young +To: stable@vger.kernel.org +Cc: Wei Yang , Dev Jain , Zi Yan , David Hildenbrand , Lorenzo Stoakes , Baolin Wang , "Liam R. Howlett" , Nico Pache , Ryan Roberts , Barry Song , Andrew Morton , Sasha Levin +Message-ID: <20250913185912.1514325-2-sashal@kernel.org> + +From: Wei Yang + +[ Upstream commit 394bfac1c7f7b701c2c93834c5761b9c9ceeebcf ] + +Commit 8ee53820edfd ("thp: mmu_notifier_test_young") introduced +mmu_notifier_test_young(), but we are passing the wrong address. +In xxx_scan_pmd(), the actual iteration address is "_address" not +"address". We seem to misuse the variable on the very beginning. 
+ +Change it to the right one. + +[akpm@linux-foundation.org fix whitespace, per everyone] +Link: https://lkml.kernel.org/r/20250822063318.11644-1-richard.weiyang@gmail.com +Fixes: 8ee53820edfd ("thp: mmu_notifier_test_young") +Signed-off-by: Wei Yang +Reviewed-by: Dev Jain +Reviewed-by: Zi Yan +Acked-by: David Hildenbrand +Reviewed-by: Lorenzo Stoakes +Cc: Baolin Wang +Cc: Liam R. Howlett +Cc: Nico Pache +Cc: Ryan Roberts +Cc: Barry Song +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +Signed-off-by: Greg Kroah-Hartman +--- + mm/khugepaged.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/mm/khugepaged.c ++++ b/mm/khugepaged.c +@@ -1374,8 +1374,8 @@ static int hpage_collapse_scan_pmd(struc + */ + if (cc->is_khugepaged && + (pte_young(pteval) || folio_test_young(folio) || +- folio_test_referenced(folio) || mmu_notifier_test_young(vma->vm_mm, +- address))) ++ folio_test_referenced(folio) || ++ mmu_notifier_test_young(vma->vm_mm, _address))) + referenced++; + } + if (!writable) { diff --git a/queue-6.6/series b/queue-6.6/series index 8f52b75764..0e86e8679e 100644 --- a/queue-6.6/series +++ b/queue-6.6/series @@ -47,3 +47,10 @@ drm-amdgpu-vcn4-fix-ib-parsing-with-multiple-engine-info-packages.patch mtd-nand-raw-atmel-fix-comment-in-timings-preparation.patch mtd-nand-raw-atmel-respect-tar-tclr-in-read-setup-timing.patch libceph-fix-invalid-accesses-to-ceph_connection_v1_info.patch +mm-damon-sysfs-fix-use-after-free-in-state_show.patch +mm-damon-reclaim-avoid-divide-by-zero-in-damon_reclaim_apply_parameters.patch +mm-damon-lru_sort-avoid-divide-by-zero-in-damon_lru_sort_apply_parameters.patch +btrfs-use-readahead_expand-on-compressed-extents.patch +btrfs-fix-corruption-reading-compressed-range-when-block-size-is-smaller-than-page-size.patch +mm-khugepaged-convert-hpage_collapse_scan_pmd-to-use-folios.patch +mm-khugepaged-fix-the-address-passed-to-notifier-on-testing-young.patch -- 2.47.3