From: Greg Kroah-Hartman Date: Mon, 12 May 2025 14:05:27 +0000 (+0200) Subject: 6.12-stable patches X-Git-Tag: v5.15.183~19 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=61d5276cb5e47101d0c269c060e091e71a6b3412;p=thirdparty%2Fkernel%2Fstable-queue.git 6.12-stable patches added patches: bluetooth-btmtk-remove-resetting-mt7921-before-downloading-the-fw.patch bluetooth-btmtk-remove-the-resetting-step-before-downloading-the-fw.patch io_uring-always-arm-linked-timeouts-prior-to-issue.patch mm-page_alloc-don-t-steal-single-pages-from-biggest-buddy.patch mm-page_alloc-speed-up-fallbacks-in-rmqueue_bulk.patch rust-allow-rust-1.87.0-s-clippy-ptr_eq-lint.patch rust-clean-rust-1.88.0-s-clippy-uninlined_format_args-lint.patch sched-eevdf-fix-se-slice-being-set-to-u64_max-and-resulting-crash.patch --- diff --git a/queue-6.12/bluetooth-btmtk-remove-resetting-mt7921-before-downloading-the-fw.patch b/queue-6.12/bluetooth-btmtk-remove-resetting-mt7921-before-downloading-the-fw.patch new file mode 100644 index 0000000000..91ffaf702c --- /dev/null +++ b/queue-6.12/bluetooth-btmtk-remove-resetting-mt7921-before-downloading-the-fw.patch @@ -0,0 +1,40 @@ +From a7208610761ae9b3bc109ddc493eb7c332fca5b2 Mon Sep 17 00:00:00 2001 +From: Hao Qin +Date: Fri, 10 Jan 2025 17:55:48 +0800 +Subject: Bluetooth: btmtk: Remove resetting mt7921 before downloading the fw + +From: Hao Qin + +commit a7208610761ae9b3bc109ddc493eb7c332fca5b2 upstream. + +Remove resetting mt7921 before downloading the fw, as it may cause +command timeout when performing the reset. + +Signed-off-by: Hao Qin +Signed-off-by: Luiz Augusto von Dentz +Cc: "Geoffrey D. Bennett" +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/btmtk.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/drivers/bluetooth/btmtk.c ++++ b/drivers/bluetooth/btmtk.c +@@ -1329,7 +1329,6 @@ int btmtk_usb_setup(struct hci_dev *hdev + fwname = FIRMWARE_MT7668; + break; + case 0x7922: +- case 0x7961: + case 0x7925: + /* Reset the device to ensure it's in the initial state before + * downloading the firmware to ensure. +@@ -1337,7 +1336,8 @@ int btmtk_usb_setup(struct hci_dev *hdev + + if (!test_bit(BTMTK_FIRMWARE_LOADED, &btmtk_data->flags)) + btmtk_usb_subsys_reset(hdev, dev_id); +- ++ fallthrough; ++ case 0x7961: + btmtk_fw_get_filename(fw_bin_name, sizeof(fw_bin_name), dev_id, + fw_version, fw_flavor); + diff --git a/queue-6.12/bluetooth-btmtk-remove-the-resetting-step-before-downloading-the-fw.patch b/queue-6.12/bluetooth-btmtk-remove-the-resetting-step-before-downloading-the-fw.patch new file mode 100644 index 0000000000..7eb4098d25 --- /dev/null +++ b/queue-6.12/bluetooth-btmtk-remove-the-resetting-step-before-downloading-the-fw.patch @@ -0,0 +1,50 @@ +From 33634e2ab7c6369391e0ca4b9b97dc861e33d20e Mon Sep 17 00:00:00 2001 +From: Hao Qin +Date: Sat, 15 Mar 2025 10:27:30 +0800 +Subject: Bluetooth: btmtk: Remove the resetting step before downloading the fw + +From: Hao Qin + +commit 33634e2ab7c6369391e0ca4b9b97dc861e33d20e upstream. + +Remove the resetting step before downloading the fw, as it may cause +other usb devices to fail to initialise when connected during boot +on kernels 6.11 and newer. + +Signed-off-by: Hao Qin +Signed-off-by: Luiz Augusto von Dentz +Cc: "Geoffrey D. 
Bennett" +Signed-off-by: Greg Kroah-Hartman +--- + drivers/bluetooth/btmtk.c | 10 ---------- + 1 file changed, 10 deletions(-) + +--- a/drivers/bluetooth/btmtk.c ++++ b/drivers/bluetooth/btmtk.c +@@ -1330,13 +1330,6 @@ int btmtk_usb_setup(struct hci_dev *hdev + break; + case 0x7922: + case 0x7925: +- /* Reset the device to ensure it's in the initial state before +- * downloading the firmware to ensure. +- */ +- +- if (!test_bit(BTMTK_FIRMWARE_LOADED, &btmtk_data->flags)) +- btmtk_usb_subsys_reset(hdev, dev_id); +- fallthrough; + case 0x7961: + btmtk_fw_get_filename(fw_bin_name, sizeof(fw_bin_name), dev_id, + fw_version, fw_flavor); +@@ -1345,12 +1338,9 @@ int btmtk_usb_setup(struct hci_dev *hdev + btmtk_usb_hci_wmt_sync); + if (err < 0) { + bt_dev_err(hdev, "Failed to set up firmware (%d)", err); +- clear_bit(BTMTK_FIRMWARE_LOADED, &btmtk_data->flags); + return err; + } + +- set_bit(BTMTK_FIRMWARE_LOADED, &btmtk_data->flags); +- + /* It's Device EndPoint Reset Option Register */ + err = btmtk_usb_uhw_reg_write(hdev, MTK_EP_RST_OPT, + MTK_EP_RST_IN_OUT_OPT); diff --git a/queue-6.12/io_uring-always-arm-linked-timeouts-prior-to-issue.patch b/queue-6.12/io_uring-always-arm-linked-timeouts-prior-to-issue.patch new file mode 100644 index 0000000000..2662d0b5b1 --- /dev/null +++ b/queue-6.12/io_uring-always-arm-linked-timeouts-prior-to-issue.patch @@ -0,0 +1,155 @@ +From 51c54d9262c8f3d6d2b262acbb10c80068d44eff Mon Sep 17 00:00:00 2001 +From: Jens Axboe +Date: Mon, 5 May 2025 08:34:39 -0600 +Subject: io_uring: always arm linked timeouts prior to issue + +From: Jens Axboe + +Commit b53e523261bf058ea4a518b482222e7a277b186b upstream. + +There are a few spots where linked timeouts are armed, and not all of +them adhere to the pre-arm, attempt issue, post-arm pattern. This can +be problematic if the linked request returns that it will trigger a +callback later, and does so before the linked timeout is fully armed. + +Consolidate all the linked timeout handling into __io_issue_sqe(), +rather than have it spread throughout the various issue entry points. 
+ +Cc: stable@vger.kernel.org +Link: https://github.com/axboe/liburing/issues/1390 +Reported-by: Chase Hiltz +Signed-off-by: Jens Axboe +Signed-off-by: Greg Kroah-Hartman +--- + io_uring/io_uring.c | 50 +++++++++++++++----------------------------------- + 1 file changed, 15 insertions(+), 35 deletions(-) + +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -441,24 +441,6 @@ static struct io_kiocb *__io_prep_linked + return req->link; + } + +-static inline struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req) +-{ +- if (likely(!(req->flags & REQ_F_ARM_LTIMEOUT))) +- return NULL; +- return __io_prep_linked_timeout(req); +-} +- +-static noinline void __io_arm_ltimeout(struct io_kiocb *req) +-{ +- io_queue_linked_timeout(__io_prep_linked_timeout(req)); +-} +- +-static inline void io_arm_ltimeout(struct io_kiocb *req) +-{ +- if (unlikely(req->flags & REQ_F_ARM_LTIMEOUT)) +- __io_arm_ltimeout(req); +-} +- + static void io_prep_async_work(struct io_kiocb *req) + { + const struct io_issue_def *def = &io_issue_defs[req->opcode]; +@@ -511,7 +493,6 @@ static void io_prep_async_link(struct io + + static void io_queue_iowq(struct io_kiocb *req) + { +- struct io_kiocb *link = io_prep_linked_timeout(req); + struct io_uring_task *tctx = req->task->io_uring; + + BUG_ON(!tctx); +@@ -536,8 +517,6 @@ static void io_queue_iowq(struct io_kioc + + trace_io_uring_queue_async_work(req, io_wq_is_hashed(&req->work)); + io_wq_enqueue(tctx->io_wq, &req->work); +- if (link) +- io_queue_linked_timeout(link); + } + + static void io_req_queue_iowq_tw(struct io_kiocb *req, struct io_tw_state *ts) +@@ -1731,17 +1710,24 @@ static bool io_assign_file(struct io_kio + return !!req->file; + } + ++#define REQ_ISSUE_SLOW_FLAGS (REQ_F_CREDS | REQ_F_ARM_LTIMEOUT) ++ + static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags) + { + const struct io_issue_def *def = &io_issue_defs[req->opcode]; + const struct cred *creds = NULL; ++ struct io_kiocb *link = NULL; + int ret; + + if (unlikely(!io_assign_file(req, def, issue_flags))) + return -EBADF; + +- if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred())) +- creds = override_creds(req->creds); ++ if (unlikely(req->flags & REQ_ISSUE_SLOW_FLAGS)) { ++ if ((req->flags & REQ_F_CREDS) && req->creds != current_cred()) ++ creds = override_creds(req->creds); ++ if (req->flags & REQ_F_ARM_LTIMEOUT) ++ link = __io_prep_linked_timeout(req); ++ } + + if (!def->audit_skip) + audit_uring_entry(req->opcode); +@@ -1751,8 +1737,12 @@ static int io_issue_sqe(struct io_kiocb + if (!def->audit_skip) + audit_uring_exit(!ret, ret); + +- if (creds) +- revert_creds(creds); ++ if (unlikely(creds || link)) { ++ if (creds) ++ revert_creds(creds); ++ if (link) ++ io_queue_linked_timeout(link); ++ } + + if (ret == IOU_OK) { + if (issue_flags & IO_URING_F_COMPLETE_DEFER) +@@ -1765,7 +1755,6 @@ static int io_issue_sqe(struct io_kiocb + + if (ret == IOU_ISSUE_SKIP_COMPLETE) { + ret = 0; +- io_arm_ltimeout(req); + + /* If the op doesn't have a file, we're not polling for it */ + if ((req->ctx->flags & IORING_SETUP_IOPOLL) && def->iopoll_queue) +@@ -1808,8 +1797,6 @@ void io_wq_submit_work(struct io_wq_work + else + req_ref_get(req); + +- io_arm_ltimeout(req); +- + /* either cancelled or io-wq is dying, so don't touch tctx->iowq */ + if (atomic_read(&work->flags) & IO_WQ_WORK_CANCEL) { + fail: +@@ -1929,15 +1916,11 @@ struct file *io_file_get_normal(struct i + static void io_queue_async(struct io_kiocb *req, int ret) + __must_hold(&req->ctx->uring_lock) + { +- struct 
io_kiocb *linked_timeout; +- + if (ret != -EAGAIN || (req->flags & REQ_F_NOWAIT)) { + io_req_defer_failed(req, ret); + return; + } + +- linked_timeout = io_prep_linked_timeout(req); +- + switch (io_arm_poll_handler(req, 0)) { + case IO_APOLL_READY: + io_kbuf_recycle(req, 0); +@@ -1950,9 +1933,6 @@ static void io_queue_async(struct io_kio + case IO_APOLL_OK: + break; + } +- +- if (linked_timeout) +- io_queue_linked_timeout(linked_timeout); + } + + static inline void io_queue_sqe(struct io_kiocb *req) diff --git a/queue-6.12/mm-page_alloc-don-t-steal-single-pages-from-biggest-buddy.patch b/queue-6.12/mm-page_alloc-don-t-steal-single-pages-from-biggest-buddy.patch new file mode 100644 index 0000000000..23276a3803 --- /dev/null +++ b/queue-6.12/mm-page_alloc-don-t-steal-single-pages-from-biggest-buddy.patch @@ -0,0 +1,220 @@ +From c2f6ea38fc1b640aa7a2e155cc1c0410ff91afa2 Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Mon, 24 Feb 2025 19:08:24 -0500 +Subject: mm: page_alloc: don't steal single pages from biggest buddy + +From: Johannes Weiner + +commit c2f6ea38fc1b640aa7a2e155cc1c0410ff91afa2 upstream. + +The fallback code searches for the biggest buddy first in an attempt to +steal the whole block and encourage type grouping down the line. + +The approach used to be this: + +- Non-movable requests will split the largest buddy and steal the + remainder. This splits up contiguity, but it allows subsequent + requests of this type to fall back into adjacent space. + +- Movable requests go and look for the smallest buddy instead. The + thinking is that movable requests can be compacted, so grouping is + less important than retaining contiguity. + +c0cd6f557b90 ("mm: page_alloc: fix freelist movement during block +conversion") enforces freelist type hygiene, which restricts stealing to +either claiming the whole block or just taking the requested chunk; no +additional pages or buddy remainders can be stolen any more. + +The patch mishandled when to switch to finding the smallest buddy in that +new reality. As a result, it may steal the exact request size, but from +the biggest buddy. This causes fracturing for no good reason. + +Fix this by committing to the new behavior: either steal the whole block, +or fall back to the smallest buddy. + +Remove single-page stealing from steal_suitable_fallback(). Rename it to +try_to_steal_block() to make the intentions clear. If this fails, always +fall back to the smallest buddy. + +The following is from 4 runs of mmtest's thpchallenge. "Pollute" is +single page fallback, "steal" is conversion of a partially used block. +The numbers for free block conversions (omitted) are comparable. + + vanilla patched + +@pollute[unmovable from reclaimable]: 27 106 +@pollute[unmovable from movable]: 82 46 +@pollute[reclaimable from unmovable]: 256 83 +@pollute[reclaimable from movable]: 46 8 +@pollute[movable from unmovable]: 4841 868 +@pollute[movable from reclaimable]: 5278 12568 + +@steal[unmovable from reclaimable]: 11 12 +@steal[unmovable from movable]: 113 49 +@steal[reclaimable from unmovable]: 19 34 +@steal[reclaimable from movable]: 47 21 +@steal[movable from unmovable]: 250 183 +@steal[movable from reclaimable]: 81 93 + +The allocator appears to do a better job at keeping stealing and polluting +to the first fallback preference. As a result, the numbers for "from +movable" - the least preferred fallback option, and most detrimental to +compactability - are down across the board. 
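
As a reading aid, the following toy C program sketches the policy described above: scan the orders top-down for a whole block to claim, and only fall back bottom-up to the smallest buddy if that fails. It is not mm/page_alloc.c; the freelist is a plain array of counters, and claim_whole_block() is a made-up stand-in for try_to_steal_block().

#include <stdio.h>
#include <stdbool.h>

#define NR_ORDERS 11

static int free_count[NR_ORDERS];	/* buddies available per order */

static bool claim_whole_block(int order)
{
	/*
	 * Stand-in for try_to_steal_block(): pretend only large, whole
	 * blocks (order >= 9 here, an arbitrary choice for this toy)
	 * can be claimed, and claiming may still fail.
	 */
	return free_count[order] > 0 && order >= 9;
}

static int fallback(int order)
{
	/* 1) top-down: prefer converting an entire block for our type */
	for (int o = NR_ORDERS - 1; o >= order; o--)
		if (claim_whole_block(o))
			return o;

	/* 2) bottom-up: no block claimed, take the smallest buddy instead */
	for (int o = order; o < NR_ORDERS; o++)
		if (free_count[o] > 0)
			return o;

	return -1;			/* nothing suitable on any freelist */
}

int main(void)
{
	free_count[3] = 2;		/* only a small buddy is free */
	printf("order-%d buddy used for an order-0 request\n", fallback(0));
	return 0;
}
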
+ +Link: https://lkml.kernel.org/r/20250225001023.1494422-2-hannes@cmpxchg.org +Fixes: c0cd6f557b90 ("mm: page_alloc: fix freelist movement during block conversion") +Signed-off-by: Johannes Weiner +Suggested-by: Vlastimil Babka +Reviewed-by: Brendan Jackman +Reviewed-by: Vlastimil Babka +Signed-off-by: Andrew Morton +Signed-off-by: Johannes Weiner +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 80 +++++++++++++++++++++++--------------------------------- + 1 file changed, 34 insertions(+), 46 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -1907,13 +1907,12 @@ static inline bool boost_watermark(struc + * can claim the whole pageblock for the requested migratetype. If not, we check + * the pageblock for constituent pages; if at least half of the pages are free + * or compatible, we can still claim the whole block, so pages freed in the +- * future will be put on the correct free list. Otherwise, we isolate exactly +- * the order we need from the fallback block and leave its migratetype alone. ++ * future will be put on the correct free list. + */ + static struct page * +-steal_suitable_fallback(struct zone *zone, struct page *page, +- int current_order, int order, int start_type, +- unsigned int alloc_flags, bool whole_block) ++try_to_steal_block(struct zone *zone, struct page *page, ++ int current_order, int order, int start_type, ++ unsigned int alloc_flags) + { + int free_pages, movable_pages, alike_pages; + unsigned long start_pfn; +@@ -1926,7 +1925,7 @@ steal_suitable_fallback(struct zone *zon + * highatomic accounting. + */ + if (is_migrate_highatomic(block_type)) +- goto single_page; ++ return NULL; + + /* Take ownership for orders >= pageblock_order */ + if (current_order >= pageblock_order) { +@@ -1947,14 +1946,10 @@ steal_suitable_fallback(struct zone *zon + if (boost_watermark(zone) && (alloc_flags & ALLOC_KSWAPD)) + set_bit(ZONE_BOOSTED_WATERMARK, &zone->flags); + +- /* We are not allowed to try stealing from the whole block */ +- if (!whole_block) +- goto single_page; +- + /* moving whole block can fail due to zone boundary conditions */ + if (!prep_move_freepages_block(zone, page, &start_pfn, &free_pages, + &movable_pages)) +- goto single_page; ++ return NULL; + + /* + * Determine how many pages are compatible with our allocation. +@@ -1987,9 +1982,7 @@ steal_suitable_fallback(struct zone *zon + return __rmqueue_smallest(zone, order, start_type); + } + +-single_page: +- page_del_and_expand(zone, page, order, current_order, block_type); +- return page; ++ return NULL; + } + + /* +@@ -2171,14 +2164,19 @@ static bool unreserve_highatomic_pageblo + } + + /* +- * Try finding a free buddy page on the fallback list and put it on the free +- * list of requested migratetype, possibly along with other pages from the same +- * block, depending on fragmentation avoidance heuristics. Returns true if +- * fallback was found so that __rmqueue_smallest() can grab it. ++ * Try finding a free buddy page on the fallback list. ++ * ++ * This will attempt to steal a whole pageblock for the requested type ++ * to ensure grouping of such requests in the future. ++ * ++ * If a whole block cannot be stolen, regress to __rmqueue_smallest() ++ * logic to at least break up as little contiguity as possible. + * + * The use of signed ints for order and current_order is a deliberate + * deviation from the rest of this file, to make the for loop + * condition simpler. ++ * ++ * Return the stolen page, or NULL if none can be found. 
+ */ + static __always_inline struct page * + __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, +@@ -2212,45 +2210,35 @@ __rmqueue_fallback(struct zone *zone, in + if (fallback_mt == -1) + continue; + +- /* +- * We cannot steal all free pages from the pageblock and the +- * requested migratetype is movable. In that case it's better to +- * steal and split the smallest available page instead of the +- * largest available page, because even if the next movable +- * allocation falls back into a different pageblock than this +- * one, it won't cause permanent fragmentation. +- */ +- if (!can_steal && start_migratetype == MIGRATE_MOVABLE +- && current_order > order) +- goto find_smallest; ++ if (!can_steal) ++ break; + +- goto do_steal; ++ page = get_page_from_free_area(area, fallback_mt); ++ page = try_to_steal_block(zone, page, current_order, order, ++ start_migratetype, alloc_flags); ++ if (page) ++ goto got_one; + } + +- return NULL; ++ if (alloc_flags & ALLOC_NOFRAGMENT) ++ return NULL; + +-find_smallest: ++ /* No luck stealing blocks. Find the smallest fallback page */ + for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { + area = &(zone->free_area[current_order]); + fallback_mt = find_suitable_fallback(area, current_order, + start_migratetype, false, &can_steal); +- if (fallback_mt != -1) +- break; +- } +- +- /* +- * This should not happen - we already found a suitable fallback +- * when looking for the largest page. +- */ +- VM_BUG_ON(current_order > MAX_PAGE_ORDER); ++ if (fallback_mt == -1) ++ continue; + +-do_steal: +- page = get_page_from_free_area(area, fallback_mt); ++ page = get_page_from_free_area(area, fallback_mt); ++ page_del_and_expand(zone, page, order, current_order, fallback_mt); ++ goto got_one; ++ } + +- /* take off list, maybe claim block, expand remainder */ +- page = steal_suitable_fallback(zone, page, current_order, order, +- start_migratetype, alloc_flags, can_steal); ++ return NULL; + ++got_one: + trace_mm_page_alloc_extfrag(page, order, current_order, + start_migratetype, fallback_mt); + diff --git a/queue-6.12/mm-page_alloc-speed-up-fallbacks-in-rmqueue_bulk.patch b/queue-6.12/mm-page_alloc-speed-up-fallbacks-in-rmqueue_bulk.patch new file mode 100644 index 0000000000..3cde54c562 --- /dev/null +++ b/queue-6.12/mm-page_alloc-speed-up-fallbacks-in-rmqueue_bulk.patch @@ -0,0 +1,274 @@ +From 90abee6d7895d5eef18c91d870d8168be4e76e9d Mon Sep 17 00:00:00 2001 +From: Johannes Weiner +Date: Mon, 7 Apr 2025 14:01:53 -0400 +Subject: mm: page_alloc: speed up fallbacks in rmqueue_bulk() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Johannes Weiner + +commit 90abee6d7895d5eef18c91d870d8168be4e76e9d upstream. + +The test robot identified c2f6ea38fc1b ("mm: page_alloc: don't steal +single pages from biggest buddy") as the root cause of a 56.4% regression +in vm-scalability::lru-file-mmap-read. + +Carlos reports an earlier patch, c0cd6f557b90 ("mm: page_alloc: fix +freelist movement during block conversion"), as the root cause for a +regression in worst-case zone->lock+irqoff hold times. + +Both of these patches modify the page allocator's fallback path to be less +greedy in an effort to stave off fragmentation. The flip side of this is +that fallbacks are also less productive each time around, which means the +fallback search can run much more frequently. 
+ +Carlos' traces point to rmqueue_bulk() specifically, which tries to refill +the percpu cache by allocating a large batch of pages in a loop. It +highlights how once the native freelists are exhausted, the fallback code +first scans orders top-down for whole blocks to claim, then falls back to +a bottom-up search for the smallest buddy to steal. For the next batch +page, it goes through the same thing again. + +This can be made more efficient. Since rmqueue_bulk() holds the +zone->lock over the entire batch, the freelists are not subject to outside +changes; when the search for a block to claim has already failed, there is +no point in trying again for the next page. + +Modify __rmqueue() to remember the last successful fallback mode, and +restart directly from there on the next rmqueue_bulk() iteration. + +Oliver confirms that this improves beyond the regression that the test +robot reported against c2f6ea38fc1b: + +commit: + f3b92176f4 ("tools/selftests: add guard region test for /proc/$pid/pagemap") + c2f6ea38fc ("mm: page_alloc: don't steal single pages from biggest buddy") + acc4d5ff0b ("Merge tag 'net-6.15-rc0' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net") + 2c847f27c3 ("mm: page_alloc: speed up fallbacks in rmqueue_bulk()") <--- your patch + +f3b92176f4f7100f c2f6ea38fc1b640aa7a2e155cc1 acc4d5ff0b61eb1715c498b6536 2c847f27c37da65a93d23c237c5 +---------------- --------------------------- --------------------------- --------------------------- + %stddev %change %stddev %change %stddev %change %stddev + \ | \ | \ | \ + 25525364 ± 3% -56.4% 11135467 -57.8% 10779336 +31.6% 33581409 vm-scalability.throughput + +Carlos confirms that worst-case times are almost fully recovered +compared to before the earlier culprit patch: + + 2dd482ba627d (before freelist hygiene): 1ms + c0cd6f557b90 (after freelist hygiene): 90ms + next-20250319 (steal smallest buddy): 280ms + this patch : 8ms + +[jackmanb@google.com: comment updates] + Link: https://lkml.kernel.org/r/D92AC0P9594X.3BML64MUKTF8Z@google.com +[hannes@cmpxchg.org: reset rmqueue_mode in rmqueue_buddy() error loop, per Yunsheng Lin] + Link: https://lkml.kernel.org/r/20250409140023.GA2313@cmpxchg.org +Link: https://lkml.kernel.org/r/20250407180154.63348-1-hannes@cmpxchg.org +Fixes: c0cd6f557b90 ("mm: page_alloc: fix freelist movement during block conversion") +Fixes: c2f6ea38fc1b ("mm: page_alloc: don't steal single pages from biggest buddy") +Signed-off-by: Johannes Weiner +Signed-off-by: Brendan Jackman +Reported-by: kernel test robot +Reported-by: Carlos Song +Tested-by: Carlos Song +Tested-by: kernel test robot +Closes: https://lore.kernel.org/oe-lkp/202503271547.fc08b188-lkp@intel.com +Reviewed-by: Brendan Jackman +Tested-by: Shivank Garg +Acked-by: Zi Yan +Reviewed-by: Vlastimil Babka +Cc: [6.10+] +Signed-off-by: Andrew Morton +Signed-off-by: Johannes Weiner +Signed-off-by: Greg Kroah-Hartman +--- + mm/page_alloc.c | 113 +++++++++++++++++++++++++++++++++++++++----------------- + 1 file changed, 80 insertions(+), 33 deletions(-) + +--- a/mm/page_alloc.c ++++ b/mm/page_alloc.c +@@ -2164,22 +2164,15 @@ static bool unreserve_highatomic_pageblo + } + + /* +- * Try finding a free buddy page on the fallback list. +- * +- * This will attempt to steal a whole pageblock for the requested type +- * to ensure grouping of such requests in the future. +- * +- * If a whole block cannot be stolen, regress to __rmqueue_smallest() +- * logic to at least break up as little contiguity as possible. 
++ * Try to allocate from some fallback migratetype by claiming the entire block, ++ * i.e. converting it to the allocation's start migratetype. + * + * The use of signed ints for order and current_order is a deliberate + * deviation from the rest of this file, to make the for loop + * condition simpler. +- * +- * Return the stolen page, or NULL if none can be found. + */ + static __always_inline struct page * +-__rmqueue_fallback(struct zone *zone, int order, int start_migratetype, ++__rmqueue_claim(struct zone *zone, int order, int start_migratetype, + unsigned int alloc_flags) + { + struct free_area *area; +@@ -2216,14 +2209,29 @@ __rmqueue_fallback(struct zone *zone, in + page = get_page_from_free_area(area, fallback_mt); + page = try_to_steal_block(zone, page, current_order, order, + start_migratetype, alloc_flags); +- if (page) +- goto got_one; ++ if (page) { ++ trace_mm_page_alloc_extfrag(page, order, current_order, ++ start_migratetype, fallback_mt); ++ return page; ++ } + } + +- if (alloc_flags & ALLOC_NOFRAGMENT) +- return NULL; ++ return NULL; ++} ++ ++/* ++ * Try to steal a single page from some fallback migratetype. Leave the rest of ++ * the block as its current migratetype, potentially causing fragmentation. ++ */ ++static __always_inline struct page * ++__rmqueue_steal(struct zone *zone, int order, int start_migratetype) ++{ ++ struct free_area *area; ++ int current_order; ++ struct page *page; ++ int fallback_mt; ++ bool can_steal; + +- /* No luck stealing blocks. Find the smallest fallback page */ + for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { + area = &(zone->free_area[current_order]); + fallback_mt = find_suitable_fallback(area, current_order, +@@ -2233,25 +2241,28 @@ __rmqueue_fallback(struct zone *zone, in + + page = get_page_from_free_area(area, fallback_mt); + page_del_and_expand(zone, page, order, current_order, fallback_mt); +- goto got_one; ++ trace_mm_page_alloc_extfrag(page, order, current_order, ++ start_migratetype, fallback_mt); ++ return page; + } + + return NULL; +- +-got_one: +- trace_mm_page_alloc_extfrag(page, order, current_order, +- start_migratetype, fallback_mt); +- +- return page; + } + ++enum rmqueue_mode { ++ RMQUEUE_NORMAL, ++ RMQUEUE_CMA, ++ RMQUEUE_CLAIM, ++ RMQUEUE_STEAL, ++}; ++ + /* + * Do the hard work of removing an element from the buddy allocator. + * Call me with the zone->lock already held. + */ + static __always_inline struct page * + __rmqueue(struct zone *zone, unsigned int order, int migratetype, +- unsigned int alloc_flags) ++ unsigned int alloc_flags, enum rmqueue_mode *mode) + { + struct page *page; + +@@ -2270,16 +2281,49 @@ __rmqueue(struct zone *zone, unsigned in + } + } + +- page = __rmqueue_smallest(zone, order, migratetype); +- if (unlikely(!page)) { +- if (alloc_flags & ALLOC_CMA) ++ /* ++ * First try the freelists of the requested migratetype, then try ++ * fallbacks modes with increasing levels of fragmentation risk. ++ * ++ * The fallback logic is expensive and rmqueue_bulk() calls in ++ * a loop with the zone->lock held, meaning the freelists are ++ * not subject to any outside changes. Remember in *mode where ++ * we found pay dirt, to save us the search on the next call. 
++ */ ++ switch (*mode) { ++ case RMQUEUE_NORMAL: ++ page = __rmqueue_smallest(zone, order, migratetype); ++ if (page) ++ return page; ++ fallthrough; ++ case RMQUEUE_CMA: ++ if (alloc_flags & ALLOC_CMA) { + page = __rmqueue_cma_fallback(zone, order); +- +- if (!page) +- page = __rmqueue_fallback(zone, order, migratetype, +- alloc_flags); ++ if (page) { ++ *mode = RMQUEUE_CMA; ++ return page; ++ } ++ } ++ fallthrough; ++ case RMQUEUE_CLAIM: ++ page = __rmqueue_claim(zone, order, migratetype, alloc_flags); ++ if (page) { ++ /* Replenished preferred freelist, back to normal mode. */ ++ *mode = RMQUEUE_NORMAL; ++ return page; ++ } ++ fallthrough; ++ case RMQUEUE_STEAL: ++ if (!(alloc_flags & ALLOC_NOFRAGMENT)) { ++ page = __rmqueue_steal(zone, order, migratetype); ++ if (page) { ++ *mode = RMQUEUE_STEAL; ++ return page; ++ } ++ } + } +- return page; ++ ++ return NULL; + } + + /* +@@ -2291,13 +2335,14 @@ static int rmqueue_bulk(struct zone *zon + unsigned long count, struct list_head *list, + int migratetype, unsigned int alloc_flags) + { ++ enum rmqueue_mode rmqm = RMQUEUE_NORMAL; + unsigned long flags; + int i; + + spin_lock_irqsave(&zone->lock, flags); + for (i = 0; i < count; ++i) { + struct page *page = __rmqueue(zone, order, migratetype, +- alloc_flags); ++ alloc_flags, &rmqm); + if (unlikely(page == NULL)) + break; + +@@ -2898,7 +2943,9 @@ struct page *rmqueue_buddy(struct zone * + if (alloc_flags & ALLOC_HIGHATOMIC) + page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC); + if (!page) { +- page = __rmqueue(zone, order, migratetype, alloc_flags); ++ enum rmqueue_mode rmqm = RMQUEUE_NORMAL; ++ ++ page = __rmqueue(zone, order, migratetype, alloc_flags, &rmqm); + + /* + * If the allocation fails, allow OOM handling and diff --git a/queue-6.12/rust-allow-rust-1.87.0-s-clippy-ptr_eq-lint.patch b/queue-6.12/rust-allow-rust-1.87.0-s-clippy-ptr_eq-lint.patch new file mode 100644 index 0000000000..644d117d36 --- /dev/null +++ b/queue-6.12/rust-allow-rust-1.87.0-s-clippy-ptr_eq-lint.patch @@ -0,0 +1,65 @@ +From a39f3087092716f2bd531d6fdc20403c3dc2a879 Mon Sep 17 00:00:00 2001 +From: Miguel Ojeda +Date: Fri, 2 May 2025 16:02:34 +0200 +Subject: rust: allow Rust 1.87.0's `clippy::ptr_eq` lint + +From: Miguel Ojeda + +commit a39f3087092716f2bd531d6fdc20403c3dc2a879 upstream. + +Starting with Rust 1.87.0 (expected 2025-05-15) [1], Clippy may expand +the `ptr_eq` lint, e.g.: + + error: use `core::ptr::eq` when comparing raw pointers + --> rust/kernel/list.rs:438:12 + | + 438 | if self.first == item { + | ^^^^^^^^^^^^^^^^^^ help: try: `core::ptr::eq(self.first, item)` + | + = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#ptr_eq + = note: `-D clippy::ptr-eq` implied by `-D warnings` + = help: to override `-D warnings` add `#[allow(clippy::ptr_eq)]` + +It is expected that a PR to relax the lint will be backported [2] by +the time Rust 1.87.0 releases, since the lint was considered too eager +(at least by default) [3]. + +Thus allow the lint temporarily just in case. + +Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). +Link: https://github.com/rust-lang/rust-clippy/pull/14339 [1] +Link: https://github.com/rust-lang/rust-clippy/pull/14526 [2] +Link: https://github.com/rust-lang/rust-clippy/issues/14525 [3] +Link: https://lore.kernel.org/r/20250502140237.1659624-3-ojeda@kernel.org +[ Converted to `allow`s since backport was confirmed. 
- Miguel ] +Signed-off-by: Miguel Ojeda +Signed-off-by: Greg Kroah-Hartman +--- + rust/kernel/alloc/kvec.rs | 3 +++ + rust/kernel/list.rs | 3 +++ + 2 files changed, 6 insertions(+) + +--- a/rust/kernel/alloc/kvec.rs ++++ b/rust/kernel/alloc/kvec.rs +@@ -2,6 +2,9 @@ + + //! Implementation of [`Vec`]. + ++// May not be needed in Rust 1.87.0 (pending beta backport). ++#![allow(clippy::ptr_eq)] ++ + use super::{ + allocator::{KVmalloc, Kmalloc, Vmalloc}, + layout::ArrayLayout, +--- a/rust/kernel/list.rs ++++ b/rust/kernel/list.rs +@@ -4,6 +4,9 @@ + + //! A linked list implementation. + ++// May not be needed in Rust 1.87.0 (pending beta backport). ++#![allow(clippy::ptr_eq)] ++ + use crate::init::PinInit; + use crate::sync::ArcBorrow; + use crate::types::Opaque; diff --git a/queue-6.12/rust-clean-rust-1.88.0-s-clippy-uninlined_format_args-lint.patch b/queue-6.12/rust-clean-rust-1.88.0-s-clippy-uninlined_format_args-lint.patch new file mode 100644 index 0000000000..09a793491f --- /dev/null +++ b/queue-6.12/rust-clean-rust-1.88.0-s-clippy-uninlined_format_args-lint.patch @@ -0,0 +1,245 @@ +From 211dcf77856db64c73e0c3b9ce0c624ec855daca Mon Sep 17 00:00:00 2001 +From: Miguel Ojeda +Date: Fri, 2 May 2025 16:02:37 +0200 +Subject: rust: clean Rust 1.88.0's `clippy::uninlined_format_args` lint + +From: Miguel Ojeda + +commit 211dcf77856db64c73e0c3b9ce0c624ec855daca upstream. + +Starting with Rust 1.88.0 (expected 2025-06-26) [1], `rustc` may move +back the `uninlined_format_args` to `style` from `pedantic` (it was +there waiting for rust-analyzer suppotr), and thus we will start to see +lints like: + + warning: variables can be used directly in the `format!` string + --> rust/macros/kunit.rs:105:37 + | + 105 | let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{}", test); + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args + help: change this to + | + 105 - let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{}", test); + 105 + let kunit_wrapper_fn_name = format!("kunit_rust_wrapper_{test}"); + +There is even a case that is a pure removal: + + warning: variables can be used directly in the `format!` string + --> rust/macros/module.rs:51:13 + | + 51 | format!("{field}={content}\0", field = field, content = content) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#uninlined_format_args + help: change this to + | + 51 - format!("{field}={content}\0", field = field, content = content) + 51 + format!("{field}={content}\0") + +The lints all seem like nice cleanups, thus just apply them. + +We may want to disable `allow-mixed-uninlined-format-args` in the future. + +Cc: stable@vger.kernel.org # Needed in 6.12.y and later (Rust is pinned in older LTSs). 
+Link: https://github.com/rust-lang/rust-clippy/pull/14160 [1] +Acked-by: Benno Lossin +Reviewed-by: Tamir Duberstein +Reviewed-by: Alice Ryhl +Link: https://lore.kernel.org/r/20250502140237.1659624-6-ojeda@kernel.org +Signed-off-by: Miguel Ojeda +Signed-off-by: Greg Kroah-Hartman +--- + rust/kernel/str.rs | 46 ++++++++++++++++++++++----------------------- + rust/macros/module.rs | 19 ++++-------------- + rust/macros/pinned_drop.rs | 3 -- + 3 files changed, 29 insertions(+), 39 deletions(-) + +--- a/rust/kernel/str.rs ++++ b/rust/kernel/str.rs +@@ -55,7 +55,7 @@ impl fmt::Display for BStr { + b'\r' => f.write_str("\\r")?, + // Printable characters. + 0x20..=0x7e => f.write_char(b as char)?, +- _ => write!(f, "\\x{:02x}", b)?, ++ _ => write!(f, "\\x{b:02x}")?, + } + } + Ok(()) +@@ -90,7 +90,7 @@ impl fmt::Debug for BStr { + b'\\' => f.write_str("\\\\")?, + // Printable characters. + 0x20..=0x7e => f.write_char(b as char)?, +- _ => write!(f, "\\x{:02x}", b)?, ++ _ => write!(f, "\\x{b:02x}")?, + } + } + f.write_char('"') +@@ -397,7 +397,7 @@ impl fmt::Display for CStr { + // Printable character. + f.write_char(c as char)?; + } else { +- write!(f, "\\x{:02x}", c)?; ++ write!(f, "\\x{c:02x}")?; + } + } + Ok(()) +@@ -428,7 +428,7 @@ impl fmt::Debug for CStr { + // Printable characters. + b'\"' => f.write_str("\\\"")?, + 0x20..=0x7e => f.write_char(c as char)?, +- _ => write!(f, "\\x{:02x}", c)?, ++ _ => write!(f, "\\x{c:02x}")?, + } + } + f.write_str("\"") +@@ -588,13 +588,13 @@ mod tests { + #[test] + fn test_cstr_display() { + let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); +- assert_eq!(format!("{}", hello_world), "hello, world!"); ++ assert_eq!(format!("{hello_world}"), "hello, world!"); + let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); +- assert_eq!(format!("{}", non_printables), "\\x01\\x09\\x0a"); ++ assert_eq!(format!("{non_printables}"), "\\x01\\x09\\x0a"); + let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); +- assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu"); ++ assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); + let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); +- assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80"); ++ assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); + } + + #[test] +@@ -605,47 +605,47 @@ mod tests { + bytes[i as usize] = i.wrapping_add(1); + } + let cstr = CStr::from_bytes_with_nul(&bytes).unwrap(); +- assert_eq!(format!("{}", cstr), ALL_ASCII_CHARS); ++ assert_eq!(format!("{cstr}"), ALL_ASCII_CHARS); + } + + #[test] + fn test_cstr_debug() { + let hello_world = CStr::from_bytes_with_nul(b"hello, world!\0").unwrap(); +- assert_eq!(format!("{:?}", hello_world), "\"hello, world!\""); ++ assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); + let non_printables = CStr::from_bytes_with_nul(b"\x01\x09\x0a\0").unwrap(); +- assert_eq!(format!("{:?}", non_printables), "\"\\x01\\x09\\x0a\""); ++ assert_eq!(format!("{non_printables:?}"), "\"\\x01\\x09\\x0a\""); + let non_ascii = CStr::from_bytes_with_nul(b"d\xe9j\xe0 vu\0").unwrap(); +- assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\""); ++ assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); + let good_bytes = CStr::from_bytes_with_nul(b"\xf0\x9f\xa6\x80\0").unwrap(); +- assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\""); ++ assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); + } + + #[test] + fn test_bstr_display() { + let hello_world 
= BStr::from_bytes(b"hello, world!"); +- assert_eq!(format!("{}", hello_world), "hello, world!"); ++ assert_eq!(format!("{hello_world}"), "hello, world!"); + let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); +- assert_eq!(format!("{}", escapes), "_\\t_\\n_\\r_\\_'_\"_"); ++ assert_eq!(format!("{escapes}"), "_\\t_\\n_\\r_\\_'_\"_"); + let others = BStr::from_bytes(b"\x01"); +- assert_eq!(format!("{}", others), "\\x01"); ++ assert_eq!(format!("{others}"), "\\x01"); + let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); +- assert_eq!(format!("{}", non_ascii), "d\\xe9j\\xe0 vu"); ++ assert_eq!(format!("{non_ascii}"), "d\\xe9j\\xe0 vu"); + let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); +- assert_eq!(format!("{}", good_bytes), "\\xf0\\x9f\\xa6\\x80"); ++ assert_eq!(format!("{good_bytes}"), "\\xf0\\x9f\\xa6\\x80"); + } + + #[test] + fn test_bstr_debug() { + let hello_world = BStr::from_bytes(b"hello, world!"); +- assert_eq!(format!("{:?}", hello_world), "\"hello, world!\""); ++ assert_eq!(format!("{hello_world:?}"), "\"hello, world!\""); + let escapes = BStr::from_bytes(b"_\t_\n_\r_\\_\'_\"_"); +- assert_eq!(format!("{:?}", escapes), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); ++ assert_eq!(format!("{escapes:?}"), "\"_\\t_\\n_\\r_\\\\_'_\\\"_\""); + let others = BStr::from_bytes(b"\x01"); +- assert_eq!(format!("{:?}", others), "\"\\x01\""); ++ assert_eq!(format!("{others:?}"), "\"\\x01\""); + let non_ascii = BStr::from_bytes(b"d\xe9j\xe0 vu"); +- assert_eq!(format!("{:?}", non_ascii), "\"d\\xe9j\\xe0 vu\""); ++ assert_eq!(format!("{non_ascii:?}"), "\"d\\xe9j\\xe0 vu\""); + let good_bytes = BStr::from_bytes(b"\xf0\x9f\xa6\x80"); +- assert_eq!(format!("{:?}", good_bytes), "\"\\xf0\\x9f\\xa6\\x80\""); ++ assert_eq!(format!("{good_bytes:?}"), "\"\\xf0\\x9f\\xa6\\x80\""); + } + } + +--- a/rust/macros/module.rs ++++ b/rust/macros/module.rs +@@ -48,7 +48,7 @@ impl<'a> ModInfoBuilder<'a> { + ) + } else { + // Loadable modules' modinfo strings go as-is. +- format!("{field}={content}\0", field = field, content = content) ++ format!("{field}={content}\0") + }; + + write!( +@@ -124,10 +124,7 @@ impl ModuleInfo { + }; + + if seen_keys.contains(&key) { +- panic!( +- "Duplicated key \"{}\". Keys can only be specified once.", +- key +- ); ++ panic!("Duplicated key \"{key}\". Keys can only be specified once."); + } + + assert_eq!(expect_punct(it), ':'); +@@ -140,10 +137,7 @@ impl ModuleInfo { + "license" => info.license = expect_string_ascii(it), + "alias" => info.alias = Some(expect_string_array(it)), + "firmware" => info.firmware = Some(expect_string_array(it)), +- _ => panic!( +- "Unknown key \"{}\". Valid keys are: {:?}.", +- key, EXPECTED_KEYS +- ), ++ _ => panic!("Unknown key \"{key}\". Valid keys are: {EXPECTED_KEYS:?}."), + } + + assert_eq!(expect_punct(it), ','); +@@ -155,7 +149,7 @@ impl ModuleInfo { + + for key in REQUIRED_KEYS { + if !seen_keys.iter().any(|e| e == key) { +- panic!("Missing required key \"{}\".", key); ++ panic!("Missing required key \"{key}\"."); + } + } + +@@ -167,10 +161,7 @@ impl ModuleInfo { + } + + if seen_keys != ordered_keys { +- panic!( +- "Keys are not ordered as expected. Order them like: {:?}.", +- ordered_keys +- ); ++ panic!("Keys are not ordered as expected. Order them like: {ordered_keys:?}."); + } + + info +--- a/rust/macros/pinned_drop.rs ++++ b/rust/macros/pinned_drop.rs +@@ -25,8 +25,7 @@ pub(crate) fn pinned_drop(_args: TokenSt + // Found the end of the generics, this should be `PinnedDrop`. 
+ assert!( + matches!(tt, TokenTree::Ident(i) if i.to_string() == "PinnedDrop"), +- "expected 'PinnedDrop', found: '{:?}'", +- tt ++ "expected 'PinnedDrop', found: '{tt:?}'" + ); + pinned_drop_idx = Some(i); + break; diff --git a/queue-6.12/sched-eevdf-fix-se-slice-being-set-to-u64_max-and-resulting-crash.patch b/queue-6.12/sched-eevdf-fix-se-slice-being-set-to-u64_max-and-resulting-crash.patch new file mode 100644 index 0000000000..afaf27deb0 --- /dev/null +++ b/queue-6.12/sched-eevdf-fix-se-slice-being-set-to-u64_max-and-resulting-crash.patch @@ -0,0 +1,86 @@ +From bbce3de72be56e4b5f68924b7da9630cc89aa1a8 Mon Sep 17 00:00:00 2001 +From: Omar Sandoval +Date: Fri, 25 Apr 2025 01:51:24 -0700 +Subject: sched/eevdf: Fix se->slice being set to U64_MAX and resulting crash + +From: Omar Sandoval + +commit bbce3de72be56e4b5f68924b7da9630cc89aa1a8 upstream. + +There is a code path in dequeue_entities() that can set the slice of a +sched_entity to U64_MAX, which sometimes results in a crash. + +The offending case is when dequeue_entities() is called to dequeue a +delayed group entity, and then the entity's parent's dequeue is delayed. +In that case: + +1. In the if (entity_is_task(se)) else block at the beginning of + dequeue_entities(), slice is set to + cfs_rq_min_slice(group_cfs_rq(se)). If the entity was delayed, then + it has no queued tasks, so cfs_rq_min_slice() returns U64_MAX. +2. The first for_each_sched_entity() loop dequeues the entity. +3. If the entity was its parent's only child, then the next iteration + tries to dequeue the parent. +4. If the parent's dequeue needs to be delayed, then it breaks from the + first for_each_sched_entity() loop _without updating slice_. +5. The second for_each_sched_entity() loop sets the parent's ->slice to + the saved slice, which is still U64_MAX. + +This throws off subsequent calculations with potentially catastrophic +results. A manifestation we saw in production was: + +6. In update_entity_lag(), se->slice is used to calculate limit, which + ends up as a huge negative number. +7. limit is used in se->vlag = clamp(vlag, -limit, limit). Because limit + is negative, vlag > limit, so se->vlag is set to the same huge + negative number. +8. In place_entity(), se->vlag is scaled, which overflows and results in + another huge (positive or negative) number. +9. The adjusted lag is subtracted from se->vruntime, which increases or + decreases se->vruntime by a huge number. +10. pick_eevdf() calls entity_eligible()/vruntime_eligible(), which + incorrectly returns false because the vruntime is so far from the + other vruntimes on the queue, causing the + (vruntime - cfs_rq->min_vruntime) * load calulation to overflow. +11. Nothing appears to be eligible, so pick_eevdf() returns NULL. +12. pick_next_entity() tries to dereference the return value of + pick_eevdf() and crashes. + +Dumping the cfs_rq states from the core dumps with drgn showed tell-tale +huge vruntime ranges and bogus vlag values, and I also traced se->slice +being set to U64_MAX on live systems (which was usually "benign" since +the rest of the runqueue needed to be in a particular state to crash). + +Fix it in dequeue_entities() by always setting slice from the first +non-empty cfs_rq. 
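
The failure mode is easy to reproduce in miniature. The toy C program below (not kernel/sched/fair.c; the struct and helper names are invented for the example) shows how a "minimum over an empty queue" sentinel of U64_MAX leaks out when the slice is captured once up front, and how re-reading it from the first non-empty queue at the stopping point avoids that.

#include <stdint.h>
#include <stdio.h>

struct toy_cfs_rq {
	unsigned int nr_queued;
	uint64_t min_slice;
};

static uint64_t toy_min_slice(const struct toy_cfs_rq *cfs_rq)
{
	/* empty queue: nothing to take a minimum over, return the sentinel */
	return cfs_rq->nr_queued ? cfs_rq->min_slice : UINT64_MAX;
}

int main(void)
{
	struct toy_cfs_rq delayed_child = { .nr_queued = 0 };
	struct toy_cfs_rq parent = { .nr_queued = 2, .min_slice = 3000000 };

	/* buggy pattern: slice captured once, from the (empty) child */
	uint64_t slice = toy_min_slice(&delayed_child);
	printf("stale slice: %llu\n", (unsigned long long)slice);

	/*
	 * fixed pattern: refresh the slice from the first non-empty queue
	 * at the point where the walk stops
	 */
	slice = toy_min_slice(&parent);
	printf("refreshed slice: %llu\n", (unsigned long long)slice);
	return 0;
}
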
+
+Fixes: aef6987d8954 ("sched/eevdf: Propagate min_slice up the cgroup hierarchy")
+Signed-off-by: Omar Sandoval
+Signed-off-by: Peter Zijlstra (Intel)
+Signed-off-by: Ingo Molnar
+Link: https://lkml.kernel.org/r/f0c2d1072be229e1bdddc73c0703919a8b00c652.1745570998.git.osandov@fb.com
+Signed-off-by: Greg Kroah-Hartman
+---
+ kernel/sched/fair.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -7182,9 +7182,6 @@ static int dequeue_entities(struct rq *r
+ 		idle_h_nr_running = task_has_idle_policy(p);
+ 		if (!task_sleep && !task_delayed)
+ 			h_nr_delayed = !!se->sched_delayed;
+-	} else {
+-		cfs_rq = group_cfs_rq(se);
+-		slice = cfs_rq_min_slice(cfs_rq);
+ 	}
+ 
+ 	for_each_sched_entity(se) {
+@@ -7194,6 +7191,7 @@ static int dequeue_entities(struct rq *r
+ 		if (p && &p->se == se)
+ 			return -1;
+ 
++		slice = cfs_rq_min_slice(cfs_rq);
+ 		break;
+ 	}
+ 
diff --git a/queue-6.12/series b/queue-6.12/series
index 31bb1772ae..930acd2773 100644
--- a/queue-6.12/series
+++ b/queue-6.12/series
@@ -151,3 +151,11 @@ drm-xe-tests-mocs-hold-xe_forcewake_all-for-lncf-reg.patch
 io_uring-sqpoll-increase-task_work-submission-batch-.patch
 do_umount-add-missing-barrier-before-refcount-checks.patch
 revert-um-work-around-sched_yield-not-yielding-in-time-travel-mode.patch
+rust-allow-rust-1.87.0-s-clippy-ptr_eq-lint.patch
+rust-clean-rust-1.88.0-s-clippy-uninlined_format_args-lint.patch
+io_uring-always-arm-linked-timeouts-prior-to-issue.patch
+bluetooth-btmtk-remove-resetting-mt7921-before-downloading-the-fw.patch
+bluetooth-btmtk-remove-the-resetting-step-before-downloading-the-fw.patch
+mm-page_alloc-don-t-steal-single-pages-from-biggest-buddy.patch
+mm-page_alloc-speed-up-fallbacks-in-rmqueue_bulk.patch
+sched-eevdf-fix-se-slice-being-set-to-u64_max-and-resulting-crash.patch