From: Sasha Levin Date: Wed, 21 Jun 2023 16:39:44 +0000 (-0400) Subject: Fixes for 5.4 X-Git-Tag: v4.14.320~80 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=6855e3d67af5eb7adf85ae955ea5ad6f73d39cdb;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.4 Signed-off-by: Sasha Levin --- diff --git a/queue-5.4/epoll-ep_autoremove_wake_function-should-use-list_de.patch b/queue-5.4/epoll-ep_autoremove_wake_function-should-use-list_de.patch new file mode 100644 index 00000000000..66141c2b1b3 --- /dev/null +++ b/queue-5.4/epoll-ep_autoremove_wake_function-should-use-list_de.patch @@ -0,0 +1,47 @@ +From dfb0850769b96ff66a480cf0d2982de5fd5d9122 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 May 2023 11:32:28 -0700 +Subject: epoll: ep_autoremove_wake_function should use list_del_init_careful + +From: Benjamin Segall + +[ Upstream commit 2192bba03d80f829233bfa34506b428f71e531e7 ] + +autoremove_wake_function uses list_del_init_careful, so should epoll's +more aggressive variant. It only doesn't because it was copied from an +older wait.c rather than the most recent. + +[bsegall@google.com: add comment] + Link: https://lkml.kernel.org/r/xm26bki0ulsr.fsf_-_@google.com +Link: https://lkml.kernel.org/r/xm26pm6hvfer.fsf@google.com +Fixes: a16ceb139610 ("epoll: autoremove wakers even more aggressively") +Signed-off-by: Ben Segall +Cc: Al Viro +Cc: Christian Brauner +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + fs/eventpoll.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/fs/eventpoll.c b/fs/eventpoll.c +index 877f9f61a4e8d..8c0e94183186f 100644 +--- a/fs/eventpoll.c ++++ b/fs/eventpoll.c +@@ -1814,7 +1814,11 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry, + { + int ret = default_wake_function(wq_entry, mode, sync, key); + +- list_del_init(&wq_entry->entry); ++ /* ++ * Pairs with list_empty_careful in ep_poll, and ensures future loop ++ * iterations see the cause of this wakeup. 
++ */ ++ list_del_init_careful(&wq_entry->entry); + return ret; + } + +-- +2.39.2 + diff --git a/queue-5.4/list-add-list_del_init_careful-to-go-with-list_empty.patch b/queue-5.4/list-add-list_del_init_careful-to-go-with-list_empty.patch new file mode 100644 index 00000000000..16dd3fa5137 --- /dev/null +++ b/queue-5.4/list-add-list_del_init_careful-to-go-with-list_empty.patch @@ -0,0 +1,93 @@ +From 974003992ac2b0a584fb3170b8115dcfb93db6c7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Jul 2020 12:33:41 -0700 +Subject: list: add "list_del_init_careful()" to go with "list_empty_careful()" + +From: Linus Torvalds + +[ Upstream commit c6fe44d96fc1536af5b11cd859686453d1b7bfd1 ] + +That gives us ordering guarantees around the pair. + +Signed-off-by: Linus Torvalds +Stable-dep-of: 2192bba03d80 ("epoll: ep_autoremove_wake_function should use list_del_init_careful") +Signed-off-by: Sasha Levin +--- + include/linux/list.h | 20 +++++++++++++++++++- + kernel/sched/wait.c | 2 +- + mm/filemap.c | 7 +------ + 3 files changed, 21 insertions(+), 8 deletions(-) + +diff --git a/include/linux/list.h b/include/linux/list.h +index ce19c6b632a59..231ff089f7d1c 100644 +--- a/include/linux/list.h ++++ b/include/linux/list.h +@@ -268,6 +268,24 @@ static inline int list_empty(const struct list_head *head) + return READ_ONCE(head->next) == head; + } + ++/** ++ * list_del_init_careful - deletes entry from list and reinitialize it. ++ * @entry: the element to delete from the list. ++ * ++ * This is the same as list_del_init(), except designed to be used ++ * together with list_empty_careful() in a way to guarantee ordering ++ * of other memory operations. ++ * ++ * Any memory operations done before a list_del_init_careful() are ++ * guaranteed to be visible after a list_empty_careful() test. 
++ */ ++static inline void list_del_init_careful(struct list_head *entry) ++{ ++ __list_del_entry(entry); ++ entry->prev = entry; ++ smp_store_release(&entry->next, entry); ++} ++ + /** + * list_empty_careful - tests whether a list is empty and not being modified + * @head: the list to test +@@ -283,7 +301,7 @@ static inline int list_empty(const struct list_head *head) + */ + static inline int list_empty_careful(const struct list_head *head) + { +- struct list_head *next = head->next; ++ struct list_head *next = smp_load_acquire(&head->next); + return (next == head) && (next == head->prev); + } + +diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c +index 7d668b31dbc6d..c76fe1d4d91e2 100644 +--- a/kernel/sched/wait.c ++++ b/kernel/sched/wait.c +@@ -384,7 +384,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i + int ret = default_wake_function(wq_entry, mode, sync, key); + + if (ret) +- list_del_init(&wq_entry->entry); ++ list_del_init_careful(&wq_entry->entry); + + return ret; + } +diff --git a/mm/filemap.c b/mm/filemap.c +index 83b324420046b..a106d63e84679 100644 +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -1085,13 +1085,8 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, + * since after list_del_init(&wait->entry) the wait entry + * might be de-allocated and the process might even have + * exited. +- * +- * We _really_ should have a "list_del_init_careful()" to +- * properly pair with the unlocked "list_empty_careful()" +- * in finish_wait(). 
+ */ +- smp_mb(); +- list_del_init(&wait->entry); ++ list_del_init_careful(&wait->entry); + return ret; + } + +-- +2.39.2 + diff --git a/queue-5.4/mm-rewrite-wait_on_page_bit_common-logic.patch b/queue-5.4/mm-rewrite-wait_on_page_bit_common-logic.patch new file mode 100644 index 00000000000..28c2a77c286 --- /dev/null +++ b/queue-5.4/mm-rewrite-wait_on_page_bit_common-logic.patch @@ -0,0 +1,227 @@ +From 4fff1ec5895dec84ae4385ba8c5d25f3856f376d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Jul 2020 10:16:49 -0700 +Subject: mm: rewrite wait_on_page_bit_common() logic + +From: Linus Torvalds + +[ Upstream commit 2a9127fcf2296674d58024f83981f40b128fffea ] + +It turns out that wait_on_page_bit_common() had several problems, +ranging from just unfair behavior due to re-queueing at the end of the +wait queue when re-trying, and an outright bug that could result in +missed wakeups (but probably never happened in practice). + +This rewrites the whole logic to avoid both issues, by simply moving the +logic to check (and possibly take) the bit lock into the wakeup path +instead. + +That makes everything much more straightforward, and means that we never +need to re-queue the wait entry: if we get woken up, we'll be notified +through WQ_FLAG_WOKEN, and the wait queue entry will have been removed, +and everything will have been done for us. 
+ +Link: https://lore.kernel.org/lkml/CAHk-=wjJA2Z3kUFb-5s=6+n0qbTs8ELqKFt9B3pH85a8fGD73w@mail.gmail.com/ +Link: https://lore.kernel.org/lkml/alpine.LSU.2.11.2007221359450.1017@eggly.anvils/ +Reported-by: Oleg Nesterov +Reported-by: Hugh Dickins +Cc: Michal Hocko +Reviewed-by: Oleg Nesterov +Signed-off-by: Linus Torvalds +Stable-dep-of: 2192bba03d80 ("epoll: ep_autoremove_wake_function should use list_del_init_careful") +Signed-off-by: Sasha Levin +--- + mm/filemap.c | 132 +++++++++++++++++++++++++++++++++------------------ + 1 file changed, 85 insertions(+), 47 deletions(-) + +diff --git a/mm/filemap.c b/mm/filemap.c +index c094103191a6e..83b324420046b 100644 +--- a/mm/filemap.c ++++ b/mm/filemap.c +@@ -1046,6 +1046,7 @@ struct wait_page_queue { + + static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg) + { ++ int ret; + struct wait_page_key *key = arg; + struct wait_page_queue *wait_page + = container_of(wait, struct wait_page_queue, wait); +@@ -1058,17 +1059,40 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, + return 0; + + /* +- * Stop walking if it's locked. +- * Is this safe if put_and_wait_on_page_locked() is in use? +- * Yes: the waker must hold a reference to this page, and if PG_locked +- * has now already been set by another task, that task must also hold +- * a reference to the *same usage* of this page; so there is no need +- * to walk on to wake even the put_and_wait_on_page_locked() callers. ++ * If it's an exclusive wait, we get the bit for it, and ++ * stop walking if we can't. ++ * ++ * If it's a non-exclusive wait, then the fact that this ++ * wake function was called means that the bit already ++ * was cleared, and we don't care if somebody then ++ * re-took it. 
+ */ +- if (test_bit(key->bit_nr, &key->page->flags)) +- return -1; ++ ret = 0; ++ if (wait->flags & WQ_FLAG_EXCLUSIVE) { ++ if (test_and_set_bit(key->bit_nr, &key->page->flags)) ++ return -1; ++ ret = 1; ++ } ++ wait->flags |= WQ_FLAG_WOKEN; + +- return autoremove_wake_function(wait, mode, sync, key); ++ wake_up_state(wait->private, mode); ++ ++ /* ++ * Ok, we have successfully done what we're waiting for, ++ * and we can unconditionally remove the wait entry. ++ * ++ * Note that this has to be the absolute last thing we do, ++ * since after list_del_init(&wait->entry) the wait entry ++ * might be de-allocated and the process might even have ++ * exited. ++ * ++ * We _really_ should have a "list_del_init_careful()" to ++ * properly pair with the unlocked "list_empty_careful()" ++ * in finish_wait(). ++ */ ++ smp_mb(); ++ list_del_init(&wait->entry); ++ return ret; + } + + static void wake_up_page_bit(struct page *page, int bit_nr) +@@ -1147,16 +1171,31 @@ enum behavior { + */ + }; + ++/* ++ * Attempt to check (or get) the page bit, and mark the ++ * waiter woken if successful. 
++ */ ++static inline bool trylock_page_bit_common(struct page *page, int bit_nr, ++ struct wait_queue_entry *wait) ++{ ++ if (wait->flags & WQ_FLAG_EXCLUSIVE) { ++ if (test_and_set_bit(bit_nr, &page->flags)) ++ return false; ++ } else if (test_bit(bit_nr, &page->flags)) ++ return false; ++ ++ wait->flags |= WQ_FLAG_WOKEN; ++ return true; ++} ++ + static inline int wait_on_page_bit_common(wait_queue_head_t *q, + struct page *page, int bit_nr, int state, enum behavior behavior) + { + struct wait_page_queue wait_page; + wait_queue_entry_t *wait = &wait_page.wait; +- bool bit_is_set; + bool thrashing = false; + bool delayacct = false; + unsigned long pflags; +- int ret = 0; + + if (bit_nr == PG_locked && + !PageUptodate(page) && PageWorkingset(page)) { +@@ -1174,48 +1213,47 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, + wait_page.page = page; + wait_page.bit_nr = bit_nr; + +- for (;;) { +- spin_lock_irq(&q->lock); ++ /* ++ * Do one last check whether we can get the ++ * page bit synchronously. ++ * ++ * Do the SetPageWaiters() marking before that ++ * to let any waker we _just_ missed know they ++ * need to wake us up (otherwise they'll never ++ * even go to the slow case that looks at the ++ * page queue), and add ourselves to the wait ++ * queue if we need to sleep. ++ * ++ * This part needs to be done under the queue ++ * lock to avoid races. ++ */ ++ spin_lock_irq(&q->lock); ++ SetPageWaiters(page); ++ if (!trylock_page_bit_common(page, bit_nr, wait)) ++ __add_wait_queue_entry_tail(q, wait); ++ spin_unlock_irq(&q->lock); + +- if (likely(list_empty(&wait->entry))) { +- __add_wait_queue_entry_tail(q, wait); +- SetPageWaiters(page); +- } ++ /* ++ * From now on, all the logic will be based on ++ * the WQ_FLAG_WOKEN flag, and the and the page ++ * bit testing (and setting) will be - or has ++ * already been - done by the wake function. ++ * ++ * We can drop our reference to the page. 
++ */ ++ if (behavior == DROP) ++ put_page(page); + ++ for (;;) { + set_current_state(state); + +- spin_unlock_irq(&q->lock); +- +- bit_is_set = test_bit(bit_nr, &page->flags); +- if (behavior == DROP) +- put_page(page); +- +- if (likely(bit_is_set)) +- io_schedule(); +- +- if (behavior == EXCLUSIVE) { +- if (!test_and_set_bit_lock(bit_nr, &page->flags)) +- break; +- } else if (behavior == SHARED) { +- if (!test_bit(bit_nr, &page->flags)) +- break; +- } +- +- if (signal_pending_state(state, current)) { +- ret = -EINTR; ++ if (signal_pending_state(state, current)) + break; +- } + +- if (behavior == DROP) { +- /* +- * We can no longer safely access page->flags: +- * even if CONFIG_MEMORY_HOTREMOVE is not enabled, +- * there is a risk of waiting forever on a page reused +- * for something that keeps it locked indefinitely. +- * But best check for -EINTR above before breaking. +- */ ++ if (wait->flags & WQ_FLAG_WOKEN) + break; +- } ++ ++ io_schedule(); + } + + finish_wait(q, wait); +@@ -1234,7 +1272,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q, + * bother with signals either. + */ + +- return ret; ++ return wait->flags & WQ_FLAG_WOKEN ? 
0 : -EINTR; + } + + void wait_on_page_bit(struct page *page, int bit_nr) +-- +2.39.2 + diff --git a/queue-5.4/series b/queue-5.4/series new file mode 100644 index 00000000000..a9f3c328359 --- /dev/null +++ b/queue-5.4/series @@ -0,0 +1,5 @@ +x86-purgatory-fail-the-build-if-purgatory.ro-has-mis.patch +x86-purgatory-remove-pgo-flags.patch +mm-rewrite-wait_on_page_bit_common-logic.patch +list-add-list_del_init_careful-to-go-with-list_empty.patch +epoll-ep_autoremove_wake_function-should-use-list_de.patch diff --git a/queue-5.4/x86-purgatory-fail-the-build-if-purgatory.ro-has-mis.patch b/queue-5.4/x86-purgatory-fail-the-build-if-purgatory.ro-has-mis.patch new file mode 100644 index 00000000000..1dcc1d63794 --- /dev/null +++ b/queue-5.4/x86-purgatory-fail-the-build-if-purgatory.ro-has-mis.patch @@ -0,0 +1,88 @@ +From 73080c3a50981d969859bdbc1c85d079e59458b0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 17 Mar 2020 14:08:41 +0100 +Subject: x86/purgatory: Fail the build if purgatory.ro has missing symbols + +From: Hans de Goede + +[ Upstream commit e4160b2e4b02377c67f8ecd05786811598f39acd ] + +Linking purgatory.ro with -r enables "incremental linking"; this means +no checks for unresolved symbols are done while linking purgatory.ro. + +A change to the sha256 code has caused the purgatory in 5.4-rc1 to have +a missing symbol on memzero_explicit(), yet things still happily build. + +Add an extra check for unresolved symbols by calling ld without -r +before running bin2c to generate kexec-purgatory.c. 
+ +This causes a build of 5.4-rc1 with this patch added to fail as it should: + + CHK arch/x86/purgatory/purgatory.ro + ld: arch/x86/purgatory/purgatory.ro: in function `sha256_transform': + sha256.c:(.text+0x1c0c): undefined reference to `memzero_explicit' + make[2]: *** [arch/x86/purgatory/Makefile:72: + arch/x86/purgatory/kexec-purgatory.c] Error 1 + make[1]: *** [scripts/Makefile.build:509: arch/x86/purgatory] Error 2 + make: *** [Makefile:1650: arch/x86] Error 2 + +Also remove --no-undefined from LDFLAGS_purgatory.ro as that has no +effect. + +Signed-off-by: Hans de Goede +Signed-off-by: Borislav Petkov +Link: https://lkml.kernel.org/r/20200317130841.290418-2-hdegoede@redhat.com +Stable-dep-of: 97b6b9cbba40 ("x86/purgatory: remove PGO flags") +Signed-off-by: Sasha Levin +--- + arch/x86/purgatory/.gitignore | 1 + + arch/x86/purgatory/Makefile | 13 ++++++++++--- + 2 files changed, 11 insertions(+), 3 deletions(-) + create mode 100644 arch/x86/purgatory/.gitignore + +diff --git a/arch/x86/purgatory/.gitignore b/arch/x86/purgatory/.gitignore +new file mode 100644 +index 0000000000000..d2be1500671de +--- /dev/null ++++ b/arch/x86/purgatory/.gitignore +@@ -0,0 +1 @@ ++purgatory.chk +diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile +index 969d2b2eb7d71..d0f628601d47a 100644 +--- a/arch/x86/purgatory/Makefile ++++ b/arch/x86/purgatory/Makefile +@@ -14,8 +14,12 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE + + CFLAGS_sha256.o := -D__DISABLE_EXPORTS + +-LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib +-targets += purgatory.ro ++# When linking purgatory.ro with -r unresolved symbols are not checked, ++# also link a purgatory.chk binary without -r to check for unresolved symbols. 
++PURGATORY_LDFLAGS := -e purgatory_start -nostdlib -z nodefaultlib ++LDFLAGS_purgatory.ro := -r $(PURGATORY_LDFLAGS) ++LDFLAGS_purgatory.chk := $(PURGATORY_LDFLAGS) ++targets += purgatory.ro purgatory.chk + + # Sanitizer, etc. runtimes are unavailable and cannot be linked here. + GCOV_PROFILE := n +@@ -64,12 +68,15 @@ AFLAGS_REMOVE_entry64.o += -g -Wa,-gdwarf-2 + $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) + ++$(obj)/purgatory.chk: $(obj)/purgatory.ro FORCE ++ $(call if_changed,ld) ++ + targets += kexec-purgatory.c + + quiet_cmd_bin2c = BIN2C $@ + cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@ + +-$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE ++$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro $(obj)/purgatory.chk FORCE + $(call if_changed,bin2c) + + obj-$(CONFIG_KEXEC_FILE) += kexec-purgatory.o +-- +2.39.2 + diff --git a/queue-5.4/x86-purgatory-remove-pgo-flags.patch b/queue-5.4/x86-purgatory-remove-pgo-flags.patch new file mode 100644 index 00000000000..adea58d5ab8 --- /dev/null +++ b/queue-5.4/x86-purgatory-remove-pgo-flags.patch @@ -0,0 +1,64 @@ +From 5a54fd54f9738e184364b5467ebd072bc48f4ab0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 19 May 2023 16:47:37 +0200 +Subject: x86/purgatory: remove PGO flags + +From: Ricardo Ribalda + +[ Upstream commit 97b6b9cbba40a21c1d9a344d5c1991f8cfbf136e ] + +If profile-guided optimization is enabled, the purgatory ends up with +multiple .text sections. This is not supported by kexec and crashes the +system. + +Link: https://lkml.kernel.org/r/20230321-kexec_clang16-v7-2-b05c520b7296@chromium.org +Fixes: 930457057abe ("kernel/kexec_file.c: split up __kexec_load_puragory") +Signed-off-by: Ricardo Ribalda +Cc: +Cc: Albert Ou +Cc: Baoquan He +Cc: Borislav Petkov (AMD) +Cc: Christophe Leroy +Cc: Dave Hansen +Cc: Dave Young +Cc: Eric W. Biederman +Cc: "H. 
Peter Anvin" +Cc: Ingo Molnar +Cc: Michael Ellerman +Cc: Nathan Chancellor +Cc: Nicholas Piggin +Cc: Nick Desaulniers +Cc: Palmer Dabbelt +Cc: Palmer Dabbelt +Cc: Paul Walmsley +Cc: Philipp Rudo +Cc: Ross Zwisler +Cc: Simon Horman +Cc: Steven Rostedt (Google) +Cc: Thomas Gleixner +Cc: Tom Rix +Signed-off-by: Andrew Morton +Signed-off-by: Sasha Levin +--- + arch/x86/purgatory/Makefile | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile +index d0f628601d47a..4db1940f81081 100644 +--- a/arch/x86/purgatory/Makefile ++++ b/arch/x86/purgatory/Makefile +@@ -14,6 +14,11 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE + + CFLAGS_sha256.o := -D__DISABLE_EXPORTS + ++# When profile-guided optimization is enabled, llvm emits two different ++# overlapping text sections, which is not supported by kexec. Remove profile ++# optimization flags. ++KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS)) ++ + # When linking purgatory.ro with -r unresolved symbols are not checked, + # also link a purgatory.chk binary without -r to check for unresolved symbols. + PURGATORY_LDFLAGS := -e purgatory_start -nostdlib -z nodefaultlib +-- +2.39.2 +