--- /dev/null
+From dfb0850769b96ff66a480cf0d2982de5fd5d9122 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 May 2023 11:32:28 -0700
+Subject: epoll: ep_autoremove_wake_function should use list_del_init_careful
+
+From: Benjamin Segall <bsegall@google.com>
+
+[ Upstream commit 2192bba03d80f829233bfa34506b428f71e531e7 ]
+
+autoremove_wake_function uses list_del_init_careful, so should epoll's
+more aggressive variant. It only doesn't because it was copied from an
+older wait.c rather than the most recent.
+
+[bsegall@google.com: add comment]
+ Link: https://lkml.kernel.org/r/xm26bki0ulsr.fsf_-_@google.com
+Link: https://lkml.kernel.org/r/xm26pm6hvfer.fsf@google.com
+Fixes: a16ceb139610 ("epoll: autoremove wakers even more aggressively")
+Signed-off-by: Ben Segall <bsegall@google.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index 877f9f61a4e8d..8c0e94183186f 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -1814,7 +1814,11 @@ static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
+ {
+ int ret = default_wake_function(wq_entry, mode, sync, key);
+
+- list_del_init(&wq_entry->entry);
++ /*
++ * Pairs with list_empty_careful in ep_poll, and ensures future loop
++ * iterations see the cause of this wakeup.
++ */
++ list_del_init_careful(&wq_entry->entry);
+ return ret;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 974003992ac2b0a584fb3170b8115dcfb93db6c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jul 2020 12:33:41 -0700
+Subject: list: add "list_del_init_careful()" to go with "list_empty_careful()"
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit c6fe44d96fc1536af5b11cd859686453d1b7bfd1 ]
+
+That gives us ordering guarantees around the pair.
+
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 2192bba03d80 ("epoll: ep_autoremove_wake_function should use list_del_init_careful")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/list.h | 20 +++++++++++++++++++-
+ kernel/sched/wait.c | 2 +-
+ mm/filemap.c | 7 +------
+ 3 files changed, 21 insertions(+), 8 deletions(-)
+
+diff --git a/include/linux/list.h b/include/linux/list.h
+index ce19c6b632a59..231ff089f7d1c 100644
+--- a/include/linux/list.h
++++ b/include/linux/list.h
+@@ -268,6 +268,24 @@ static inline int list_empty(const struct list_head *head)
+ return READ_ONCE(head->next) == head;
+ }
+
++/**
++ * list_del_init_careful - deletes entry from list and reinitialize it.
++ * @entry: the element to delete from the list.
++ *
++ * This is the same as list_del_init(), except designed to be used
++ * together with list_empty_careful() in a way to guarantee ordering
++ * of other memory operations.
++ *
++ * Any memory operations done before a list_del_init_careful() are
++ * guaranteed to be visible after a list_empty_careful() test.
++ */
++static inline void list_del_init_careful(struct list_head *entry)
++{
++ __list_del_entry(entry);
++ entry->prev = entry;
++ smp_store_release(&entry->next, entry);
++}
++
+ /**
+ * list_empty_careful - tests whether a list is empty and not being modified
+ * @head: the list to test
+@@ -283,7 +301,7 @@ static inline int list_empty(const struct list_head *head)
+ */
+ static inline int list_empty_careful(const struct list_head *head)
+ {
+- struct list_head *next = head->next;
++ struct list_head *next = smp_load_acquire(&head->next);
+ return (next == head) && (next == head->prev);
+ }
+
+diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c
+index 7d668b31dbc6d..c76fe1d4d91e2 100644
+--- a/kernel/sched/wait.c
++++ b/kernel/sched/wait.c
+@@ -384,7 +384,7 @@ int autoremove_wake_function(struct wait_queue_entry *wq_entry, unsigned mode, i
+ int ret = default_wake_function(wq_entry, mode, sync, key);
+
+ if (ret)
+- list_del_init(&wq_entry->entry);
++ list_del_init_careful(&wq_entry->entry);
+
+ return ret;
+ }
+diff --git a/mm/filemap.c b/mm/filemap.c
+index 83b324420046b..a106d63e84679 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -1085,13 +1085,8 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
+ * since after list_del_init(&wait->entry) the wait entry
+ * might be de-allocated and the process might even have
+ * exited.
+- *
+- * We _really_ should have a "list_del_init_careful()" to
+- * properly pair with the unlocked "list_empty_careful()"
+- * in finish_wait().
+ */
+- smp_mb();
+- list_del_init(&wait->entry);
++ list_del_init_careful(&wait->entry);
+ return ret;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 4fff1ec5895dec84ae4385ba8c5d25f3856f376d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jul 2020 10:16:49 -0700
+Subject: mm: rewrite wait_on_page_bit_common() logic
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+[ Upstream commit 2a9127fcf2296674d58024f83981f40b128fffea ]
+
+It turns out that wait_on_page_bit_common() had several problems,
+ranging from just unfair behavior due to re-queueing at the end of the
+wait queue when re-trying, and an outright bug that could result in
+missed wakeups (but probably never happened in practice).
+
+This rewrites the whole logic to avoid both issues, by simply moving the
+logic to check (and possibly take) the bit lock into the wakeup path
+instead.
+
+That makes everything much more straightforward, and means that we never
+need to re-queue the wait entry: if we get woken up, we'll be notified
+through WQ_FLAG_WOKEN, and the wait queue entry will have been removed,
+and everything will have been done for us.
+
+Link: https://lore.kernel.org/lkml/CAHk-=wjJA2Z3kUFb-5s=6+n0qbTs8ELqKFt9B3pH85a8fGD73w@mail.gmail.com/
+Link: https://lore.kernel.org/lkml/alpine.LSU.2.11.2007221359450.1017@eggly.anvils/
+Reported-by: Oleg Nesterov <oleg@redhat.com>
+Reported-by: Hugh Dickins <hughd@google.com>
+Cc: Michal Hocko <mhocko@suse.com>
+Reviewed-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Stable-dep-of: 2192bba03d80 ("epoll: ep_autoremove_wake_function should use list_del_init_careful")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/filemap.c | 132 +++++++++++++++++++++++++++++++++------------------
+ 1 file changed, 85 insertions(+), 47 deletions(-)
+
+diff --git a/mm/filemap.c b/mm/filemap.c
+index c094103191a6e..83b324420046b 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -1046,6 +1046,7 @@ struct wait_page_queue {
+
+ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync, void *arg)
+ {
++ int ret;
+ struct wait_page_key *key = arg;
+ struct wait_page_queue *wait_page
+ = container_of(wait, struct wait_page_queue, wait);
+@@ -1058,17 +1059,40 @@ static int wake_page_function(wait_queue_entry_t *wait, unsigned mode, int sync,
+ return 0;
+
+ /*
+- * Stop walking if it's locked.
+- * Is this safe if put_and_wait_on_page_locked() is in use?
+- * Yes: the waker must hold a reference to this page, and if PG_locked
+- * has now already been set by another task, that task must also hold
+- * a reference to the *same usage* of this page; so there is no need
+- * to walk on to wake even the put_and_wait_on_page_locked() callers.
++ * If it's an exclusive wait, we get the bit for it, and
++ * stop walking if we can't.
++ *
++ * If it's a non-exclusive wait, then the fact that this
++ * wake function was called means that the bit already
++ * was cleared, and we don't care if somebody then
++ * re-took it.
+ */
+- if (test_bit(key->bit_nr, &key->page->flags))
+- return -1;
++ ret = 0;
++ if (wait->flags & WQ_FLAG_EXCLUSIVE) {
++ if (test_and_set_bit(key->bit_nr, &key->page->flags))
++ return -1;
++ ret = 1;
++ }
++ wait->flags |= WQ_FLAG_WOKEN;
+
+- return autoremove_wake_function(wait, mode, sync, key);
++ wake_up_state(wait->private, mode);
++
++ /*
++ * Ok, we have successfully done what we're waiting for,
++ * and we can unconditionally remove the wait entry.
++ *
++ * Note that this has to be the absolute last thing we do,
++ * since after list_del_init(&wait->entry) the wait entry
++ * might be de-allocated and the process might even have
++ * exited.
++ *
++ * We _really_ should have a "list_del_init_careful()" to
++ * properly pair with the unlocked "list_empty_careful()"
++ * in finish_wait().
++ */
++ smp_mb();
++ list_del_init(&wait->entry);
++ return ret;
+ }
+
+ static void wake_up_page_bit(struct page *page, int bit_nr)
+@@ -1147,16 +1171,31 @@ enum behavior {
+ */
+ };
+
++/*
++ * Attempt to check (or get) the page bit, and mark the
++ * waiter woken if successful.
++ */
++static inline bool trylock_page_bit_common(struct page *page, int bit_nr,
++ struct wait_queue_entry *wait)
++{
++ if (wait->flags & WQ_FLAG_EXCLUSIVE) {
++ if (test_and_set_bit(bit_nr, &page->flags))
++ return false;
++ } else if (test_bit(bit_nr, &page->flags))
++ return false;
++
++ wait->flags |= WQ_FLAG_WOKEN;
++ return true;
++}
++
+ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
+ struct page *page, int bit_nr, int state, enum behavior behavior)
+ {
+ struct wait_page_queue wait_page;
+ wait_queue_entry_t *wait = &wait_page.wait;
+- bool bit_is_set;
+ bool thrashing = false;
+ bool delayacct = false;
+ unsigned long pflags;
+- int ret = 0;
+
+ if (bit_nr == PG_locked &&
+ !PageUptodate(page) && PageWorkingset(page)) {
+@@ -1174,48 +1213,47 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
+ wait_page.page = page;
+ wait_page.bit_nr = bit_nr;
+
+- for (;;) {
+- spin_lock_irq(&q->lock);
++ /*
++ * Do one last check whether we can get the
++ * page bit synchronously.
++ *
++ * Do the SetPageWaiters() marking before that
++ * to let any waker we _just_ missed know they
++ * need to wake us up (otherwise they'll never
++ * even go to the slow case that looks at the
++ * page queue), and add ourselves to the wait
++ * queue if we need to sleep.
++ *
++ * This part needs to be done under the queue
++ * lock to avoid races.
++ */
++ spin_lock_irq(&q->lock);
++ SetPageWaiters(page);
++ if (!trylock_page_bit_common(page, bit_nr, wait))
++ __add_wait_queue_entry_tail(q, wait);
++ spin_unlock_irq(&q->lock);
+
+- if (likely(list_empty(&wait->entry))) {
+- __add_wait_queue_entry_tail(q, wait);
+- SetPageWaiters(page);
+- }
++ /*
++ * From now on, all the logic will be based on
++ * the WQ_FLAG_WOKEN flag, and the and the page
++ * bit testing (and setting) will be - or has
++ * already been - done by the wake function.
++ *
++ * We can drop our reference to the page.
++ */
++ if (behavior == DROP)
++ put_page(page);
+
++ for (;;) {
+ set_current_state(state);
+
+- spin_unlock_irq(&q->lock);
+-
+- bit_is_set = test_bit(bit_nr, &page->flags);
+- if (behavior == DROP)
+- put_page(page);
+-
+- if (likely(bit_is_set))
+- io_schedule();
+-
+- if (behavior == EXCLUSIVE) {
+- if (!test_and_set_bit_lock(bit_nr, &page->flags))
+- break;
+- } else if (behavior == SHARED) {
+- if (!test_bit(bit_nr, &page->flags))
+- break;
+- }
+-
+- if (signal_pending_state(state, current)) {
+- ret = -EINTR;
++ if (signal_pending_state(state, current))
+ break;
+- }
+
+- if (behavior == DROP) {
+- /*
+- * We can no longer safely access page->flags:
+- * even if CONFIG_MEMORY_HOTREMOVE is not enabled,
+- * there is a risk of waiting forever on a page reused
+- * for something that keeps it locked indefinitely.
+- * But best check for -EINTR above before breaking.
+- */
++ if (wait->flags & WQ_FLAG_WOKEN)
+ break;
+- }
++
++ io_schedule();
+ }
+
+ finish_wait(q, wait);
+@@ -1234,7 +1272,7 @@ static inline int wait_on_page_bit_common(wait_queue_head_t *q,
+ * bother with signals either.
+ */
+
+- return ret;
++ return wait->flags & WQ_FLAG_WOKEN ? 0 : -EINTR;
+ }
+
+ void wait_on_page_bit(struct page *page, int bit_nr)
+--
+2.39.2
+
--- /dev/null
+x86-purgatory-fail-the-build-if-purgatory.ro-has-mis.patch
+x86-purgatory-remove-pgo-flags.patch
+mm-rewrite-wait_on_page_bit_common-logic.patch
+list-add-list_del_init_careful-to-go-with-list_empty.patch
+epoll-ep_autoremove_wake_function-should-use-list_de.patch
--- /dev/null
+From 73080c3a50981d969859bdbc1c85d079e59458b0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Mar 2020 14:08:41 +0100
+Subject: x86/purgatory: Fail the build if purgatory.ro has missing symbols
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+[ Upstream commit e4160b2e4b02377c67f8ecd05786811598f39acd ]
+
+Linking purgatory.ro with -r enables "incremental linking"; this means
+no checks for unresolved symbols are done while linking purgatory.ro.
+
+A change to the sha256 code has caused the purgatory in 5.4-rc1 to have
+a missing symbol on memzero_explicit(), yet things still happily build.
+
+Add an extra check for unresolved symbols by calling ld without -r
+before running bin2c to generate kexec-purgatory.c.
+
+This causes a build of 5.4-rc1 with this patch added to fail as it should:
+
+ CHK arch/x86/purgatory/purgatory.ro
+ ld: arch/x86/purgatory/purgatory.ro: in function `sha256_transform':
+ sha256.c:(.text+0x1c0c): undefined reference to `memzero_explicit'
+ make[2]: *** [arch/x86/purgatory/Makefile:72:
+ arch/x86/purgatory/kexec-purgatory.c] Error 1
+ make[1]: *** [scripts/Makefile.build:509: arch/x86/purgatory] Error 2
+ make: *** [Makefile:1650: arch/x86] Error 2
+
+Also remove --no-undefined from LDFLAGS_purgatory.ro as that has no
+effect.
+
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Borislav Petkov <bp@suse.de>
+Link: https://lkml.kernel.org/r/20200317130841.290418-2-hdegoede@redhat.com
+Stable-dep-of: 97b6b9cbba40 ("x86/purgatory: remove PGO flags")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/purgatory/.gitignore | 1 +
+ arch/x86/purgatory/Makefile | 13 ++++++++++---
+ 2 files changed, 11 insertions(+), 3 deletions(-)
+ create mode 100644 arch/x86/purgatory/.gitignore
+
+diff --git a/arch/x86/purgatory/.gitignore b/arch/x86/purgatory/.gitignore
+new file mode 100644
+index 0000000000000..d2be1500671de
+--- /dev/null
++++ b/arch/x86/purgatory/.gitignore
+@@ -0,0 +1 @@
++purgatory.chk
+diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
+index 969d2b2eb7d71..d0f628601d47a 100644
+--- a/arch/x86/purgatory/Makefile
++++ b/arch/x86/purgatory/Makefile
+@@ -14,8 +14,12 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
+
+ CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+
+-LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
+-targets += purgatory.ro
++# When linking purgatory.ro with -r unresolved symbols are not checked,
++# also link a purgatory.chk binary without -r to check for unresolved symbols.
++PURGATORY_LDFLAGS := -e purgatory_start -nostdlib -z nodefaultlib
++LDFLAGS_purgatory.ro := -r $(PURGATORY_LDFLAGS)
++LDFLAGS_purgatory.chk := $(PURGATORY_LDFLAGS)
++targets += purgatory.ro purgatory.chk
+
+ # Sanitizer, etc. runtimes are unavailable and cannot be linked here.
+ GCOV_PROFILE := n
+@@ -64,12 +68,15 @@ AFLAGS_REMOVE_entry64.o += -g -Wa,-gdwarf-2
+ $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+
++$(obj)/purgatory.chk: $(obj)/purgatory.ro FORCE
++ $(call if_changed,ld)
++
+ targets += kexec-purgatory.c
+
+ quiet_cmd_bin2c = BIN2C $@
+ cmd_bin2c = $(objtree)/scripts/bin2c kexec_purgatory < $< > $@
+
+-$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro FORCE
++$(obj)/kexec-purgatory.c: $(obj)/purgatory.ro $(obj)/purgatory.chk FORCE
+ $(call if_changed,bin2c)
+
+ obj-$(CONFIG_KEXEC_FILE) += kexec-purgatory.o
+--
+2.39.2
+
--- /dev/null
+From 5a54fd54f9738e184364b5467ebd072bc48f4ab0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 May 2023 16:47:37 +0200
+Subject: x86/purgatory: remove PGO flags
+
+From: Ricardo Ribalda <ribalda@chromium.org>
+
+[ Upstream commit 97b6b9cbba40a21c1d9a344d5c1991f8cfbf136e ]
+
+If profile-guided optimization is enabled, the purgatory ends up with
+multiple .text sections. This is not supported by kexec and crashes the
+system.
+
+Link: https://lkml.kernel.org/r/20230321-kexec_clang16-v7-2-b05c520b7296@chromium.org
+Fixes: 930457057abe ("kernel/kexec_file.c: split up __kexec_load_puragory")
+Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
+Cc: <stable@vger.kernel.org>
+Cc: Albert Ou <aou@eecs.berkeley.edu>
+Cc: Baoquan He <bhe@redhat.com>
+Cc: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Dave Young <dyoung@redhat.com>
+Cc: Eric W. Biederman <ebiederm@xmission.com>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Nathan Chancellor <nathan@kernel.org>
+Cc: Nicholas Piggin <npiggin@gmail.com>
+Cc: Nick Desaulniers <ndesaulniers@google.com>
+Cc: Palmer Dabbelt <palmer@dabbelt.com>
+Cc: Palmer Dabbelt <palmer@rivosinc.com>
+Cc: Paul Walmsley <paul.walmsley@sifive.com>
+Cc: Philipp Rudo <prudo@redhat.com>
+Cc: Ross Zwisler <zwisler@google.com>
+Cc: Simon Horman <horms@kernel.org>
+Cc: Steven Rostedt (Google) <rostedt@goodmis.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Tom Rix <trix@redhat.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/purgatory/Makefile | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
+index d0f628601d47a..4db1940f81081 100644
+--- a/arch/x86/purgatory/Makefile
++++ b/arch/x86/purgatory/Makefile
+@@ -14,6 +14,11 @@ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
+
+ CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+
++# When profile-guided optimization is enabled, llvm emits two different
++# overlapping text sections, which is not supported by kexec. Remove profile
++# optimization flags.
++KBUILD_CFLAGS := $(filter-out -fprofile-sample-use=% -fprofile-use=%,$(KBUILD_CFLAGS))
++
+ # When linking purgatory.ro with -r unresolved symbols are not checked,
+ # also link a purgatory.chk binary without -r to check for unresolved symbols.
+ PURGATORY_LDFLAGS := -e purgatory_start -nostdlib -z nodefaultlib
+--
+2.39.2
+