--- /dev/null
+From 3e74859ee35edc33a022c3f3971df066ea0ca6b9 Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Fri, 13 Dec 2024 12:22:32 -0800
+Subject: btrfs: check folio mapping after unlock in relocate_one_folio()
+
+From: Boris Burkov <boris@bur.io>
+
+commit 3e74859ee35edc33a022c3f3971df066ea0ca6b9 upstream.
+
+When we call btrfs_read_folio() to bring a folio uptodate, we unlock the
+folio. As a result, a different thread can modify the mapping (for
+example remove it via invalidation) before we call folio_lock() again.
+This leaves us with an invalid page, and we need to try again.
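+
+The fix follows the usual revalidate-after-relock pattern. A condensed
+sketch of the idea (simplified, error handling omitted; the actual
+change is in the diff below):
+
+  again:
+      page = find_lock_page(inode->i_mapping, page_index);
+      ...
+      if (!PageUptodate(page)) {
+          btrfs_read_folio(...);   /* unlocks the page */
+          lock_page(page);         /* mapping may have changed meanwhile */
+          if (page->mapping != inode->i_mapping) {
+              /* invalidated while unlocked: drop the page and retry */
+              unlock_page(page);
+              put_page(page);
+              goto again;
+          }
+      }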
+
+In particular, if we are relocating concurrently with aborting a
+transaction, this can result in a crash like the following:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000000
+ PGD 0 P4D 0
+ Oops: 0000 [#1] SMP
+ CPU: 76 PID: 1411631 Comm: kworker/u322:5
+ Workqueue: events_unbound btrfs_reclaim_bgs_work
+ RIP: 0010:set_page_extent_mapped+0x20/0xb0
+ RSP: 0018:ffffc900516a7be8 EFLAGS: 00010246
+ RAX: ffffea009e851d08 RBX: ffffea009e0b1880 RCX: 0000000000000000
+ RDX: 0000000000000000 RSI: ffffc900516a7b90 RDI: ffffea009e0b1880
+ RBP: 0000000003573000 R08: 0000000000000001 R09: ffff88c07fd2f3f0
+ R10: 0000000000000000 R11: 0000194754b575be R12: 0000000003572000
+ R13: 0000000003572fff R14: 0000000000100cca R15: 0000000005582fff
+ FS: 0000000000000000(0000) GS:ffff88c07fd00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000000000000000 CR3: 000000407d00f002 CR4: 00000000007706f0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ PKRU: 55555554
+ Call Trace:
+ <TASK>
+ ? __die+0x78/0xc0
+ ? page_fault_oops+0x2a8/0x3a0
+ ? __switch_to+0x133/0x530
+ ? wq_worker_running+0xa/0x40
+ ? exc_page_fault+0x63/0x130
+ ? asm_exc_page_fault+0x22/0x30
+ ? set_page_extent_mapped+0x20/0xb0
+ relocate_file_extent_cluster+0x1a7/0x940
+ relocate_data_extent+0xaf/0x120
+ relocate_block_group+0x20f/0x480
+ btrfs_relocate_block_group+0x152/0x320
+ btrfs_relocate_chunk+0x3d/0x120
+ btrfs_reclaim_bgs_work+0x2ae/0x4e0
+ process_scheduled_works+0x184/0x370
+ worker_thread+0xc6/0x3e0
+ ? blk_add_timer+0xb0/0xb0
+ kthread+0xae/0xe0
+ ? flush_tlb_kernel_range+0x90/0x90
+ ret_from_fork+0x2f/0x40
+ ? flush_tlb_kernel_range+0x90/0x90
+ ret_from_fork_asm+0x11/0x20
+ </TASK>
+
+This occurs because cleanup_one_transaction() calls
+destroy_delalloc_inodes(), which calls invalidate_inode_pages2(), which
+takes the folio lock before setting the mapping to NULL. We fail to check
+this, and subsequently call set_page_extent_mapped(), which assumes that
+mapping != NULL (in fact, it asserts that in debug mode).
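+
+Schematically, the losing interleaving looks like this (a simplified
+sketch, not a verbatim trace):
+
+  relocation worker                      transaction abort
+  -----------------                      -----------------
+  page = find_lock_page(...)
+  btrfs_read_folio()    /* unlocks */
+                                         invalidate_inode_pages2()
+                                           folio_lock()
+                                           folio->mapping = NULL
+                                           folio_unlock()
+  lock_page(page)
+  set_page_extent_mapped(page)
+    -> dereferences the NULL mapping and crashes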
+
+Note that the "fixes" patch here is not the one that introduced the
+race (the very first iteration of this code from 2009) but a more recent
+change that made this particular crash happen in practice.
+
+Fixes: e7f1326cc24e ("btrfs: set page extent mapped after read_folio in relocate_one_page")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Zhaoyang Li <lizy04@hust.edu.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/relocation.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -2977,6 +2977,7 @@ static int relocate_one_page(struct inod
+ int ret;
+
+ ASSERT(page_index <= last_index);
++again:
+ page = find_lock_page(inode->i_mapping, page_index);
+ if (!page) {
+ page_cache_sync_readahead(inode->i_mapping, ra, NULL,
+@@ -2998,6 +2999,11 @@ static int relocate_one_page(struct inod
+ ret = -EIO;
+ goto release_page;
+ }
++ if (page->mapping != inode->i_mapping) {
++ unlock_page(page);
++ put_page(page);
++ goto again;
++ }
+ }
+
+ /*
--- /dev/null
+From 53dac345395c0d2493cbc2f4c85fe38aef5b63f5 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <frederic@kernel.org>
+Date: Sat, 18 Jan 2025 00:24:33 +0100
+Subject: hrtimers: Force migrate away hrtimers queued after CPUHP_AP_HRTIMERS_DYING
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+commit 53dac345395c0d2493cbc2f4c85fe38aef5b63f5 upstream.
+
+hrtimers are migrated away from the dying CPU to any online target at
+the CPUHP_AP_HRTIMERS_DYING stage, so as not to delay bandwidth timers
+handling tasks involved in the CPU hotplug forward progress.
+
+However, wakeups can still be performed by the outgoing CPU after
+CPUHP_AP_HRTIMERS_DYING, and those can again result in bandwidth timers
+being armed. Depending on several considerations (crystal-ball
+power-management based election, earliest timer already enqueued, timer
+migration enabled or not), the elected target may end up being the current
+CPU even though it is offline. If that happens, the timer is simply ignored.
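+
+Nothing in the pre-fix target election rejects an offline local CPU.
+Condensed from the code removed below (see the diff):
+
+  static int hrtimer_check_target(struct hrtimer *timer,
+                                  struct hrtimer_clock_base *new_base)
+  {
+      ktime_t expires;
+
+      expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+      /*
+       * When this returns true and the target is remote, the caller
+       * falls back to the current CPU base, even if the current CPU is
+       * already past CPUHP_AP_HRTIMERS_DYING.
+       */
+      return expires < new_base->cpu_base->expires_next;
+  }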
+
+The most notable example is RCU, which had to deal with each and every one
+of those wake-ups by deferring them to an online CPU, along with related
+workarounds:
+
+_ e787644caf76 (rcu: Defer RCU kthreads wakeup when CPU is dying)
+_ 9139f93209d1 (rcu/nocb: Fix RT throttling hrtimer armed from offline CPU)
+_ f7345ccc62a4 (rcu/nocb: Fix rcuog wake-up from offline softirq)
+
+The problem isn't confined to RCU, though: the stop machine kthread
+(which runs CPUHP_AP_HRTIMERS_DYING) reports its completion at the end
+of its work through cpu_stop_signal_done() and performs a wake-up that
+eventually arms the deadline server timer:
+
+ WARNING: CPU: 94 PID: 588 at kernel/time/hrtimer.c:1086 hrtimer_start_range_ns+0x289/0x2d0
+ CPU: 94 UID: 0 PID: 588 Comm: migration/94 Not tainted
+ Stopper: multi_cpu_stop+0x0/0x120 <- stop_machine_cpuslocked+0x66/0xc0
+ RIP: 0010:hrtimer_start_range_ns+0x289/0x2d0
+ Call Trace:
+ <TASK>
+ start_dl_timer
+ enqueue_dl_entity
+ dl_server_start
+ enqueue_task_fair
+ enqueue_task
+ ttwu_do_activate
+ try_to_wake_up
+ complete
+ cpu_stopper_thread
+
+Instead of providing yet another band-aid to work around the situation, fix
+it in the hrtimers infrastructure: always migrate a timer to an online
+target whenever it is enqueued from an offline CPU.
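+
+The gist of the change (condensed from the diff below): when the local
+base is already offline, redirect the enqueue to any online housekeeping
+CPU, and if the timer becomes the first expiring one on that remote
+base, kick that CPU with a pre-initialized csd so it reprograms its
+clock event device:
+
+  static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
+                                                         int pinned)
+  {
+      if (!hrtimer_base_is_online(base)) {
+          int cpu = cpumask_any_and(cpu_online_mask,
+                                    housekeeping_cpumask(HK_TYPE_TIMER));
+
+          return &per_cpu(hrtimer_bases, cpu);
+      }
+      ...
+  }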
+
+This will also allow reverting all of the disgraceful RCU hacks listed above.
+
+Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier")
+Reported-by: Vlad Poenaru <vlad.wing@gmail.com>
+Reported-by: Usama Arif <usamaarif642@gmail.com>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Tested-by: Paul E. McKenney <paulmck@kernel.org>
+Link: https://lore.kernel.org/all/20250117232433.24027-1-frederic@kernel.org
+Closes: 20241213203739.1519801-1-usamaarif642@gmail.com
+Signed-off-by: Zhaoyang Li <lizy04@hust.edu.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hrtimer.h | 1
+ kernel/time/hrtimer.c | 103 ++++++++++++++++++++++++++++++++++++++----------
+ 2 files changed, 83 insertions(+), 21 deletions(-)
+
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -237,6 +237,7 @@ struct hrtimer_cpu_base {
+ ktime_t softirq_expires_next;
+ struct hrtimer *softirq_next_timer;
+ struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES];
++ call_single_data_t csd;
+ } ____cacheline_aligned;
+
+ static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -58,6 +58,8 @@
+ #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT)
+ #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
+
++static void retrigger_next_event(void *arg);
++
+ /*
+ * The timer bases:
+ *
+@@ -111,7 +113,8 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base,
+ .clockid = CLOCK_TAI,
+ .get_time = &ktime_get_clocktai,
+ },
+- }
++ },
++ .csd = CSD_INIT(retrigger_next_event, NULL)
+ };
+
+ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
+@@ -124,6 +127,14 @@ static const int hrtimer_clock_to_base_t
+ [CLOCK_TAI] = HRTIMER_BASE_TAI,
+ };
+
++static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base)
++{
++ if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
++ return true;
++ else
++ return likely(base->online);
++}
++
+ /*
+ * Functions and macros which are different for UP/SMP systems are kept in a
+ * single place
+@@ -177,27 +188,54 @@ struct hrtimer_clock_base *lock_hrtimer_
+ }
+
+ /*
+- * We do not migrate the timer when it is expiring before the next
+- * event on the target cpu. When high resolution is enabled, we cannot
+- * reprogram the target cpu hardware and we would cause it to fire
+- * late. To keep it simple, we handle the high resolution enabled and
+- * disabled case similar.
++ * Check if the elected target is suitable considering its next
++ * event and the hotplug state of the current CPU.
++ *
++ * If the elected target is remote and its next event is after the timer
++ * to queue, then a remote reprogram is necessary. However there is no
++ * guarantee the IPI handling the operation would arrive in time to meet
++ * the high resolution deadline. In this case the local CPU becomes a
++ * preferred target, unless it is offline.
++ *
++ * High and low resolution modes are handled the same way for simplicity.
+ *
+ * Called with cpu_base->lock of target cpu held.
+ */
+-static int
+-hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
++static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base,
++ struct hrtimer_cpu_base *new_cpu_base,
++ struct hrtimer_cpu_base *this_cpu_base)
+ {
+ ktime_t expires;
+
++ /*
++ * The local CPU clockevent can be reprogrammed. Also get_target_base()
++ * guarantees it is online.
++ */
++ if (new_cpu_base == this_cpu_base)
++ return true;
++
++ /*
++ * The offline local CPU can't be the default target if the
++ * next remote target event is after this timer. Keep the
++ * elected new base. An IPI will be issued to reprogram
++ * it as a last resort.
++ */
++ if (!hrtimer_base_is_online(this_cpu_base))
++ return true;
++
+ expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+- return expires < new_base->cpu_base->expires_next;
++
++ return expires >= new_base->cpu_base->expires_next;
+ }
+
+-static inline
+-struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
+- int pinned)
++static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned)
+ {
++ if (!hrtimer_base_is_online(base)) {
++ int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER));
++
++ return &per_cpu(hrtimer_bases, cpu);
++ }
++
+ #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+ if (static_branch_likely(&timers_migration_enabled) && !pinned)
+ return &per_cpu(hrtimer_bases, get_nohz_timer_target());
+@@ -248,8 +286,8 @@ again:
+ raw_spin_unlock(&base->cpu_base->lock);
+ raw_spin_lock(&new_base->cpu_base->lock);
+
+- if (new_cpu_base != this_cpu_base &&
+- hrtimer_check_target(timer, new_base)) {
++ if (!hrtimer_suitable_target(timer, new_base, new_cpu_base,
++ this_cpu_base)) {
+ raw_spin_unlock(&new_base->cpu_base->lock);
+ raw_spin_lock(&base->cpu_base->lock);
+ new_cpu_base = this_cpu_base;
+@@ -258,8 +296,7 @@ again:
+ }
+ WRITE_ONCE(timer->base, new_base);
+ } else {
+- if (new_cpu_base != this_cpu_base &&
+- hrtimer_check_target(timer, new_base)) {
++ if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) {
+ new_cpu_base = this_cpu_base;
+ goto again;
+ }
+@@ -718,8 +755,6 @@ static inline int hrtimer_is_hres_enable
+ return hrtimer_hres_enabled;
+ }
+
+-static void retrigger_next_event(void *arg);
+-
+ /*
+ * Switch to high resolution mode
+ */
+@@ -1205,6 +1240,7 @@ static int __hrtimer_start_range_ns(stru
+ u64 delta_ns, const enum hrtimer_mode mode,
+ struct hrtimer_clock_base *base)
+ {
++ struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases);
+ struct hrtimer_clock_base *new_base;
+ bool force_local, first;
+
+@@ -1216,10 +1252,16 @@ static int __hrtimer_start_range_ns(stru
+ * and enforce reprogramming after it is queued no matter whether
+ * it is the new first expiring timer again or not.
+ */
+- force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
++ force_local = base->cpu_base == this_cpu_base;
+ force_local &= base->cpu_base->next_timer == timer;
+
+ /*
++ * Don't force local queuing if this enqueue happens on an unplugged
++ * CPU after hrtimer_cpu_dying() has been invoked.
++ */
++ force_local &= this_cpu_base->online;
++
++ /*
+ * Remove an active timer from the queue. In case it is not queued
+ * on the current CPU, make sure that remove_hrtimer() updates the
+ * remote data correctly.
+@@ -1248,8 +1290,27 @@ static int __hrtimer_start_range_ns(stru
+ }
+
+ first = enqueue_hrtimer(timer, new_base, mode);
+- if (!force_local)
+- return first;
++ if (!force_local) {
++ /*
++ * If the current CPU base is online, then the timer is
++ * never queued on a remote CPU if it would be the first
++ * expiring timer there.
++ */
++ if (hrtimer_base_is_online(this_cpu_base))
++ return first;
++
++ /*
++ * Timer was enqueued remotely because the current base is
++ * already offline. If the timer is the first to expire,
++ * kick the remote CPU to reprogram the clock event.
++ */
++ if (first) {
++ struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base;
++
++ smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd);
++ }
++ return 0;
++ }
+
+ /*
+ * Timer was forced to stay on the current CPU to avoid