git.ipfire.org Git - thirdparty/kernel/stable-queue.git / commitdiff
6.6-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Thu, 29 May 2025 11:41:49 +0000 (13:41 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Thu, 29 May 2025 11:41:49 +0000 (13:41 +0200)
added patches:
btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch
can-kvaser_pciefd-force-irq-edge-in-case-of-nested-irq.patch
hrtimers-force-migrate-away-hrtimers-queued-after-cpuhp_ap_hrtimers_dying.patch

queue-6.6/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch [new file with mode: 0644]
queue-6.6/can-kvaser_pciefd-force-irq-edge-in-case-of-nested-irq.patch [new file with mode: 0644]
queue-6.6/hrtimers-force-migrate-away-hrtimers-queued-after-cpuhp_ap_hrtimers_dying.patch [new file with mode: 0644]
queue-6.6/series

diff --git a/queue-6.6/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch b/queue-6.6/btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch
new file mode 100644 (file)
index 0000000..5b6d2bf
--- /dev/null
@@ -0,0 +1,103 @@
+From 3e74859ee35edc33a022c3f3971df066ea0ca6b9 Mon Sep 17 00:00:00 2001
+From: Boris Burkov <boris@bur.io>
+Date: Fri, 13 Dec 2024 12:22:32 -0800
+Subject: btrfs: check folio mapping after unlock in relocate_one_folio()
+
+From: Boris Burkov <boris@bur.io>
+
+commit 3e74859ee35edc33a022c3f3971df066ea0ca6b9 upstream.
+
+When we call btrfs_read_folio() to bring a folio uptodate, we unlock the
+folio. The result of that is that a different thread can modify the
+mapping (like remove it with invalidate) before we call folio_lock().
+This results in an invalid page and we need to try again.
+
+In particular, if we are relocating concurrently with aborting a
+transaction, this can result in a crash like the following:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000000
+  PGD 0 P4D 0
+  Oops: 0000 [#1] SMP
+  CPU: 76 PID: 1411631 Comm: kworker/u322:5
+  Workqueue: events_unbound btrfs_reclaim_bgs_work
+  RIP: 0010:set_page_extent_mapped+0x20/0xb0
+  RSP: 0018:ffffc900516a7be8 EFLAGS: 00010246
+  RAX: ffffea009e851d08 RBX: ffffea009e0b1880 RCX: 0000000000000000
+  RDX: 0000000000000000 RSI: ffffc900516a7b90 RDI: ffffea009e0b1880
+  RBP: 0000000003573000 R08: 0000000000000001 R09: ffff88c07fd2f3f0
+  R10: 0000000000000000 R11: 0000194754b575be R12: 0000000003572000
+  R13: 0000000003572fff R14: 0000000000100cca R15: 0000000005582fff
+  FS:  0000000000000000(0000) GS:ffff88c07fd00000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000000000000 CR3: 000000407d00f002 CR4: 00000000007706f0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  PKRU: 55555554
+  Call Trace:
+  <TASK>
+  ? __die+0x78/0xc0
+  ? page_fault_oops+0x2a8/0x3a0
+  ? __switch_to+0x133/0x530
+  ? wq_worker_running+0xa/0x40
+  ? exc_page_fault+0x63/0x130
+  ? asm_exc_page_fault+0x22/0x30
+  ? set_page_extent_mapped+0x20/0xb0
+  relocate_file_extent_cluster+0x1a7/0x940
+  relocate_data_extent+0xaf/0x120
+  relocate_block_group+0x20f/0x480
+  btrfs_relocate_block_group+0x152/0x320
+  btrfs_relocate_chunk+0x3d/0x120
+  btrfs_reclaim_bgs_work+0x2ae/0x4e0
+  process_scheduled_works+0x184/0x370
+  worker_thread+0xc6/0x3e0
+  ? blk_add_timer+0xb0/0xb0
+  kthread+0xae/0xe0
+  ? flush_tlb_kernel_range+0x90/0x90
+  ret_from_fork+0x2f/0x40
+  ? flush_tlb_kernel_range+0x90/0x90
+  ret_from_fork_asm+0x11/0x20
+  </TASK>
+
+This occurs because cleanup_one_transaction() calls
+destroy_delalloc_inodes() which calls invalidate_inode_pages2() which
+takes the folio_lock before setting mapping to NULL. We fail to check
+this, and subsequently call set_extent_mapping(), which assumes that
+mapping != NULL (in fact it asserts that in debug mode).
+
+Note that the "fixes" patch here is not the one that introduced the
+race (the very first iteration of this code from 2009) but a more recent
+change that made this particular crash happen in practice.
+
+Fixes: e7f1326cc24e ("btrfs: set page extent mapped after read_folio in relocate_one_page")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Boris Burkov <boris@bur.io>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Zhaoyang Li <lizy04@hust.edu.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/relocation.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -2931,6 +2931,7 @@ static int relocate_one_page(struct inod
+       int ret;
+       ASSERT(page_index <= last_index);
++again:
+       page = find_lock_page(inode->i_mapping, page_index);
+       if (!page) {
+               page_cache_sync_readahead(inode->i_mapping, ra, NULL,
+@@ -2952,6 +2953,11 @@ static int relocate_one_page(struct inod
+                       ret = -EIO;
+                       goto release_page;
+               }
++              if (page->mapping != inode->i_mapping) {
++                      unlock_page(page);
++                      put_page(page);
++                      goto again;
++              }
+       }
+       /*
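
The hunk above re-checks page->mapping after the page lock has been dropped
and re-taken around btrfs_read_folio(). In isolation, that lock-and-revalidate
pattern looks roughly like the following sketch; lock_attached_page() is a
hypothetical helper used purely for illustration, not the actual code in
fs/btrfs/relocation.c:

  #include <linux/mm.h>
  #include <linux/pagemap.h>

  /* Minimal sketch of the lock-and-revalidate retry pattern. */
  static struct page *lock_attached_page(struct address_space *mapping,
					 pgoff_t index)
  {
	struct page *page;

  again:
	page = find_lock_page(mapping, index);
	if (!page)
		return NULL;		/* caller does readahead and retries */

	if (page->mapping != mapping) {
		/*
		 * The page was invalidated (e.g. by invalidate_inode_pages2()
		 * during a transaction abort) while it was unlocked; drop our
		 * lock and reference and look it up again.
		 */
		unlock_page(page);
		put_page(page);
		goto again;
	}

	/* Locked and still attached to the expected mapping. */
	return page;
  }
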
diff --git a/queue-6.6/can-kvaser_pciefd-force-irq-edge-in-case-of-nested-irq.patch b/queue-6.6/can-kvaser_pciefd-force-irq-edge-in-case-of-nested-irq.patch
new file mode 100644 (file)
index 0000000..a098bc2
--- /dev/null
@@ -0,0 +1,187 @@
+From 9176bd205ee0b2cd35073a9973c2a0936bcb579e Mon Sep 17 00:00:00 2001
+From: Axel Forsman <axfo@kvaser.com>
+Date: Tue, 20 May 2025 13:43:30 +0200
+Subject: can: kvaser_pciefd: Force IRQ edge in case of nested IRQ
+
+From: Axel Forsman <axfo@kvaser.com>
+
+commit 9176bd205ee0b2cd35073a9973c2a0936bcb579e upstream.
+
+Avoid the driver missing IRQs by temporarily masking IRQs in the ISR
+to enforce an edge even if a different IRQ is signalled before handled
+IRQs are cleared.
+
+Fixes: 48f827d4f48f ("can: kvaser_pciefd: Move reset of DMA RX buffers to the end of the ISR")
+Cc: stable@vger.kernel.org
+Signed-off-by: Axel Forsman <axfo@kvaser.com>
+Tested-by: Jimmy Assarsson <extja@kvaser.com>
+Reviewed-by: Jimmy Assarsson <extja@kvaser.com>
+Link: https://patch.msgid.link/20250520114332.8961-2-axfo@kvaser.com
+Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/can/kvaser_pciefd.c |   80 ++++++++++++++++++----------------------
+ 1 file changed, 37 insertions(+), 43 deletions(-)
+
+--- a/drivers/net/can/kvaser_pciefd.c
++++ b/drivers/net/can/kvaser_pciefd.c
+@@ -1582,24 +1582,28 @@ static int kvaser_pciefd_read_buffer(str
+       return res;
+ }
+-static u32 kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie)
++static void kvaser_pciefd_receive_irq(struct kvaser_pciefd *pcie)
+ {
++      void __iomem *srb_cmd_reg = KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG;
+       u32 irq = ioread32(KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG);
+-      if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0)
++      iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG);
++
++      if (irq & KVASER_PCIEFD_SRB_IRQ_DPD0) {
+               kvaser_pciefd_read_buffer(pcie, 0);
++              iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0, srb_cmd_reg); /* Rearm buffer */
++      }
+-      if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1)
++      if (irq & KVASER_PCIEFD_SRB_IRQ_DPD1) {
+               kvaser_pciefd_read_buffer(pcie, 1);
++              iowrite32(KVASER_PCIEFD_SRB_CMD_RDB1, srb_cmd_reg); /* Rearm buffer */
++      }
+       if (irq & KVASER_PCIEFD_SRB_IRQ_DOF0 ||
+           irq & KVASER_PCIEFD_SRB_IRQ_DOF1 ||
+           irq & KVASER_PCIEFD_SRB_IRQ_DUF0 ||
+           irq & KVASER_PCIEFD_SRB_IRQ_DUF1)
+               dev_err(&pcie->pci->dev, "DMA IRQ error 0x%08X\n", irq);
+-
+-      iowrite32(irq, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IRQ_REG);
+-      return irq;
+ }
+ static void kvaser_pciefd_transmit_irq(struct kvaser_pciefd_can *can)
+@@ -1627,29 +1631,22 @@ static irqreturn_t kvaser_pciefd_irq_han
+       struct kvaser_pciefd *pcie = (struct kvaser_pciefd *)dev;
+       const struct kvaser_pciefd_irq_mask *irq_mask = pcie->driver_data->irq_mask;
+       u32 pci_irq = ioread32(KVASER_PCIEFD_PCI_IRQ_ADDR(pcie));
+-      u32 srb_irq = 0;
+-      u32 srb_release = 0;
+       int i;
+       if (!(pci_irq & irq_mask->all))
+               return IRQ_NONE;
++      iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie));
++
+       if (pci_irq & irq_mask->kcan_rx0)
+-              srb_irq = kvaser_pciefd_receive_irq(pcie);
++              kvaser_pciefd_receive_irq(pcie);
+       for (i = 0; i < pcie->nr_channels; i++) {
+               if (pci_irq & irq_mask->kcan_tx[i])
+                       kvaser_pciefd_transmit_irq(pcie->can[i]);
+       }
+-      if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD0)
+-              srb_release |= KVASER_PCIEFD_SRB_CMD_RDB0;
+-
+-      if (srb_irq & KVASER_PCIEFD_SRB_IRQ_DPD1)
+-              srb_release |= KVASER_PCIEFD_SRB_CMD_RDB1;
+-
+-      if (srb_release)
+-              iowrite32(srb_release, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG);
++      iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie));
+       return IRQ_HANDLED;
+ }
+@@ -1669,13 +1666,22 @@ static void kvaser_pciefd_teardown_can_c
+       }
+ }
++static void kvaser_pciefd_disable_irq_srcs(struct kvaser_pciefd *pcie)
++{
++      unsigned int i;
++
++      /* Masking PCI_IRQ is insufficient as running ISR will unmask it */
++      iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG);
++      for (i = 0; i < pcie->nr_channels; ++i)
++              iowrite32(0, pcie->can[i]->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
++}
++
+ static int kvaser_pciefd_probe(struct pci_dev *pdev,
+                              const struct pci_device_id *id)
+ {
+       int err;
+       struct kvaser_pciefd *pcie;
+       const struct kvaser_pciefd_irq_mask *irq_mask;
+-      void __iomem *irq_en_base;
+       pcie = devm_kzalloc(&pdev->dev, sizeof(*pcie), GFP_KERNEL);
+       if (!pcie)
+@@ -1728,8 +1734,7 @@ static int kvaser_pciefd_probe(struct pc
+                 KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_IEN_REG);
+       /* Enable PCI interrupts */
+-      irq_en_base = KVASER_PCIEFD_PCI_IEN_ADDR(pcie);
+-      iowrite32(irq_mask->all, irq_en_base);
++      iowrite32(irq_mask->all, KVASER_PCIEFD_PCI_IEN_ADDR(pcie));
+       /* Ready the DMA buffers */
+       iowrite32(KVASER_PCIEFD_SRB_CMD_RDB0,
+                 KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CMD_REG);
+@@ -1743,8 +1748,7 @@ static int kvaser_pciefd_probe(struct pc
+       return 0;
+ err_free_irq:
+-      /* Disable PCI interrupts */
+-      iowrite32(0, irq_en_base);
++      kvaser_pciefd_disable_irq_srcs(pcie);
+       free_irq(pcie->pci->irq, pcie);
+ err_teardown_can_ctrls:
+@@ -1764,35 +1768,25 @@ err_disable_pci:
+       return err;
+ }
+-static void kvaser_pciefd_remove_all_ctrls(struct kvaser_pciefd *pcie)
+-{
+-      int i;
+-
+-      for (i = 0; i < pcie->nr_channels; i++) {
+-              struct kvaser_pciefd_can *can = pcie->can[i];
+-
+-              if (can) {
+-                      iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_IEN_REG);
+-                      unregister_candev(can->can.dev);
+-                      del_timer(&can->bec_poll_timer);
+-                      kvaser_pciefd_pwm_stop(can);
+-                      free_candev(can->can.dev);
+-              }
+-      }
+-}
+-
+ static void kvaser_pciefd_remove(struct pci_dev *pdev)
+ {
+       struct kvaser_pciefd *pcie = pci_get_drvdata(pdev);
++      unsigned int i;
+-      kvaser_pciefd_remove_all_ctrls(pcie);
++      for (i = 0; i < pcie->nr_channels; ++i) {
++              struct kvaser_pciefd_can *can = pcie->can[i];
+-      /* Disable interrupts */
+-      iowrite32(0, KVASER_PCIEFD_SRB_ADDR(pcie) + KVASER_PCIEFD_SRB_CTRL_REG);
+-      iowrite32(0, KVASER_PCIEFD_PCI_IEN_ADDR(pcie));
++              unregister_candev(can->can.dev);
++              del_timer(&can->bec_poll_timer);
++              kvaser_pciefd_pwm_stop(can);
++      }
++      kvaser_pciefd_disable_irq_srcs(pcie);
+       free_irq(pcie->pci->irq, pcie);
++      for (i = 0; i < pcie->nr_channels; ++i)
++              free_candev(pcie->can[i]->can.dev);
++
+       pci_iounmap(pdev, pcie->reg_base);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
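
The key change above is that the ISR now masks the top-level PCI interrupt
enable for the duration of the handler and restores it at the end, so the
interrupt output is guaranteed to produce a fresh edge if another source
latched while earlier ones were being serviced. A minimal sketch of that
pattern follows; the register offsets and device structure (EX_REG_IRQ,
EX_REG_IEN, struct ex_dev) are hypothetical and do not reflect the
kvaser_pciefd register map:

  #include <linux/io.h>
  #include <linux/interrupt.h>

  #define EX_REG_IRQ	0x00	/* latched interrupt sources (hypothetical) */
  #define EX_REG_IEN	0x04	/* top-level interrupt enable (hypothetical) */

  struct ex_dev {
	void __iomem *base;
	u32 irq_mask_all;
  };

  static irqreturn_t ex_irq_handler(int irq, void *dev_id)
  {
	struct ex_dev *edev = dev_id;
	u32 pending = ioread32(edev->base + EX_REG_IRQ);

	if (!(pending & edev->irq_mask_all))
		return IRQ_NONE;

	/* Mask: the interrupt output is deasserted while we service. */
	iowrite32(0, edev->base + EX_REG_IEN);

	/* ... acknowledge and service each bit set in 'pending' here ... */

	/* Unmask: anything that latched meanwhile now raises a new edge. */
	iowrite32(edev->irq_mask_all, edev->base + EX_REG_IEN);

	return IRQ_HANDLED;
  }
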
diff --git a/queue-6.6/hrtimers-force-migrate-away-hrtimers-queued-after-cpuhp_ap_hrtimers_dying.patch b/queue-6.6/hrtimers-force-migrate-away-hrtimers-queued-after-cpuhp_ap_hrtimers_dying.patch
new file mode 100644 (file)
index 0000000..293e0f6
--- /dev/null
@@ -0,0 +1,270 @@
+From 53dac345395c0d2493cbc2f4c85fe38aef5b63f5 Mon Sep 17 00:00:00 2001
+From: Frederic Weisbecker <frederic@kernel.org>
+Date: Sat, 18 Jan 2025 00:24:33 +0100
+Subject: hrtimers: Force migrate away hrtimers queued after CPUHP_AP_HRTIMERS_DYING
+
+From: Frederic Weisbecker <frederic@kernel.org>
+
+commit 53dac345395c0d2493cbc2f4c85fe38aef5b63f5 upstream.
+
+hrtimers are migrated away from the dying CPU to any online target at
+the CPUHP_AP_HRTIMERS_DYING stage in order not to delay bandwidth timers
+handling tasks involved in the CPU hotplug forward progress.
+
+However wakeups can still be performed by the outgoing CPU after
+CPUHP_AP_HRTIMERS_DYING. Those can result again in bandwidth timers being
+armed. Depending on several considerations (crystal ball power management
+based election, earliest timer already enqueued, timer migration enabled or
+not), the target may eventually be the current CPU even if offline. If that
+happens, the timer is eventually ignored.
+
+The most notable example is RCU which had to deal with each and every of
+those wake-ups by deferring them to an online CPU, along with related
+workarounds:
+
+_ e787644caf76 (rcu: Defer RCU kthreads wakeup when CPU is dying)
+_ 9139f93209d1 (rcu/nocb: Fix RT throttling hrtimer armed from offline CPU)
+_ f7345ccc62a4 (rcu/nocb: Fix rcuog wake-up from offline softirq)
+
+The problem isn't confined to RCU though as the stop machine kthread
+(which runs CPUHP_AP_HRTIMERS_DYING) reports its completion at the end
+of its work through cpu_stop_signal_done() and performs a wake up that
+eventually arms the deadline server timer:
+
+   WARNING: CPU: 94 PID: 588 at kernel/time/hrtimer.c:1086 hrtimer_start_range_ns+0x289/0x2d0
+   CPU: 94 UID: 0 PID: 588 Comm: migration/94 Not tainted
+   Stopper: multi_cpu_stop+0x0/0x120 <- stop_machine_cpuslocked+0x66/0xc0
+   RIP: 0010:hrtimer_start_range_ns+0x289/0x2d0
+   Call Trace:
+   <TASK>
+     start_dl_timer
+     enqueue_dl_entity
+     dl_server_start
+     enqueue_task_fair
+     enqueue_task
+     ttwu_do_activate
+     try_to_wake_up
+     complete
+     cpu_stopper_thread
+
+Instead of providing yet another bandaid to work around the situation, fix
+it in the hrtimers infrastructure instead: always migrate away a timer to
+an online target whenever it is enqueued from an offline CPU.
+
+This will also allow to revert all the above RCU disgraceful hacks.
+
+Fixes: 5c0930ccaad5 ("hrtimers: Push pending hrtimers away from outgoing CPU earlier")
+Reported-by: Vlad Poenaru <vlad.wing@gmail.com>
+Reported-by: Usama Arif <usamaarif642@gmail.com>
+Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Tested-by: Paul E. McKenney <paulmck@kernel.org>
+Link: https://lore.kernel.org/all/20250117232433.24027-1-frederic@kernel.org
+Closes: 20241213203739.1519801-1-usamaarif642@gmail.com
+Signed-off-by: Zhaoyang Li <lizy04@hust.edu.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/hrtimer.h |    1 
+ kernel/time/hrtimer.c   |  103 ++++++++++++++++++++++++++++++++++++++----------
+ 2 files changed, 83 insertions(+), 21 deletions(-)
+
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -237,6 +237,7 @@ struct hrtimer_cpu_base {
+       ktime_t                         softirq_expires_next;
+       struct hrtimer                  *softirq_next_timer;
+       struct hrtimer_clock_base       clock_base[HRTIMER_MAX_CLOCK_BASES];
++      call_single_data_t              csd;
+ } ____cacheline_aligned;
+ static inline void hrtimer_set_expires(struct hrtimer *timer, ktime_t time)
+--- a/kernel/time/hrtimer.c
++++ b/kernel/time/hrtimer.c
+@@ -58,6 +58,8 @@
+ #define HRTIMER_ACTIVE_SOFT   (HRTIMER_ACTIVE_HARD << MASK_SHIFT)
+ #define HRTIMER_ACTIVE_ALL    (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)
++static void retrigger_next_event(void *arg);
++
+ /*
+  * The timer bases:
+  *
+@@ -111,7 +113,8 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base,
+                       .clockid = CLOCK_TAI,
+                       .get_time = &ktime_get_clocktai,
+               },
+-      }
++      },
++      .csd = CSD_INIT(retrigger_next_event, NULL)
+ };
+ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
+@@ -124,6 +127,14 @@ static const int hrtimer_clock_to_base_t
+       [CLOCK_TAI]             = HRTIMER_BASE_TAI,
+ };
++static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base)
++{
++      if (!IS_ENABLED(CONFIG_HOTPLUG_CPU))
++              return true;
++      else
++              return likely(base->online);
++}
++
+ /*
+  * Functions and macros which are different for UP/SMP systems are kept in a
+  * single place
+@@ -178,27 +189,54 @@ struct hrtimer_clock_base *lock_hrtimer_
+ }
+ /*
+- * We do not migrate the timer when it is expiring before the next
+- * event on the target cpu. When high resolution is enabled, we cannot
+- * reprogram the target cpu hardware and we would cause it to fire
+- * late. To keep it simple, we handle the high resolution enabled and
+- * disabled case similar.
++ * Check if the elected target is suitable considering its next
++ * event and the hotplug state of the current CPU.
++ *
++ * If the elected target is remote and its next event is after the timer
++ * to queue, then a remote reprogram is necessary. However there is no
++ * guarantee the IPI handling the operation would arrive in time to meet
++ * the high resolution deadline. In this case the local CPU becomes a
++ * preferred target, unless it is offline.
++ *
++ * High and low resolution modes are handled the same way for simplicity.
+  *
+  * Called with cpu_base->lock of target cpu held.
+  */
+-static int
+-hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
++static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base,
++                                  struct hrtimer_cpu_base *new_cpu_base,
++                                  struct hrtimer_cpu_base *this_cpu_base)
+ {
+       ktime_t expires;
++      /*
++       * The local CPU clockevent can be reprogrammed. Also get_target_base()
++       * guarantees it is online.
++       */
++      if (new_cpu_base == this_cpu_base)
++              return true;
++
++      /*
++       * The offline local CPU can't be the default target if the
++       * next remote target event is after this timer. Keep the
++       * elected new base. An IPI will we issued to reprogram
++       * it as a last resort.
++       */
++      if (!hrtimer_base_is_online(this_cpu_base))
++              return true;
++
+       expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
+-      return expires < new_base->cpu_base->expires_next;
++
++      return expires >= new_base->cpu_base->expires_next;
+ }
+-static inline
+-struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
+-                                       int pinned)
++static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned)
+ {
++      if (!hrtimer_base_is_online(base)) {
++              int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER));
++
++              return &per_cpu(hrtimer_bases, cpu);
++      }
++
+ #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+       if (static_branch_likely(&timers_migration_enabled) && !pinned)
+               return &per_cpu(hrtimer_bases, get_nohz_timer_target());
+@@ -249,8 +287,8 @@ again:
+               raw_spin_unlock(&base->cpu_base->lock);
+               raw_spin_lock(&new_base->cpu_base->lock);
+-              if (new_cpu_base != this_cpu_base &&
+-                  hrtimer_check_target(timer, new_base)) {
++              if (!hrtimer_suitable_target(timer, new_base, new_cpu_base,
++                                           this_cpu_base)) {
+                       raw_spin_unlock(&new_base->cpu_base->lock);
+                       raw_spin_lock(&base->cpu_base->lock);
+                       new_cpu_base = this_cpu_base;
+@@ -259,8 +297,7 @@ again:
+               }
+               WRITE_ONCE(timer->base, new_base);
+       } else {
+-              if (new_cpu_base != this_cpu_base &&
+-                  hrtimer_check_target(timer, new_base)) {
++              if (!hrtimer_suitable_target(timer, new_base,  new_cpu_base, this_cpu_base)) {
+                       new_cpu_base = this_cpu_base;
+                       goto again;
+               }
+@@ -720,8 +757,6 @@ static inline int hrtimer_is_hres_enable
+       return hrtimer_hres_enabled;
+ }
+-static void retrigger_next_event(void *arg);
+-
+ /*
+  * Switch to high resolution mode
+  */
+@@ -1208,6 +1243,7 @@ static int __hrtimer_start_range_ns(stru
+                                   u64 delta_ns, const enum hrtimer_mode mode,
+                                   struct hrtimer_clock_base *base)
+ {
++      struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases);
+       struct hrtimer_clock_base *new_base;
+       bool force_local, first;
+@@ -1219,10 +1255,16 @@ static int __hrtimer_start_range_ns(stru
+        * and enforce reprogramming after it is queued no matter whether
+        * it is the new first expiring timer again or not.
+        */
+-      force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
++      force_local = base->cpu_base == this_cpu_base;
+       force_local &= base->cpu_base->next_timer == timer;
+       /*
++       * Don't force local queuing if this enqueue happens on a unplugged
++       * CPU after hrtimer_cpu_dying() has been invoked.
++       */
++      force_local &= this_cpu_base->online;
++
++      /*
+        * Remove an active timer from the queue. In case it is not queued
+        * on the current CPU, make sure that remove_hrtimer() updates the
+        * remote data correctly.
+@@ -1251,8 +1293,27 @@ static int __hrtimer_start_range_ns(stru
+       }
+       first = enqueue_hrtimer(timer, new_base, mode);
+-      if (!force_local)
+-              return first;
++      if (!force_local) {
++              /*
++               * If the current CPU base is online, then the timer is
++               * never queued on a remote CPU if it would be the first
++               * expiring timer there.
++               */
++              if (hrtimer_base_is_online(this_cpu_base))
++                      return first;
++
++              /*
++               * Timer was enqueued remote because the current base is
++               * already offline. If the timer is the first to expire,
++               * kick the remote CPU to reprogram the clock event.
++               */
++              if (first) {
++                      struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base;
++
++                      smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd);
++              }
++              return 0;
++      }
+       /*
+        * Timer was forced to stay on the current CPU to avoid
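
The heart of the fix is visible in the get_target_base() hunk: when the local
hrtimer base is already offline (past CPUHP_AP_HRTIMERS_DYING), the timer is
always migrated to an online housekeeping CPU, and if it becomes that base's
first expiring timer the remote CPU is kicked via the pre-initialized csd
(smp_call_function_single_async()) to reprogram its clock event. A condensed
sketch of the target election follows; pick_target_base() is a hypothetical
stand-in for get_target_base() and relies on symbols internal to
kernel/time/hrtimer.c, so it is an illustration rather than standalone code:

  static struct hrtimer_cpu_base *pick_target_base(struct hrtimer_cpu_base *local,
						   int pinned)
  {
	if (!hrtimer_base_is_online(local)) {
		/* Past CPUHP_AP_HRTIMERS_DYING: never queue on this CPU. */
		int cpu = cpumask_any_and(cpu_online_mask,
					  housekeeping_cpumask(HK_TYPE_TIMER));

		return &per_cpu(hrtimer_bases, cpu);
	}

  #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
	if (static_branch_likely(&timers_migration_enabled) && !pinned)
		return &per_cpu(hrtimer_bases, get_nohz_timer_target());
  #endif
	return local;
  }
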
diff --git a/queue-6.6/series b/queue-6.6/series
index 07a569d7629a6665a0d29ad9075ae56fe8a1c2ca..19f974e8dd3292b9a215cca2447d6e284fec79dc 100644 (file)
--- a/queue-6.6/series
@@ -389,3 +389,6 @@ pinctrl-tegra-fix-off-by-one-in-tegra_pinctrl_get_group.patch
 i3c-master-svc-fix-implicit-fallthrough-in-svc_i3c_master_ibi_work.patch
 x86-mm-init-handle-the-special-case-of-device-private-pages-in-add_pages-to-not-increase-max_pfn-and-trigger-dma_addressing_limited-bounce-buffers.patch
 drm-gem-internally-test-import_attach-for-imported-objects.patch
+can-kvaser_pciefd-force-irq-edge-in-case-of-nested-irq.patch
+hrtimers-force-migrate-away-hrtimers-queued-after-cpuhp_ap_hrtimers_dying.patch
+btrfs-check-folio-mapping-after-unlock-in-relocate_one_folio.patch