Fixes for 6.1

author Sasha Levin <sashal@kernel.org>

Tue, 1 Aug 2023 01:06:54 +0000 (21:06 -0400)

committer Sasha Levin <sashal@kernel.org>

Tue, 1 Aug 2023 01:06:54 +0000 (21:06 -0400)
author Sasha Levin <sashal@kernel.org>
Tue, 1 Aug 2023 01:06:54 +0000 (21:06 -0400)
committer Sasha Levin <sashal@kernel.org>
Tue, 1 Aug 2023 01:06:54 +0000 (21:06 -0400)
diff --git a/queue-6.1/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch b/queue-6.1/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch

new file mode 100644 (file)

index 0000000..079701a
--- /dev/null
+++ b/queue-6.1/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch
@@ -0,0 +1,91 @@
+From 360721969b19da9153fdec89aaed5bd317817e94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Jun 2023 09:26:20 +0200
+Subject: irq-bcm6345-l1: Do not assume a fixed block to cpu mapping
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit 55ad24857341c36616ecc1d9580af5626c226cf1 ]
+
+The irq to block mapping is fixed, and interrupts from the first block
+will always be routed to the first parent IRQ. But the parent interrupts
+themselves can be routed to any available CPU.
+
+This is used by the bootloader to map the first parent interrupt to the
+boot CPU, regardless wether the boot CPU is the first one or the second
+one.
+
+When booting from the second CPU, the assumption that the first block's
+IRQ is mapped to the first CPU breaks, and the system hangs because
+interrupts do not get routed correctly.
+
+Fix this by passing the appropriate bcm6434_l1_cpu to the interrupt
+handler instead of the chip itself, so the handler always has the right
+block.
+
+Fixes: c7c42ec2baa1 ("irqchips/bmips: Add bcm6345-l1 interrupt controller")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230629072620.62527-1-jonas.gorski@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-bcm6345-l1.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c
+index 6899e37810a88..b14c74f7b9b98 100644
+--- a/drivers/irqchip/irq-bcm6345-l1.c
++++ b/drivers/irqchip/irq-bcm6345-l1.c
+@@ -82,6 +82,7 @@ struct bcm6345_l1_chip {
+ };
+ 
+ struct bcm6345_l1_cpu {
++      struct bcm6345_l1_chip  *intc;
+       void __iomem            *map_base;
+       unsigned int            parent_irq;
+       u32                     enable_cache[];
+@@ -115,17 +116,11 @@ static inline unsigned int cpu_for_irq(struct bcm6345_l1_chip *intc,
+ 
+ static void bcm6345_l1_irq_handle(struct irq_desc *desc)
+ {
+-      struct bcm6345_l1_chip *intc = irq_desc_get_handler_data(desc);
+-      struct bcm6345_l1_cpu *cpu;
++      struct bcm6345_l1_cpu *cpu = irq_desc_get_handler_data(desc);
++      struct bcm6345_l1_chip *intc = cpu->intc;
+       struct irq_chip *chip = irq_desc_get_chip(desc);
+       unsigned int idx;
+ 
+-#ifdef CONFIG_SMP
+-      cpu = intc->cpus[cpu_logical_map(smp_processor_id())];
+-#else
+-      cpu = intc->cpus[0];
+-#endif
+-
+       chained_irq_enter(chip, desc);
+ 
+       for (idx = 0; idx < intc->n_words; idx++) {
+@@ -253,6 +248,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
+       if (!cpu)
+               return -ENOMEM;
+ 
++      cpu->intc = intc;
+       cpu->map_base = ioremap(res.start, sz);
+       if (!cpu->map_base)
+               return -ENOMEM;
+@@ -268,7 +264,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
+               return -EINVAL;
+       }
+       irq_set_chained_handler_and_data(cpu->parent_irq,
+-                                              bcm6345_l1_irq_handle, intc);
++                                              bcm6345_l1_irq_handle, cpu);
+ 
+       return 0;
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.1/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch b/queue-6.1/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch

new file mode 100644 (file)

index 0000000..b7d8040
--- /dev/null
+++ b/queue-6.1/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch
@@ -0,0 +1,161 @@
+From 9118285db47f05daee34c6b1e91484c702306802 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 17 Jun 2023 08:32:42 +0100
+Subject: irqchip/gic-v4.1: Properly lock VPEs when doing a directLPI
+ invalidation
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 926846a703cbf5d0635cc06e67d34b228746554b ]
+
+We normally rely on the irq_to_cpuid_[un]lock() primitives to make
+sure nothing will change col->idx while performing a LPI invalidation.
+
+However, these primitives do not cover VPE doorbells, and we have
+some open-coded locking for that. Unfortunately, this locking is
+pretty bogus.
+
+Instead, extend the above primitives to cover VPE doorbells and
+convert the whole thing to it.
+
+Fixes: f3a059219bc7 ("irqchip/gic-v4.1: Ensure mutual exclusion between vPE affinity change and RD access")
+Reported-by: Kunkun Jiang <jiangkunkun@huawei.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: Zenghui Yu <yuzenghui@huawei.com>
+Cc: wanghaibin.wang@huawei.com
+Tested-by: Kunkun Jiang <jiangkunkun@huawei.com>
+Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
+Link: https://lore.kernel.org/r/20230617073242.3199746-1-maz@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-gic-v3-its.c | 75 ++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
+index 973ede0197e36..8956881503d9a 100644
+--- a/drivers/irqchip/irq-gic-v3-its.c
++++ b/drivers/irqchip/irq-gic-v3-its.c
+@@ -271,13 +271,23 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags)
+       raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
+ }
+ 
++static struct irq_chip its_vpe_irq_chip;
++
+ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
+ {
+-      struct its_vlpi_map *map = get_vlpi_map(d);
++      struct its_vpe *vpe = NULL;
+       int cpu;
+ 
+-      if (map) {
+-              cpu = vpe_to_cpuid_lock(map->vpe, flags);
++      if (d->chip == &its_vpe_irq_chip) {
++              vpe = irq_data_get_irq_chip_data(d);
++      } else {
++              struct its_vlpi_map *map = get_vlpi_map(d);
++              if (map)
++                      vpe = map->vpe;
++      }
++
++      if (vpe) {
++              cpu = vpe_to_cpuid_lock(vpe, flags);
+       } else {
+               /* Physical LPIs are already locked via the irq_desc lock */
+               struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+@@ -291,10 +301,18 @@ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
+ 
+ static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags)
+ {
+-      struct its_vlpi_map *map = get_vlpi_map(d);
++      struct its_vpe *vpe = NULL;
++
++      if (d->chip == &its_vpe_irq_chip) {
++              vpe = irq_data_get_irq_chip_data(d);
++      } else {
++              struct its_vlpi_map *map = get_vlpi_map(d);
++              if (map)
++                      vpe = map->vpe;
++      }
+ 
+-      if (map)
+-              vpe_to_cpuid_unlock(map->vpe, flags);
++      if (vpe)
++              vpe_to_cpuid_unlock(vpe, flags);
+ }
+ 
+ static struct its_collection *valid_col(struct its_collection *col)
+@@ -1431,14 +1449,29 @@ static void wait_for_syncr(void __iomem *rdbase)
+               cpu_relax();
+ }
+ 
+-static void direct_lpi_inv(struct irq_data *d)
++static void __direct_lpi_inv(struct irq_data *d, u64 val)
+ {
+-      struct its_vlpi_map *map = get_vlpi_map(d);
+       void __iomem *rdbase;
+       unsigned long flags;
+-      u64 val;
+       int cpu;
+ 
++      /* Target the redistributor this LPI is currently routed to */
++      cpu = irq_to_cpuid_lock(d, &flags);
++      raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
++
++      rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
++      gic_write_lpir(val, rdbase + GICR_INVLPIR);
++      wait_for_syncr(rdbase);
++
++      raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
++      irq_to_cpuid_unlock(d, flags);
++}
++
++static void direct_lpi_inv(struct irq_data *d)
++{
++      struct its_vlpi_map *map = get_vlpi_map(d);
++      u64 val;
++
+       if (map) {
+               struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ 
+@@ -1451,15 +1484,7 @@ static void direct_lpi_inv(struct irq_data *d)
+               val = d->hwirq;
+       }
+ 
+-      /* Target the redistributor this LPI is currently routed to */
+-      cpu = irq_to_cpuid_lock(d, &flags);
+-      raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
+-      rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
+-      gic_write_lpir(val, rdbase + GICR_INVLPIR);
+-
+-      wait_for_syncr(rdbase);
+-      raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
+-      irq_to_cpuid_unlock(d, flags);
++      __direct_lpi_inv(d, val);
+ }
+ 
+ static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
+@@ -3941,18 +3966,10 @@ static void its_vpe_send_inv(struct irq_data *d)
+ {
+       struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+ 
+-      if (gic_rdists->has_direct_lpi) {
+-              void __iomem *rdbase;
+-
+-              /* Target the redistributor this VPE is currently known on */
+-              raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
+-              rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+-              gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
+-              wait_for_syncr(rdbase);
+-              raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
+-      } else {
++      if (gic_rdists->has_direct_lpi)
++              __direct_lpi_inv(d, d->parent_data->hwirq);
++      else
+               its_vpe_send_cmd(vpe, its_send_inv);
+-      }
+ }
+ 
+ static void its_vpe_mask_irq(struct irq_data *d)
+-- 
+2.40.1
+
diff --git a/queue-6.1/locking-rtmutex-fix-task-pi_waiters-integrity.patch b/queue-6.1/locking-rtmutex-fix-task-pi_waiters-integrity.patch

new file mode 100644 (file)

index 0000000..abeb21a
--- /dev/null
+++ b/queue-6.1/locking-rtmutex-fix-task-pi_waiters-integrity.patch
@@ -0,0 +1,614 @@
+From ea5f6ff43b7eafe8d6cd6c180c394b5c572a3fcf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 16:19:09 +0200
+Subject: locking/rtmutex: Fix task->pi_waiters integrity
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit f7853c34241807bb97673a5e97719123be39a09e ]
+
+Henry reported that rt_mutex_adjust_prio_check() has an ordering
+problem and puts the lie to the comment in [7]. Sharing the sort key
+between lock->waiters and owner->pi_waiters *does* create problems,
+since unlike what the comment claims, holding [L] is insufficient.
+
+Notably, consider:
+
+       A
+      /   \
+     M1   M2
+     |     |
+     B     C
+
+That is, task A owns both M1 and M2, B and C block on them. In this
+case a concurrent chain walk (B & C) will modify their resp. sort keys
+in [7] while holding M1->wait_lock and M2->wait_lock. So holding [L]
+is meaningless, they're different Ls.
+
+This then gives rise to a race condition between [7] and [11], where
+the requeue of pi_waiters will observe an inconsistent tree order.
+
+       B                               C
+
+  (holds M1->wait_lock,                (holds M2->wait_lock,
+   holds B->pi_lock)            holds A->pi_lock)
+
+  [7]
+  waiter_update_prio();
+  ...
+  [8]
+  raw_spin_unlock(B->pi_lock);
+  ...
+  [10]
+  raw_spin_lock(A->pi_lock);
+
+                               [11]
+                               rt_mutex_enqueue_pi();
+                               // observes inconsistent A->pi_waiters
+                               // tree order
+
+Fixing this means either extending the range of the owner lock from
+[10-13] to [6-13], with the immediate problem that this means [6-8]
+hold both blocked and owner locks, or duplicating the sort key.
+
+Since the locking in chain walk is horrible enough without having to
+consider pi_lock nesting rules, duplicate the sort key instead.
+
+By giving each tree their own sort key, the above race becomes
+harmless, if C sees B at the old location, then B will correct things
+(if they need correcting) when it walks up the chain and reaches A.
+
+Fixes: fb00aca47440 ("rtmutex: Turn the plist into an rb-tree")
+Reported-by: Henry Wu <triangletrap12@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Henry Wu <triangletrap12@gmail.com>
+Link: https://lkml.kernel.org/r/20230707161052.GF2883469%40hirez.programming.kicks-ass.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/rtmutex.c        | 170 +++++++++++++++++++++-----------
+ kernel/locking/rtmutex_api.c    |   2 +-
+ kernel/locking/rtmutex_common.h |  47 ++++++---
+ kernel/locking/ww_mutex.h       |  12 +--
+ 4 files changed, 155 insertions(+), 76 deletions(-)
+
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index 728f434de2bbf..21db0df0eb000 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -333,21 +333,43 @@ static __always_inline int __waiter_prio(struct task_struct *task)
+       return prio;
+ }
+ 
++/*
++ * Update the waiter->tree copy of the sort keys.
++ */
+ static __always_inline void
+ waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
+ {
+-      waiter->prio = __waiter_prio(task);
+-      waiter->deadline = task->dl.deadline;
++      lockdep_assert_held(&waiter->lock->wait_lock);
++      lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
++
++      waiter->tree.prio = __waiter_prio(task);
++      waiter->tree.deadline = task->dl.deadline;
++}
++
++/*
++ * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
++ */
++static __always_inline void
++waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
++{
++      lockdep_assert_held(&waiter->lock->wait_lock);
++      lockdep_assert_held(&task->pi_lock);
++      lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
++
++      waiter->pi_tree.prio = waiter->tree.prio;
++      waiter->pi_tree.deadline = waiter->tree.deadline;
+ }
+ 
+ /*
+- * Only use with rt_mutex_waiter_{less,equal}()
++ * Only use with rt_waiter_node_{less,equal}()
+  */
++#define task_to_waiter_node(p)        \
++      &(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
+ #define task_to_waiter(p)     \
+-      &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
++      &(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }
+ 
+-static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+-                                              struct rt_mutex_waiter *right)
++static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
++                                             struct rt_waiter_node *right)
+ {
+       if (left->prio < right->prio)
+               return 1;
+@@ -364,8 +386,8 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+       return 0;
+ }
+ 
+-static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
+-                                               struct rt_mutex_waiter *right)
++static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
++                                               struct rt_waiter_node *right)
+ {
+       if (left->prio != right->prio)
+               return 0;
+@@ -385,7 +407,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
+ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
+                                 struct rt_mutex_waiter *top_waiter)
+ {
+-      if (rt_mutex_waiter_less(waiter, top_waiter))
++      if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
+               return true;
+ 
+ #ifdef RT_MUTEX_BUILD_SPINLOCKS
+@@ -393,30 +415,30 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
+        * Note that RT tasks are excluded from same priority (lateral)
+        * steals to prevent the introduction of an unbounded latency.
+        */
+-      if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
++      if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
+               return false;
+ 
+-      return rt_mutex_waiter_equal(waiter, top_waiter);
++      return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
+ #else
+       return false;
+ #endif
+ }
+ 
+ #define __node_2_waiter(node) \
+-      rb_entry((node), struct rt_mutex_waiter, tree_entry)
++      rb_entry((node), struct rt_mutex_waiter, tree.entry)
+ 
+ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
+ {
+       struct rt_mutex_waiter *aw = __node_2_waiter(a);
+       struct rt_mutex_waiter *bw = __node_2_waiter(b);
+ 
+-      if (rt_mutex_waiter_less(aw, bw))
++      if (rt_waiter_node_less(&aw->tree, &bw->tree))
+               return 1;
+ 
+       if (!build_ww_mutex())
+               return 0;
+ 
+-      if (rt_mutex_waiter_less(bw, aw))
++      if (rt_waiter_node_less(&bw->tree, &aw->tree))
+               return 0;
+ 
+       /* NOTE: relies on waiter->ww_ctx being set before insertion */
+@@ -434,48 +456,58 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod
+ static __always_inline void
+ rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
+ {
+-      rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
++      lockdep_assert_held(&lock->wait_lock);
++
++      rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
+ }
+ 
+ static __always_inline void
+ rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
+ {
+-      if (RB_EMPTY_NODE(&waiter->tree_entry))
++      lockdep_assert_held(&lock->wait_lock);
++
++      if (RB_EMPTY_NODE(&waiter->tree.entry))
+               return;
+ 
+-      rb_erase_cached(&waiter->tree_entry, &lock->waiters);
+-      RB_CLEAR_NODE(&waiter->tree_entry);
++      rb_erase_cached(&waiter->tree.entry, &lock->waiters);
++      RB_CLEAR_NODE(&waiter->tree.entry);
+ }
+ 
+-#define __node_2_pi_waiter(node) \
+-      rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
++#define __node_2_rt_node(node) \
++      rb_entry((node), struct rt_waiter_node, entry)
+ 
+-static __always_inline bool
+-__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
++static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
+ {
+-      return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
++      return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
+ }
+ 
+ static __always_inline void
+ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
+ {
+-      rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
++      lockdep_assert_held(&task->pi_lock);
++
++      rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
+ }
+ 
+ static __always_inline void
+ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
+ {
+-      if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
++      lockdep_assert_held(&task->pi_lock);
++
++      if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
+               return;
+ 
+-      rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
+-      RB_CLEAR_NODE(&waiter->pi_tree_entry);
++      rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
++      RB_CLEAR_NODE(&waiter->pi_tree.entry);
+ }
+ 
+-static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
++static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
++                                               struct task_struct *p)
+ {
+       struct task_struct *pi_task = NULL;
+ 
++      lockdep_assert_held(&lock->wait_lock);
++      lockdep_assert(rt_mutex_owner(lock) == p);
+       lockdep_assert_held(&p->pi_lock);
+ 
+       if (task_has_pi_waiters(p))
+@@ -571,9 +603,14 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
+  * Chain walk basics and protection scope
+  *
+  * [R] refcount on task
+- * [P] task->pi_lock held
++ * [Pn] task->pi_lock held
+  * [L] rtmutex->wait_lock held
+  *
++ * Normal locking order:
++ *
++ *   rtmutex->wait_lock
++ *     task->pi_lock
++ *
+  * Step       Description                             Protected by
+  *    function arguments:
+  *    @task                                   [R]
+@@ -588,27 +625,32 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
+  *    again:
+  *      loop_sanity_check();
+  *    retry:
+- * [1]          lock(task->pi_lock);                  [R] acquire [P]
+- * [2]          waiter = task->pi_blocked_on;         [P]
+- * [3]          check_exit_conditions_1();            [P]
+- * [4]          lock = waiter->lock;                  [P]
+- * [5]          if (!try_lock(lock->wait_lock)) {     [P] try to acquire [L]
+- *        unlock(task->pi_lock);              release [P]
++ * [1]          lock(task->pi_lock);                  [R] acquire [P1]
++ * [2]          waiter = task->pi_blocked_on;         [P1]
++ * [3]          check_exit_conditions_1();            [P1]
++ * [4]          lock = waiter->lock;                  [P1]
++ * [5]          if (!try_lock(lock->wait_lock)) {     [P1] try to acquire [L]
++ *        unlock(task->pi_lock);              release [P1]
+  *        goto retry;
+  *      }
+- * [6]          check_exit_conditions_2();            [P] + [L]
+- * [7]          requeue_lock_waiter(lock, waiter);    [P] + [L]
+- * [8]          unlock(task->pi_lock);                release [P]
++ * [6]          check_exit_conditions_2();            [P1] + [L]
++ * [7]          requeue_lock_waiter(lock, waiter);    [P1] + [L]
++ * [8]          unlock(task->pi_lock);                release [P1]
+  *      put_task_struct(task);                release [R]
+  * [9]          check_exit_conditions_3();            [L]
+  * [10]         task = owner(lock);                   [L]
+  *      get_task_struct(task);                [L] acquire [R]
+- *      lock(task->pi_lock);                  [L] acquire [P]
+- * [11]         requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
+- * [12]         check_exit_conditions_4();            [P] + [L]
+- * [13]         unlock(task->pi_lock);                release [P]
++ *      lock(task->pi_lock);                  [L] acquire [P2]
++ * [11]         requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
++ * [12]         check_exit_conditions_4();            [P2] + [L]
++ * [13]         unlock(task->pi_lock);                release [P2]
+  *      unlock(lock->wait_lock);              release [L]
+  *      goto again;
++ *
++ * Where P1 is the blocking task and P2 is the lock owner; going up one step
++ * the owner becomes the next blocked task etc..
++ *
++*
+  */
+ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+                                             enum rtmutex_chainwalk chwalk,
+@@ -756,7 +798,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+        * enabled we continue, but stop the requeueing in the chain
+        * walk.
+        */
+-      if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
++      if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
+               if (!detect_deadlock)
+                       goto out_unlock_pi;
+               else
+@@ -764,13 +806,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+       }
+ 
+       /*
+-       * [4] Get the next lock
++       * [4] Get the next lock; per holding task->pi_lock we can't unblock
++       * and guarantee @lock's existence.
+        */
+       lock = waiter->lock;
+       /*
+        * [5] We need to trylock here as we are holding task->pi_lock,
+        * which is the reverse lock order versus the other rtmutex
+        * operations.
++       *
++       * Per the above, holding task->pi_lock guarantees lock exists, so
++       * inverting this lock order is infeasible from a life-time
++       * perspective.
+        */
+       if (!raw_spin_trylock(&lock->wait_lock)) {
+               raw_spin_unlock_irq(&task->pi_lock);
+@@ -874,17 +921,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+        * or
+        *
+        *   DL CBS enforcement advancing the effective deadline.
+-       *
+-       * Even though pi_waiters also uses these fields, and that tree is only
+-       * updated in [11], we can do this here, since we hold [L], which
+-       * serializes all pi_waiters access and rb_erase() does not care about
+-       * the values of the node being removed.
+        */
+       waiter_update_prio(waiter, task);
+ 
+       rt_mutex_enqueue(lock, waiter);
+ 
+-      /* [8] Release the task */
++      /*
++       * [8] Release the (blocking) task in preparation for
++       * taking the owner task in [10].
++       *
++       * Since we hold lock->waiter_lock, task cannot unblock, even if we
++       * release task->pi_lock.
++       */
+       raw_spin_unlock(&task->pi_lock);
+       put_task_struct(task);
+ 
+@@ -908,7 +956,12 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+               return 0;
+       }
+ 
+-      /* [10] Grab the next task, i.e. the owner of @lock */
++      /*
++       * [10] Grab the next task, i.e. the owner of @lock
++       *
++       * Per holding lock->wait_lock and checking for !owner above, there
++       * must be an owner and it cannot go away.
++       */
+       task = get_task_struct(rt_mutex_owner(lock));
+       raw_spin_lock(&task->pi_lock);
+ 
+@@ -921,8 +974,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+                * and adjust the priority of the owner.
+                */
+               rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
++              waiter_clone_prio(waiter, task);
+               rt_mutex_enqueue_pi(task, waiter);
+-              rt_mutex_adjust_prio(task);
++              rt_mutex_adjust_prio(lock, task);
+ 
+       } else if (prerequeue_top_waiter == waiter) {
+               /*
+@@ -937,8 +991,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+                */
+               rt_mutex_dequeue_pi(task, waiter);
+               waiter = rt_mutex_top_waiter(lock);
++              waiter_clone_prio(waiter, task);
+               rt_mutex_enqueue_pi(task, waiter);
+-              rt_mutex_adjust_prio(task);
++              rt_mutex_adjust_prio(lock, task);
+       } else {
+               /*
+                * Nothing changed. No need to do any priority
+@@ -1154,6 +1209,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
+       waiter->task = task;
+       waiter->lock = lock;
+       waiter_update_prio(waiter, task);
++      waiter_clone_prio(waiter, task);
+ 
+       /* Get the top priority waiter on the lock */
+       if (rt_mutex_has_waiters(lock))
+@@ -1187,7 +1243,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
+               rt_mutex_dequeue_pi(owner, top_waiter);
+               rt_mutex_enqueue_pi(owner, waiter);
+ 
+-              rt_mutex_adjust_prio(owner);
++              rt_mutex_adjust_prio(lock, owner);
+               if (owner->pi_blocked_on)
+                       chain_walk = 1;
+       } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
+@@ -1234,6 +1290,8 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
+ {
+       struct rt_mutex_waiter *waiter;
+ 
++      lockdep_assert_held(&lock->wait_lock);
++
+       raw_spin_lock(&current->pi_lock);
+ 
+       waiter = rt_mutex_top_waiter(lock);
+@@ -1246,7 +1304,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
+        * task unblocks.
+        */
+       rt_mutex_dequeue_pi(current, waiter);
+-      rt_mutex_adjust_prio(current);
++      rt_mutex_adjust_prio(lock, current);
+ 
+       /*
+        * As we are waking up the top waiter, and the waiter stays
+@@ -1482,7 +1540,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
+       if (rt_mutex_has_waiters(lock))
+               rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
+ 
+-      rt_mutex_adjust_prio(owner);
++      rt_mutex_adjust_prio(lock, owner);
+ 
+       /* Store the lock on which owner is blocked or NULL */
+       next_lock = task_blocked_on_lock(owner);
+diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
+index cb9fdff76a8a3..a6974d0445930 100644
+--- a/kernel/locking/rtmutex_api.c
++++ b/kernel/locking/rtmutex_api.c
+@@ -459,7 +459,7 @@ void __sched rt_mutex_adjust_pi(struct task_struct *task)
+       raw_spin_lock_irqsave(&task->pi_lock, flags);
+ 
+       waiter = task->pi_blocked_on;
+-      if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
++      if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
+               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               return;
+       }
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index c47e8361bfb5c..1162e07cdaea1 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -17,27 +17,44 @@
+ #include <linux/rtmutex.h>
+ #include <linux/sched/wake_q.h>
+ 
++
++/*
++ * This is a helper for the struct rt_mutex_waiter below. A waiter goes in two
++ * separate trees and they need their own copy of the sort keys because of
++ * different locking requirements.
++ *
++ * @entry:            rbtree node to enqueue into the waiters tree
++ * @prio:             Priority of the waiter
++ * @deadline:         Deadline of the waiter if applicable
++ *
++ * See rt_waiter_node_less() and waiter_*_prio().
++ */
++struct rt_waiter_node {
++      struct rb_node  entry;
++      int             prio;
++      u64             deadline;
++};
++
+ /*
+  * This is the control structure for tasks blocked on a rt_mutex,
+  * which is allocated on the kernel stack on of the blocked task.
+  *
+- * @tree_entry:               pi node to enqueue into the mutex waiters tree
+- * @pi_tree_entry:    pi node to enqueue into the mutex owner waiters tree
++ * @tree:             node to enqueue into the mutex waiters tree
++ * @pi_tree:          node to enqueue into the mutex owner waiters tree
+  * @task:             task reference to the blocked task
+  * @lock:             Pointer to the rt_mutex on which the waiter blocks
+  * @wake_state:               Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT)
+- * @prio:             Priority of the waiter
+- * @deadline:         Deadline of the waiter if applicable
+  * @ww_ctx:           WW context pointer
++ *
++ * @tree is ordered by @lock->wait_lock
++ * @pi_tree is ordered by rt_mutex_owner(@lock)->pi_lock
+  */
+ struct rt_mutex_waiter {
+-      struct rb_node          tree_entry;
+-      struct rb_node          pi_tree_entry;
++      struct rt_waiter_node   tree;
++      struct rt_waiter_node   pi_tree;
+       struct task_struct      *task;
+       struct rt_mutex_base    *lock;
+       unsigned int            wake_state;
+-      int                     prio;
+-      u64                     deadline;
+       struct ww_acquire_ctx   *ww_ctx;
+ };
+ 
+@@ -105,7 +122,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
+ {
+       struct rb_node *leftmost = rb_first_cached(&lock->waiters);
+ 
+-      return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
++      return rb_entry(leftmost, struct rt_mutex_waiter, tree.entry) == waiter;
+ }
+ 
+ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
+@@ -113,8 +130,10 @@ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *
+       struct rb_node *leftmost = rb_first_cached(&lock->waiters);
+       struct rt_mutex_waiter *w = NULL;
+ 
++      lockdep_assert_held(&lock->wait_lock);
++
+       if (leftmost) {
+-              w = rb_entry(leftmost, struct rt_mutex_waiter, tree_entry);
++              w = rb_entry(leftmost, struct rt_mutex_waiter, tree.entry);
+               BUG_ON(w->lock != lock);
+       }
+       return w;
+@@ -127,8 +146,10 @@ static inline int task_has_pi_waiters(struct task_struct *p)
+ 
+ static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
+ {
++      lockdep_assert_held(&p->pi_lock);
++
+       return rb_entry(p->pi_waiters.rb_leftmost, struct rt_mutex_waiter,
+-                      pi_tree_entry);
++                      pi_tree.entry);
+ }
+ 
+ #define RT_MUTEX_HAS_WAITERS  1UL
+@@ -190,8 +211,8 @@ static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
+ static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
+ {
+       debug_rt_mutex_init_waiter(waiter);
+-      RB_CLEAR_NODE(&waiter->pi_tree_entry);
+-      RB_CLEAR_NODE(&waiter->tree_entry);
++      RB_CLEAR_NODE(&waiter->pi_tree.entry);
++      RB_CLEAR_NODE(&waiter->tree.entry);
+       waiter->wake_state = TASK_NORMAL;
+       waiter->task = NULL;
+ }
+diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
+index 56f139201f246..3ad2cc4823e59 100644
+--- a/kernel/locking/ww_mutex.h
++++ b/kernel/locking/ww_mutex.h
+@@ -96,25 +96,25 @@ __ww_waiter_first(struct rt_mutex *lock)
+       struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root);
+       if (!n)
+               return NULL;
+-      return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++      return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+ 
+ static inline struct rt_mutex_waiter *
+ __ww_waiter_next(struct rt_mutex *lock, struct rt_mutex_waiter *w)
+ {
+-      struct rb_node *n = rb_next(&w->tree_entry);
++      struct rb_node *n = rb_next(&w->tree.entry);
+       if (!n)
+               return NULL;
+-      return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++      return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+ 
+ static inline struct rt_mutex_waiter *
+ __ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w)
+ {
+-      struct rb_node *n = rb_prev(&w->tree_entry);
++      struct rb_node *n = rb_prev(&w->tree.entry);
+       if (!n)
+               return NULL;
+-      return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++      return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+ 
+ static inline struct rt_mutex_waiter *
+@@ -123,7 +123,7 @@ __ww_waiter_last(struct rt_mutex *lock)
+       struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root);
+       if (!n)
+               return NULL;
+-      return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++      return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+ 
+ static inline void
+-- 
+2.40.1
+
diff --git a/queue-6.1/series b/queue-6.1/series

index cb17833e271b0826b3a07e318cb48c0ff7eab963..75055ca5f4fbbab7a3bd563eee9f87bfd6d7735c 100644 (file)
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -188,3 +188,6 @@ file-always-lock-position-for-fmode_atomic_pos.patch
  nfsd-remove-incorrect-check-in-nfsd4_validate_stateid.patch
  acpi-iort-remove-erroneous-id_count-check-in-iort_node_get_rmr_info.patch
  tpm_tis-explicitly-check-for-error-code.patch
+irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch
+irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch
+locking-rtmutex-fix-task-pi_waiters-integrity.patch
author	Sasha Levin <sashal@kernel.org>
	Tue, 1 Aug 2023 01:06:54 +0000 (21:06 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Tue, 1 Aug 2023 01:06:54 +0000 (21:06 -0400)
queue-6.1/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/locking-rtmutex-fix-task-pi_waiters-integrity.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/series		patch \| blob \| blame \| history