From: Sasha Levin Date: Tue, 1 Aug 2023 01:06:54 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v5.15.124~56 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=199d38737732c813a6873f72945ee93803ca1b60;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch b/queue-6.1/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch new file mode 100644 index 00000000000..079701a43b3 --- /dev/null +++ b/queue-6.1/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch @@ -0,0 +1,91 @@ +From 360721969b19da9153fdec89aaed5bd317817e94 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 29 Jun 2023 09:26:20 +0200 +Subject: irq-bcm6345-l1: Do not assume a fixed block to cpu mapping +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jonas Gorski + +[ Upstream commit 55ad24857341c36616ecc1d9580af5626c226cf1 ] + +The irq to block mapping is fixed, and interrupts from the first block +will always be routed to the first parent IRQ. But the parent interrupts +themselves can be routed to any available CPU. + +This is used by the bootloader to map the first parent interrupt to the +boot CPU, regardless of whether the boot CPU is the first one or the second +one. + +When booting from the second CPU, the assumption that the first block's +IRQ is mapped to the first CPU breaks, and the system hangs because +interrupts do not get routed correctly. + +Fix this by passing the appropriate bcm6345_l1_cpu to the interrupt +handler instead of the chip itself, so the handler always has the right +block. + +Fixes: c7c42ec2baa1 ("irqchips/bmips: Add bcm6345-l1 interrupt controller") +Signed-off-by: Jonas Gorski +Reviewed-by: Philippe Mathieu-Daudé +Reviewed-by: Florian Fainelli +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20230629072620.62527-1-jonas.gorski@gmail.com +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-bcm6345-l1.c | 14 +++++--------- + 1 file changed, 5 insertions(+), 9 deletions(-) + +diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c +index 6899e37810a88..b14c74f7b9b98 100644 +--- a/drivers/irqchip/irq-bcm6345-l1.c ++++ b/drivers/irqchip/irq-bcm6345-l1.c +@@ -82,6 +82,7 @@ struct bcm6345_l1_chip { + }; + + struct bcm6345_l1_cpu { ++ struct bcm6345_l1_chip *intc; + void __iomem *map_base; + unsigned int parent_irq; + u32 enable_cache[]; +@@ -115,17 +116,11 @@ static inline unsigned int cpu_for_irq(struct bcm6345_l1_chip *intc, + + static void bcm6345_l1_irq_handle(struct irq_desc *desc) + { +- struct bcm6345_l1_chip *intc = irq_desc_get_handler_data(desc); +- struct bcm6345_l1_cpu *cpu; ++ struct bcm6345_l1_cpu *cpu = irq_desc_get_handler_data(desc); ++ struct bcm6345_l1_chip *intc = cpu->intc; + struct irq_chip *chip = irq_desc_get_chip(desc); + unsigned int idx; + +-#ifdef CONFIG_SMP +- cpu = intc->cpus[cpu_logical_map(smp_processor_id())]; +-#else +- cpu = intc->cpus[0]; +-#endif +- + chained_irq_enter(chip, desc); + + for (idx = 0; idx < intc->n_words; idx++) { +@@ -253,6 +248,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn, + if (!cpu) + return -ENOMEM; + ++ cpu->intc = intc; + cpu->map_base = ioremap(res.start, sz); + if (!cpu->map_base) + return -ENOMEM; +@@ -268,7 +264,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn, + return -EINVAL; + } + irq_set_chained_handler_and_data(cpu->parent_irq, +- bcm6345_l1_irq_handle, intc); ++ 
bcm6345_l1_irq_handle, cpu); + + return 0; + } +-- +2.40.1 + diff --git a/queue-6.1/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch b/queue-6.1/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch new file mode 100644 index 00000000000..b7d8040061b --- /dev/null +++ b/queue-6.1/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch @@ -0,0 +1,161 @@ +From 9118285db47f05daee34c6b1e91484c702306802 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 17 Jun 2023 08:32:42 +0100 +Subject: irqchip/gic-v4.1: Properly lock VPEs when doing a directLPI + invalidation + +From: Marc Zyngier + +[ Upstream commit 926846a703cbf5d0635cc06e67d34b228746554b ] + +We normally rely on the irq_to_cpuid_[un]lock() primitives to make +sure nothing will change col->idx while performing a LPI invalidation. + +However, these primitives do not cover VPE doorbells, and we have +some open-coded locking for that. Unfortunately, this locking is +pretty bogus. + +Instead, extend the above primitives to cover VPE doorbells and +convert the whole thing to it. + +Fixes: f3a059219bc7 ("irqchip/gic-v4.1: Ensure mutual exclusion between vPE affinity change and RD access") +Reported-by: Kunkun Jiang +Signed-off-by: Marc Zyngier +Cc: Zenghui Yu +Cc: wanghaibin.wang@huawei.com +Tested-by: Kunkun Jiang +Reviewed-by: Zenghui Yu +Link: https://lore.kernel.org/r/20230617073242.3199746-1-maz@kernel.org +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-gic-v3-its.c | 75 ++++++++++++++++++++------------ + 1 file changed, 46 insertions(+), 29 deletions(-) + +diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c +index 973ede0197e36..8956881503d9a 100644 +--- a/drivers/irqchip/irq-gic-v3-its.c ++++ b/drivers/irqchip/irq-gic-v3-its.c +@@ -271,13 +271,23 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags) + raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags); + } + ++static struct irq_chip its_vpe_irq_chip; ++ + static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags) + { +- struct its_vlpi_map *map = get_vlpi_map(d); ++ struct its_vpe *vpe = NULL; + int cpu; + +- if (map) { +- cpu = vpe_to_cpuid_lock(map->vpe, flags); ++ if (d->chip == &its_vpe_irq_chip) { ++ vpe = irq_data_get_irq_chip_data(d); ++ } else { ++ struct its_vlpi_map *map = get_vlpi_map(d); ++ if (map) ++ vpe = map->vpe; ++ } ++ ++ if (vpe) { ++ cpu = vpe_to_cpuid_lock(vpe, flags); + } else { + /* Physical LPIs are already locked via the irq_desc lock */ + struct its_device *its_dev = irq_data_get_irq_chip_data(d); +@@ -291,10 +301,18 @@ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags) + + static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags) + { +- struct its_vlpi_map *map = get_vlpi_map(d); ++ struct its_vpe *vpe = NULL; ++ ++ if (d->chip == &its_vpe_irq_chip) { ++ vpe = irq_data_get_irq_chip_data(d); ++ } else { ++ struct its_vlpi_map *map = get_vlpi_map(d); ++ if (map) ++ vpe = map->vpe; ++ } + +- if (map) +- vpe_to_cpuid_unlock(map->vpe, flags); ++ if (vpe) ++ vpe_to_cpuid_unlock(vpe, flags); + } + + static struct its_collection *valid_col(struct its_collection *col) +@@ -1431,14 +1449,29 @@ static void wait_for_syncr(void __iomem *rdbase) + cpu_relax(); + } + +-static void direct_lpi_inv(struct irq_data *d) ++static void __direct_lpi_inv(struct irq_data *d, u64 val) + { +- struct its_vlpi_map *map = get_vlpi_map(d); + void __iomem *rdbase; + unsigned long flags; +- u64 val; + int cpu; + ++ /* Target the redistributor this LPI is currently 
routed to */ ++ cpu = irq_to_cpuid_lock(d, &flags); ++ raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock); ++ ++ rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base; ++ gic_write_lpir(val, rdbase + GICR_INVLPIR); ++ wait_for_syncr(rdbase); ++ ++ raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock); ++ irq_to_cpuid_unlock(d, flags); ++} ++ ++static void direct_lpi_inv(struct irq_data *d) ++{ ++ struct its_vlpi_map *map = get_vlpi_map(d); ++ u64 val; ++ + if (map) { + struct its_device *its_dev = irq_data_get_irq_chip_data(d); + +@@ -1451,15 +1484,7 @@ static void direct_lpi_inv(struct irq_data *d) + val = d->hwirq; + } + +- /* Target the redistributor this LPI is currently routed to */ +- cpu = irq_to_cpuid_lock(d, &flags); +- raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock); +- rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base; +- gic_write_lpir(val, rdbase + GICR_INVLPIR); +- +- wait_for_syncr(rdbase); +- raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock); +- irq_to_cpuid_unlock(d, flags); ++ __direct_lpi_inv(d, val); + } + + static void lpi_update_config(struct irq_data *d, u8 clr, u8 set) +@@ -3941,18 +3966,10 @@ static void its_vpe_send_inv(struct irq_data *d) + { + struct its_vpe *vpe = irq_data_get_irq_chip_data(d); + +- if (gic_rdists->has_direct_lpi) { +- void __iomem *rdbase; +- +- /* Target the redistributor this VPE is currently known on */ +- raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock); +- rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base; +- gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR); +- wait_for_syncr(rdbase); +- raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock); +- } else { ++ if (gic_rdists->has_direct_lpi) ++ __direct_lpi_inv(d, d->parent_data->hwirq); ++ else + its_vpe_send_cmd(vpe, its_send_inv); +- } + } + + static void its_vpe_mask_irq(struct irq_data *d) +-- +2.40.1 + diff --git a/queue-6.1/locking-rtmutex-fix-task-pi_waiters-integrity.patch b/queue-6.1/locking-rtmutex-fix-task-pi_waiters-integrity.patch new file mode 100644 index 00000000000..abeb21adcbd --- /dev/null +++ b/queue-6.1/locking-rtmutex-fix-task-pi_waiters-integrity.patch @@ -0,0 +1,614 @@ +From ea5f6ff43b7eafe8d6cd6c180c394b5c572a3fcf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 7 Jul 2023 16:19:09 +0200 +Subject: locking/rtmutex: Fix task->pi_waiters integrity + +From: Peter Zijlstra + +[ Upstream commit f7853c34241807bb97673a5e97719123be39a09e ] + +Henry reported that rt_mutex_adjust_prio_check() has an ordering +problem and puts the lie to the comment in [7]. Sharing the sort key +between lock->waiters and owner->pi_waiters *does* create problems, +since unlike what the comment claims, holding [L] is insufficient. + +Notably, consider: + + A + / \ + M1 M2 + | | + B C + +That is, task A owns both M1 and M2, B and C block on them. In this +case a concurrent chain walk (B & C) will modify their resp. sort keys +in [7] while holding M1->wait_lock and M2->wait_lock. So holding [L] +is meaningless, they're different Ls. + +This then gives rise to a race condition between [7] and [11], where +the requeue of pi_waiters will observe an inconsistent tree order. + + B C + + (holds M1->wait_lock, (holds M2->wait_lock, + holds B->pi_lock) holds A->pi_lock) + + [7] + waiter_update_prio(); + ... + [8] + raw_spin_unlock(B->pi_lock); + ... 
+ [10] + raw_spin_lock(A->pi_lock); + + [11] + rt_mutex_enqueue_pi(); + // observes inconsistent A->pi_waiters + // tree order + +Fixing this means either extending the range of the owner lock from +[10-13] to [6-13], with the immediate problem that this means [6-8] +hold both blocked and owner locks, or duplicating the sort key. + +Since the locking in chain walk is horrible enough without having to +consider pi_lock nesting rules, duplicate the sort key instead. + +By giving each tree their own sort key, the above race becomes +harmless, if C sees B at the old location, then B will correct things +(if they need correcting) when it walks up the chain and reaches A. + +Fixes: fb00aca47440 ("rtmutex: Turn the plist into an rb-tree") +Reported-by: Henry Wu +Signed-off-by: Peter Zijlstra (Intel) +Acked-by: Thomas Gleixner +Tested-by: Henry Wu +Link: https://lkml.kernel.org/r/20230707161052.GF2883469%40hirez.programming.kicks-ass.net +Signed-off-by: Sasha Levin +--- + kernel/locking/rtmutex.c | 170 +++++++++++++++++++++----------- + kernel/locking/rtmutex_api.c | 2 +- + kernel/locking/rtmutex_common.h | 47 ++++++--- + kernel/locking/ww_mutex.h | 12 +-- + 4 files changed, 155 insertions(+), 76 deletions(-) + +diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c +index 728f434de2bbf..21db0df0eb000 100644 +--- a/kernel/locking/rtmutex.c ++++ b/kernel/locking/rtmutex.c +@@ -333,21 +333,43 @@ static __always_inline int __waiter_prio(struct task_struct *task) + return prio; + } + ++/* ++ * Update the waiter->tree copy of the sort keys. ++ */ + static __always_inline void + waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) + { +- waiter->prio = __waiter_prio(task); +- waiter->deadline = task->dl.deadline; ++ lockdep_assert_held(&waiter->lock->wait_lock); ++ lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry)); ++ ++ waiter->tree.prio = __waiter_prio(task); ++ waiter->tree.deadline = task->dl.deadline; ++} ++ ++/* ++ * Update the waiter->pi_tree copy of the sort keys (from the tree copy). 
++ */ ++static __always_inline void ++waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task) ++{ ++ lockdep_assert_held(&waiter->lock->wait_lock); ++ lockdep_assert_held(&task->pi_lock); ++ lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry)); ++ ++ waiter->pi_tree.prio = waiter->tree.prio; ++ waiter->pi_tree.deadline = waiter->tree.deadline; + } + + /* +- * Only use with rt_mutex_waiter_{less,equal}() ++ * Only use with rt_waiter_node_{less,equal}() + */ ++#define task_to_waiter_node(p) \ ++ &(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline } + #define task_to_waiter(p) \ +- &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline } ++ &(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) } + +-static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, +- struct rt_mutex_waiter *right) ++static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left, ++ struct rt_waiter_node *right) + { + if (left->prio < right->prio) + return 1; +@@ -364,8 +386,8 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left, + return 0; + } + +-static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, +- struct rt_mutex_waiter *right) ++static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left, ++ struct rt_waiter_node *right) + { + if (left->prio != right->prio) + return 0; +@@ -385,7 +407,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left, + static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, + struct rt_mutex_waiter *top_waiter) + { +- if (rt_mutex_waiter_less(waiter, top_waiter)) ++ if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree)) + return true; + + #ifdef RT_MUTEX_BUILD_SPINLOCKS +@@ -393,30 +415,30 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter, + * Note that RT tasks are excluded from same priority (lateral) + * steals to prevent the introduction of an unbounded latency. 
+ */ +- if (rt_prio(waiter->prio) || dl_prio(waiter->prio)) ++ if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio)) + return false; + +- return rt_mutex_waiter_equal(waiter, top_waiter); ++ return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree); + #else + return false; + #endif + } + + #define __node_2_waiter(node) \ +- rb_entry((node), struct rt_mutex_waiter, tree_entry) ++ rb_entry((node), struct rt_mutex_waiter, tree.entry) + + static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b) + { + struct rt_mutex_waiter *aw = __node_2_waiter(a); + struct rt_mutex_waiter *bw = __node_2_waiter(b); + +- if (rt_mutex_waiter_less(aw, bw)) ++ if (rt_waiter_node_less(&aw->tree, &bw->tree)) + return 1; + + if (!build_ww_mutex()) + return 0; + +- if (rt_mutex_waiter_less(bw, aw)) ++ if (rt_waiter_node_less(&bw->tree, &aw->tree)) + return 0; + + /* NOTE: relies on waiter->ww_ctx being set before insertion */ +@@ -434,48 +456,58 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod + static __always_inline void + rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) + { +- rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less); ++ lockdep_assert_held(&lock->wait_lock); ++ ++ rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less); + } + + static __always_inline void + rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter) + { +- if (RB_EMPTY_NODE(&waiter->tree_entry)) ++ lockdep_assert_held(&lock->wait_lock); ++ ++ if (RB_EMPTY_NODE(&waiter->tree.entry)) + return; + +- rb_erase_cached(&waiter->tree_entry, &lock->waiters); +- RB_CLEAR_NODE(&waiter->tree_entry); ++ rb_erase_cached(&waiter->tree.entry, &lock->waiters); ++ RB_CLEAR_NODE(&waiter->tree.entry); + } + +-#define __node_2_pi_waiter(node) \ +- rb_entry((node), struct rt_mutex_waiter, pi_tree_entry) ++#define __node_2_rt_node(node) \ ++ rb_entry((node), struct rt_waiter_node, entry) + +-static __always_inline bool +-__pi_waiter_less(struct rb_node *a, const struct rb_node *b) ++static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b) + { +- return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b)); ++ return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b)); + } + + static __always_inline void + rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) + { +- rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less); ++ lockdep_assert_held(&task->pi_lock); ++ ++ rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less); + } + + static __always_inline void + rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter) + { +- if (RB_EMPTY_NODE(&waiter->pi_tree_entry)) ++ lockdep_assert_held(&task->pi_lock); ++ ++ if (RB_EMPTY_NODE(&waiter->pi_tree.entry)) + return; + +- rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters); +- RB_CLEAR_NODE(&waiter->pi_tree_entry); ++ rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters); ++ RB_CLEAR_NODE(&waiter->pi_tree.entry); + } + +-static __always_inline void rt_mutex_adjust_prio(struct task_struct *p) ++static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock, ++ struct task_struct *p) + { + struct task_struct *pi_task = NULL; + ++ lockdep_assert_held(&lock->wait_lock); ++ lockdep_assert(rt_mutex_owner(lock) == p); + lockdep_assert_held(&p->pi_lock); + + if (task_has_pi_waiters(p)) +@@ -571,9 +603,14 @@ static 
__always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st + * Chain walk basics and protection scope + * + * [R] refcount on task +- * [P] task->pi_lock held ++ * [Pn] task->pi_lock held + * [L] rtmutex->wait_lock held + * ++ * Normal locking order: ++ * ++ * rtmutex->wait_lock ++ * task->pi_lock ++ * + * Step Description Protected by + * function arguments: + * @task [R] +@@ -588,27 +625,32 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st + * again: + * loop_sanity_check(); + * retry: +- * [1] lock(task->pi_lock); [R] acquire [P] +- * [2] waiter = task->pi_blocked_on; [P] +- * [3] check_exit_conditions_1(); [P] +- * [4] lock = waiter->lock; [P] +- * [5] if (!try_lock(lock->wait_lock)) { [P] try to acquire [L] +- * unlock(task->pi_lock); release [P] ++ * [1] lock(task->pi_lock); [R] acquire [P1] ++ * [2] waiter = task->pi_blocked_on; [P1] ++ * [3] check_exit_conditions_1(); [P1] ++ * [4] lock = waiter->lock; [P1] ++ * [5] if (!try_lock(lock->wait_lock)) { [P1] try to acquire [L] ++ * unlock(task->pi_lock); release [P1] + * goto retry; + * } +- * [6] check_exit_conditions_2(); [P] + [L] +- * [7] requeue_lock_waiter(lock, waiter); [P] + [L] +- * [8] unlock(task->pi_lock); release [P] ++ * [6] check_exit_conditions_2(); [P1] + [L] ++ * [7] requeue_lock_waiter(lock, waiter); [P1] + [L] ++ * [8] unlock(task->pi_lock); release [P1] + * put_task_struct(task); release [R] + * [9] check_exit_conditions_3(); [L] + * [10] task = owner(lock); [L] + * get_task_struct(task); [L] acquire [R] +- * lock(task->pi_lock); [L] acquire [P] +- * [11] requeue_pi_waiter(tsk, waiters(lock));[P] + [L] +- * [12] check_exit_conditions_4(); [P] + [L] +- * [13] unlock(task->pi_lock); release [P] ++ * lock(task->pi_lock); [L] acquire [P2] ++ * [11] requeue_pi_waiter(tsk, waiters(lock));[P2] + [L] ++ * [12] check_exit_conditions_4(); [P2] + [L] ++ * [13] unlock(task->pi_lock); release [P2] + * unlock(lock->wait_lock); release [L] + * goto again; ++ * ++ * Where P1 is the blocking task and P2 is the lock owner; going up one step ++ * the owner becomes the next blocked task etc.. ++ * ++* + */ + static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, + enum rtmutex_chainwalk chwalk, +@@ -756,7 +798,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, + * enabled we continue, but stop the requeueing in the chain + * walk. + */ +- if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { ++ if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) { + if (!detect_deadlock) + goto out_unlock_pi; + else +@@ -764,13 +806,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, + } + + /* +- * [4] Get the next lock ++ * [4] Get the next lock; per holding task->pi_lock we can't unblock ++ * and guarantee @lock's existence. + */ + lock = waiter->lock; + /* + * [5] We need to trylock here as we are holding task->pi_lock, + * which is the reverse lock order versus the other rtmutex + * operations. ++ * ++ * Per the above, holding task->pi_lock guarantees lock exists, so ++ * inverting this lock order is infeasible from a life-time ++ * perspective. + */ + if (!raw_spin_trylock(&lock->wait_lock)) { + raw_spin_unlock_irq(&task->pi_lock); +@@ -874,17 +921,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, + * or + * + * DL CBS enforcement advancing the effective deadline. 
+- * +- * Even though pi_waiters also uses these fields, and that tree is only +- * updated in [11], we can do this here, since we hold [L], which +- * serializes all pi_waiters access and rb_erase() does not care about +- * the values of the node being removed. + */ + waiter_update_prio(waiter, task); + + rt_mutex_enqueue(lock, waiter); + +- /* [8] Release the task */ ++ /* ++ * [8] Release the (blocking) task in preparation for ++ * taking the owner task in [10]. ++ * ++ * Since we hold lock->waiter_lock, task cannot unblock, even if we ++ * release task->pi_lock. ++ */ + raw_spin_unlock(&task->pi_lock); + put_task_struct(task); + +@@ -908,7 +956,12 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, + return 0; + } + +- /* [10] Grab the next task, i.e. the owner of @lock */ ++ /* ++ * [10] Grab the next task, i.e. the owner of @lock ++ * ++ * Per holding lock->wait_lock and checking for !owner above, there ++ * must be an owner and it cannot go away. ++ */ + task = get_task_struct(rt_mutex_owner(lock)); + raw_spin_lock(&task->pi_lock); + +@@ -921,8 +974,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, + * and adjust the priority of the owner. + */ + rt_mutex_dequeue_pi(task, prerequeue_top_waiter); ++ waiter_clone_prio(waiter, task); + rt_mutex_enqueue_pi(task, waiter); +- rt_mutex_adjust_prio(task); ++ rt_mutex_adjust_prio(lock, task); + + } else if (prerequeue_top_waiter == waiter) { + /* +@@ -937,8 +991,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task, + */ + rt_mutex_dequeue_pi(task, waiter); + waiter = rt_mutex_top_waiter(lock); ++ waiter_clone_prio(waiter, task); + rt_mutex_enqueue_pi(task, waiter); +- rt_mutex_adjust_prio(task); ++ rt_mutex_adjust_prio(lock, task); + } else { + /* + * Nothing changed. No need to do any priority +@@ -1154,6 +1209,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, + waiter->task = task; + waiter->lock = lock; + waiter_update_prio(waiter, task); ++ waiter_clone_prio(waiter, task); + + /* Get the top priority waiter on the lock */ + if (rt_mutex_has_waiters(lock)) +@@ -1187,7 +1243,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, + rt_mutex_dequeue_pi(owner, top_waiter); + rt_mutex_enqueue_pi(owner, waiter); + +- rt_mutex_adjust_prio(owner); ++ rt_mutex_adjust_prio(lock, owner); + if (owner->pi_blocked_on) + chain_walk = 1; + } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) { +@@ -1234,6 +1290,8 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh, + { + struct rt_mutex_waiter *waiter; + ++ lockdep_assert_held(&lock->wait_lock); ++ + raw_spin_lock(¤t->pi_lock); + + waiter = rt_mutex_top_waiter(lock); +@@ -1246,7 +1304,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh, + * task unblocks. 
+ */ + rt_mutex_dequeue_pi(current, waiter); +- rt_mutex_adjust_prio(current); ++ rt_mutex_adjust_prio(lock, current); + + /* + * As we are waking up the top waiter, and the waiter stays +@@ -1482,7 +1540,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock, + if (rt_mutex_has_waiters(lock)) + rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock)); + +- rt_mutex_adjust_prio(owner); ++ rt_mutex_adjust_prio(lock, owner); + + /* Store the lock on which owner is blocked or NULL */ + next_lock = task_blocked_on_lock(owner); +diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c +index cb9fdff76a8a3..a6974d0445930 100644 +--- a/kernel/locking/rtmutex_api.c ++++ b/kernel/locking/rtmutex_api.c +@@ -459,7 +459,7 @@ void __sched rt_mutex_adjust_pi(struct task_struct *task) + raw_spin_lock_irqsave(&task->pi_lock, flags); + + waiter = task->pi_blocked_on; +- if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) { ++ if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) { + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + return; + } +diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h +index c47e8361bfb5c..1162e07cdaea1 100644 +--- a/kernel/locking/rtmutex_common.h ++++ b/kernel/locking/rtmutex_common.h +@@ -17,27 +17,44 @@ + #include + #include + ++ ++/* ++ * This is a helper for the struct rt_mutex_waiter below. A waiter goes in two ++ * separate trees and they need their own copy of the sort keys because of ++ * different locking requirements. ++ * ++ * @entry: rbtree node to enqueue into the waiters tree ++ * @prio: Priority of the waiter ++ * @deadline: Deadline of the waiter if applicable ++ * ++ * See rt_waiter_node_less() and waiter_*_prio(). ++ */ ++struct rt_waiter_node { ++ struct rb_node entry; ++ int prio; ++ u64 deadline; ++}; ++ + /* + * This is the control structure for tasks blocked on a rt_mutex, + * which is allocated on the kernel stack on of the blocked task. 
+ * +- * @tree_entry: pi node to enqueue into the mutex waiters tree +- * @pi_tree_entry: pi node to enqueue into the mutex owner waiters tree ++ * @tree: node to enqueue into the mutex waiters tree ++ * @pi_tree: node to enqueue into the mutex owner waiters tree + * @task: task reference to the blocked task + * @lock: Pointer to the rt_mutex on which the waiter blocks + * @wake_state: Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT) +- * @prio: Priority of the waiter +- * @deadline: Deadline of the waiter if applicable + * @ww_ctx: WW context pointer ++ * ++ * @tree is ordered by @lock->wait_lock ++ * @pi_tree is ordered by rt_mutex_owner(@lock)->pi_lock + */ + struct rt_mutex_waiter { +- struct rb_node tree_entry; +- struct rb_node pi_tree_entry; ++ struct rt_waiter_node tree; ++ struct rt_waiter_node pi_tree; + struct task_struct *task; + struct rt_mutex_base *lock; + unsigned int wake_state; +- int prio; +- u64 deadline; + struct ww_acquire_ctx *ww_ctx; + }; + +@@ -105,7 +122,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock, + { + struct rb_node *leftmost = rb_first_cached(&lock->waiters); + +- return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter; ++ return rb_entry(leftmost, struct rt_mutex_waiter, tree.entry) == waiter; + } + + static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock) +@@ -113,8 +130,10 @@ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base * + struct rb_node *leftmost = rb_first_cached(&lock->waiters); + struct rt_mutex_waiter *w = NULL; + ++ lockdep_assert_held(&lock->wait_lock); ++ + if (leftmost) { +- w = rb_entry(leftmost, struct rt_mutex_waiter, tree_entry); ++ w = rb_entry(leftmost, struct rt_mutex_waiter, tree.entry); + BUG_ON(w->lock != lock); + } + return w; +@@ -127,8 +146,10 @@ static inline int task_has_pi_waiters(struct task_struct *p) + + static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p) + { ++ lockdep_assert_held(&p->pi_lock); ++ + return rb_entry(p->pi_waiters.rb_leftmost, struct rt_mutex_waiter, +- pi_tree_entry); ++ pi_tree.entry); + } + + #define RT_MUTEX_HAS_WAITERS 1UL +@@ -190,8 +211,8 @@ static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter) + static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter) + { + debug_rt_mutex_init_waiter(waiter); +- RB_CLEAR_NODE(&waiter->pi_tree_entry); +- RB_CLEAR_NODE(&waiter->tree_entry); ++ RB_CLEAR_NODE(&waiter->pi_tree.entry); ++ RB_CLEAR_NODE(&waiter->tree.entry); + waiter->wake_state = TASK_NORMAL; + waiter->task = NULL; + } +diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h +index 56f139201f246..3ad2cc4823e59 100644 +--- a/kernel/locking/ww_mutex.h ++++ b/kernel/locking/ww_mutex.h +@@ -96,25 +96,25 @@ __ww_waiter_first(struct rt_mutex *lock) + struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root); + if (!n) + return NULL; +- return rb_entry(n, struct rt_mutex_waiter, tree_entry); ++ return rb_entry(n, struct rt_mutex_waiter, tree.entry); + } + + static inline struct rt_mutex_waiter * + __ww_waiter_next(struct rt_mutex *lock, struct rt_mutex_waiter *w) + { +- struct rb_node *n = rb_next(&w->tree_entry); ++ struct rb_node *n = rb_next(&w->tree.entry); + if (!n) + return NULL; +- return rb_entry(n, struct rt_mutex_waiter, tree_entry); ++ return rb_entry(n, struct rt_mutex_waiter, tree.entry); + } + + static inline struct rt_mutex_waiter * + __ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w) + { 
+- struct rb_node *n = rb_prev(&w->tree_entry); ++ struct rb_node *n = rb_prev(&w->tree.entry); + if (!n) + return NULL; +- return rb_entry(n, struct rt_mutex_waiter, tree_entry); ++ return rb_entry(n, struct rt_mutex_waiter, tree.entry); + } + + static inline struct rt_mutex_waiter * +@@ -123,7 +123,7 @@ __ww_waiter_last(struct rt_mutex *lock) + struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root); + if (!n) + return NULL; +- return rb_entry(n, struct rt_mutex_waiter, tree_entry); ++ return rb_entry(n, struct rt_mutex_waiter, tree.entry); + } + + static inline void +-- +2.40.1 + diff --git a/queue-6.1/series b/queue-6.1/series index cb17833e271..75055ca5f4f 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -188,3 +188,6 @@ file-always-lock-position-for-fmode_atomic_pos.patch nfsd-remove-incorrect-check-in-nfsd4_validate_stateid.patch acpi-iort-remove-erroneous-id_count-check-in-iort_node_get_rmr_info.patch tpm_tis-explicitly-check-for-error-code.patch +irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch +irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch +locking-rtmutex-fix-task-pi_waiters-integrity.patch
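
The bcm6345-l1 patch above applies a common chained-handler pattern: instead of the handler guessing its per-CPU state from smp_processor_id(), registration passes the per-block bcm6345_l1_cpu as handler data, and a new back-pointer (cpu->intc) keeps the shared chip reachable. Below is a minimal userspace C sketch of that pattern, assuming only what the hunks show; every name in it is an illustrative stand-in, not the kernel API.

#include <stdio.h>

struct chip;                            /* shared controller state */

struct cpu_block {
	struct chip *intc;              /* back-pointer, as the patch adds */
	int block_id;
};

struct chip {
	struct cpu_block block[2];
};

/* The handler derives its block from the registration data, not from
 * whichever CPU it happens to be running on. */
static void handle_block(void *handler_data)
{
	struct cpu_block *cpu = handler_data;   /* was: the shared chip */
	struct chip *intc = cpu->intc;          /* shared state still reachable */

	printf("servicing block %d of chip %p\n", cpu->block_id, (void *)intc);
}

int main(void)
{
	struct chip c = { .block = { { &c, 0 }, { &c, 1 } } };

	/* Mirrors irq_set_chained_handler_and_data(..., cpu): register the
	 * per-block struct, so no block-to-CPU mapping is assumed. */
	handle_block(&c.block[1]);
	return 0;
}

The back-pointer costs one pointer per block and lets the fix delete the CONFIG_SMP special case that broke whenever the boot CPU was not the first one.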
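The gic-v4.1 patch is, at heart, a consolidation: both the VLPI path and the VPE-doorbell path now funnel through __direct_lpi_inv(), which pins the target CPU via irq_to_cpuid_lock() before taking the per-redistributor rd_lock and writing GICR_INVLPIR. A runnable pthreads sketch of that lock-then-write shape follows; the mutexes and the register variable are stand-ins for the kernel's raw spinlocks and MMIO, not real APIs.

#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

static pthread_mutex_t route_lock = PTHREAD_MUTEX_INITIALIZER; /* pins the target CPU */
static pthread_mutex_t reg_lock   = PTHREAD_MUTEX_INITIALIZER; /* serializes "MMIO" */
static uint64_t inv_reg;                                       /* stand-in for GICR_INVLPIR */

/* One helper owns the whole lock/write/wait/unlock sequence, so no
 * caller can reinvent the locking incorrectly. */
static void direct_inv(uint64_t val)
{
	pthread_mutex_lock(&route_lock);   /* target cannot be re-routed now */
	pthread_mutex_lock(&reg_lock);

	inv_reg = val;                     /* "write the invalidate register" */
	/* the kernel code polls a sync register here (wait_for_syncr) */

	pthread_mutex_unlock(&reg_lock);
	pthread_mutex_unlock(&route_lock);
}

int main(void)
{
	direct_inv(0x2a);                  /* VLPI and doorbell paths both land here */
	printf("last invalidation: %#llx\n", (unsigned long long)inv_reg);
	return 0;
}

Centralizing the sequence is the fix: per the hunks above, the old doorbell path took rd_lock around the write but never pinned the VPE's resident CPU first, which is the open-coded locking the changelog calls bogus.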
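The rtmutex patch's central data-structure change is easy to miss in the noise: a waiter sits in two rb-trees serialized by two different locks, so it now carries two copies of its sort key, waiter->tree (under lock->wait_lock) and waiter->pi_tree (under the owner's pi_lock), and the pi_tree copy is refreshed only while both locks are held. A compact userspace C sketch of that discipline, with pthread mutexes standing in for the raw spinlocks (nothing here is kernel API):

#include <pthread.h>
#include <stdio.h>

struct sort_key {                       /* analogue of struct rt_waiter_node */
	int prio;
	unsigned long long deadline;
};

struct waiter {
	struct sort_key tree;           /* ordered under the lock's wait_lock */
	struct sort_key pi_tree;        /* ordered under the owner's pi_lock */
};

static pthread_mutex_t wait_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t pi_lock   = PTHREAD_MUTEX_INITIALIZER;

/* analogue of waiter_update_prio(): touches only the tree copy, so a
 * concurrent pi_waiters user never sees a half-updated key */
static void update_prio(struct waiter *w, int prio, unsigned long long dl)
{
	/* caller holds wait_lock */
	w->tree.prio = prio;
	w->tree.deadline = dl;
}

/* analogue of waiter_clone_prio(): requires BOTH locks, which is exactly
 * when the pi_waiters tree is requeued anyway (step [11] above) */
static void clone_prio(struct waiter *w)
{
	/* caller holds wait_lock and pi_lock */
	w->pi_tree = w->tree;
}

int main(void)
{
	struct waiter w = { { 0, 0 }, { 0, 0 } };

	pthread_mutex_lock(&wait_lock);
	update_prio(&w, 10, 0);         /* step [7]: tree copy only */
	pthread_mutex_lock(&pi_lock);
	clone_prio(&w);                 /* step [11]: both locks held */
	pthread_mutex_unlock(&pi_lock);
	pthread_mutex_unlock(&wait_lock);

	printf("tree.prio=%d pi_tree.prio=%d\n", w.tree.prio, w.pi_tree.prio);
	return 0;
}

Because each copy only ever changes under its own lock, a racing reader sees at worst a stale but internally consistent key, and the changelog's argument is that a later step of the chain walk corrects any such staleness.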