Fixes for 5.15

author Sasha Levin <sashal@kernel.org>

Tue, 1 Aug 2023 01:06:55 +0000 (21:06 -0400)

committer Sasha Levin <sashal@kernel.org>

Tue, 1 Aug 2023 01:06:55 +0000 (21:06 -0400)
author Sasha Levin <sashal@kernel.org>
Tue, 1 Aug 2023 01:06:55 +0000 (21:06 -0400)
committer Sasha Levin <sashal@kernel.org>
Tue, 1 Aug 2023 01:06:55 +0000 (21:06 -0400)
diff --git a/queue-5.15/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch b/queue-5.15/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch

new file mode 100644 (file)

index 0000000..1351465
--- /dev/null
+++ b/queue-5.15/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch
@@ -0,0 +1,91 @@
+From 6a96235352aab62b62a4dff7f6faee5804daff0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Jun 2023 09:26:20 +0200
+Subject: irq-bcm6345-l1: Do not assume a fixed block to cpu mapping
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit 55ad24857341c36616ecc1d9580af5626c226cf1 ]
+
+The irq to block mapping is fixed, and interrupts from the first block
+will always be routed to the first parent IRQ. But the parent interrupts
+themselves can be routed to any available CPU.
+
+This is used by the bootloader to map the first parent interrupt to the
+boot CPU, regardless of whether the boot CPU is the first one or the second
+one.
+
+When booting from the second CPU, the assumption that the first block's
+IRQ is mapped to the first CPU breaks, and the system hangs because
+interrupts do not get routed correctly.
+
+Fix this by passing the appropriate bcm6345_l1_cpu to the interrupt
+handler instead of the chip itself, so the handler always has the right
+block.
+
+Fixes: c7c42ec2baa1 ("irqchips/bmips: Add bcm6345-l1 interrupt controller")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230629072620.62527-1-jonas.gorski@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-bcm6345-l1.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/irqchip/irq-bcm6345-l1.c b/drivers/irqchip/irq-bcm6345-l1.c
+index ebc3a253f735d..7c5d8b791592e 100644
+--- a/drivers/irqchip/irq-bcm6345-l1.c
++++ b/drivers/irqchip/irq-bcm6345-l1.c
+@@ -82,6 +82,7 @@ struct bcm6345_l1_chip {
+ };
+ 
+ struct bcm6345_l1_cpu {
++      struct bcm6345_l1_chip  *intc;
+       void __iomem            *map_base;
+       unsigned int            parent_irq;
+       u32                     enable_cache[];
+@@ -115,17 +116,11 @@ static inline unsigned int cpu_for_irq(struct bcm6345_l1_chip *intc,
+ 
+ static void bcm6345_l1_irq_handle(struct irq_desc *desc)
+ {
+-      struct bcm6345_l1_chip *intc = irq_desc_get_handler_data(desc);
+-      struct bcm6345_l1_cpu *cpu;
++      struct bcm6345_l1_cpu *cpu = irq_desc_get_handler_data(desc);
++      struct bcm6345_l1_chip *intc = cpu->intc;
+       struct irq_chip *chip = irq_desc_get_chip(desc);
+       unsigned int idx;
+ 
+-#ifdef CONFIG_SMP
+-      cpu = intc->cpus[cpu_logical_map(smp_processor_id())];
+-#else
+-      cpu = intc->cpus[0];
+-#endif
+-
+       chained_irq_enter(chip, desc);
+ 
+       for (idx = 0; idx < intc->n_words; idx++) {
+@@ -257,6 +252,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
+       if (!cpu)
+               return -ENOMEM;
+ 
++      cpu->intc = intc;
+       cpu->map_base = ioremap(res.start, sz);
+       if (!cpu->map_base)
+               return -ENOMEM;
+@@ -272,7 +268,7 @@ static int __init bcm6345_l1_init_one(struct device_node *dn,
+               return -EINVAL;
+       }
+       irq_set_chained_handler_and_data(cpu->parent_irq,
+-                                              bcm6345_l1_irq_handle, intc);
++                                              bcm6345_l1_irq_handle, cpu);
+ 
+       return 0;
+ }
+-- 
+2.40.1
+
diff --git a/queue-5.15/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch b/queue-5.15/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch

new file mode 100644 (file)

index 0000000..d89b6dc
--- /dev/null
+++ b/queue-5.15/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch
@@ -0,0 +1,161 @@
+From a908d487709c2741c09a92a9d766774e936aafd8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 17 Jun 2023 08:32:42 +0100
+Subject: irqchip/gic-v4.1: Properly lock VPEs when doing a directLPI
+ invalidation
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 926846a703cbf5d0635cc06e67d34b228746554b ]
+
+We normally rely on the irq_to_cpuid_[un]lock() primitives to make
+sure nothing will change col->idx while performing a LPI invalidation.
+
+However, these primitives do not cover VPE doorbells, and we have
+some open-coded locking for that. Unfortunately, this locking is
+pretty bogus.
+
+Instead, extend the above primitives to cover VPE doorbells and
+convert the whole thing to it.
+
+Fixes: f3a059219bc7 ("irqchip/gic-v4.1: Ensure mutual exclusion between vPE affinity change and RD access")
+Reported-by: Kunkun Jiang <jiangkunkun@huawei.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: Zenghui Yu <yuzenghui@huawei.com>
+Cc: wanghaibin.wang@huawei.com
+Tested-by: Kunkun Jiang <jiangkunkun@huawei.com>
+Reviewed-by: Zenghui Yu <yuzenghui@huawei.com>
+Link: https://lore.kernel.org/r/20230617073242.3199746-1-maz@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-gic-v3-its.c | 75 ++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
+index 59a5d06b2d3e4..490e6cfe510e6 100644
+--- a/drivers/irqchip/irq-gic-v3-its.c
++++ b/drivers/irqchip/irq-gic-v3-its.c
+@@ -267,13 +267,23 @@ static void vpe_to_cpuid_unlock(struct its_vpe *vpe, unsigned long flags)
+       raw_spin_unlock_irqrestore(&vpe->vpe_lock, flags);
+ }
+ 
++static struct irq_chip its_vpe_irq_chip;
++
+ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
+ {
+-      struct its_vlpi_map *map = get_vlpi_map(d);
++      struct its_vpe *vpe = NULL;
+       int cpu;
+ 
+-      if (map) {
+-              cpu = vpe_to_cpuid_lock(map->vpe, flags);
++      if (d->chip == &its_vpe_irq_chip) {
++              vpe = irq_data_get_irq_chip_data(d);
++      } else {
++              struct its_vlpi_map *map = get_vlpi_map(d);
++              if (map)
++                      vpe = map->vpe;
++      }
++
++      if (vpe) {
++              cpu = vpe_to_cpuid_lock(vpe, flags);
+       } else {
+               /* Physical LPIs are already locked via the irq_desc lock */
+               struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+@@ -287,10 +297,18 @@ static int irq_to_cpuid_lock(struct irq_data *d, unsigned long *flags)
+ 
+ static void irq_to_cpuid_unlock(struct irq_data *d, unsigned long flags)
+ {
+-      struct its_vlpi_map *map = get_vlpi_map(d);
++      struct its_vpe *vpe = NULL;
++
++      if (d->chip == &its_vpe_irq_chip) {
++              vpe = irq_data_get_irq_chip_data(d);
++      } else {
++              struct its_vlpi_map *map = get_vlpi_map(d);
++              if (map)
++                      vpe = map->vpe;
++      }
+ 
+-      if (map)
+-              vpe_to_cpuid_unlock(map->vpe, flags);
++      if (vpe)
++              vpe_to_cpuid_unlock(vpe, flags);
+ }
+ 
+ static struct its_collection *valid_col(struct its_collection *col)
+@@ -1427,14 +1445,29 @@ static void wait_for_syncr(void __iomem *rdbase)
+               cpu_relax();
+ }
+ 
+-static void direct_lpi_inv(struct irq_data *d)
++static void __direct_lpi_inv(struct irq_data *d, u64 val)
+ {
+-      struct its_vlpi_map *map = get_vlpi_map(d);
+       void __iomem *rdbase;
+       unsigned long flags;
+-      u64 val;
+       int cpu;
+ 
++      /* Target the redistributor this LPI is currently routed to */
++      cpu = irq_to_cpuid_lock(d, &flags);
++      raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
++
++      rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
++      gic_write_lpir(val, rdbase + GICR_INVLPIR);
++      wait_for_syncr(rdbase);
++
++      raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
++      irq_to_cpuid_unlock(d, flags);
++}
++
++static void direct_lpi_inv(struct irq_data *d)
++{
++      struct its_vlpi_map *map = get_vlpi_map(d);
++      u64 val;
++
+       if (map) {
+               struct its_device *its_dev = irq_data_get_irq_chip_data(d);
+ 
+@@ -1447,15 +1480,7 @@ static void direct_lpi_inv(struct irq_data *d)
+               val = d->hwirq;
+       }
+ 
+-      /* Target the redistributor this LPI is currently routed to */
+-      cpu = irq_to_cpuid_lock(d, &flags);
+-      raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock);
+-      rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base;
+-      gic_write_lpir(val, rdbase + GICR_INVLPIR);
+-
+-      wait_for_syncr(rdbase);
+-      raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock);
+-      irq_to_cpuid_unlock(d, flags);
++      __direct_lpi_inv(d, val);
+ }
+ 
+ static void lpi_update_config(struct irq_data *d, u8 clr, u8 set)
+@@ -3936,18 +3961,10 @@ static void its_vpe_send_inv(struct irq_data *d)
+ {
+       struct its_vpe *vpe = irq_data_get_irq_chip_data(d);
+ 
+-      if (gic_rdists->has_direct_lpi) {
+-              void __iomem *rdbase;
+-
+-              /* Target the redistributor this VPE is currently known on */
+-              raw_spin_lock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
+-              rdbase = per_cpu_ptr(gic_rdists->rdist, vpe->col_idx)->rd_base;
+-              gic_write_lpir(d->parent_data->hwirq, rdbase + GICR_INVLPIR);
+-              wait_for_syncr(rdbase);
+-              raw_spin_unlock(&gic_data_rdist_cpu(vpe->col_idx)->rd_lock);
+-      } else {
++      if (gic_rdists->has_direct_lpi)
++              __direct_lpi_inv(d, d->parent_data->hwirq);
++      else
+               its_vpe_send_cmd(vpe, its_send_inv);
+-      }
+ }
+ 
+ static void its_vpe_mask_irq(struct irq_data *d)
+-- 
+2.40.1
+
diff --git a/queue-5.15/kvm-x86-disallow-kvm_set_sregs-2-if-incoming-cr0-is-.patch b/queue-5.15/kvm-x86-disallow-kvm_set_sregs-2-if-incoming-cr0-is-.patch

new file mode 100644 (file)

index 0000000..02d3f7a
--- /dev/null
+++ b/queue-5.15/kvm-x86-disallow-kvm_set_sregs-2-if-incoming-cr0-is-.patch
@@ -0,0 +1,216 @@
+From 8ed6ebc265f28fa856de71c7f16be6e0795b744e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Jun 2023 13:30:35 -0700
+Subject: KVM: x86: Disallow KVM_SET_SREGS{2} if incoming CR0 is invalid
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 26a0652cb453c72f6aab0974bc4939e9b14f886b ]
+
+Reject KVM_SET_SREGS{2} with -EINVAL if the incoming CR0 is invalid,
+e.g. due to setting bits 63:32, illegal combinations, or to a value that
+isn't allowed in VMX (non-)root mode.  The VMX checks in particular are
+"fun" as failure to disallow Real Mode for an L2 that is configured with
+unrestricted guest disabled, when KVM itself has unrestricted guest
+enabled, will result in KVM forcing VM86 mode to virtual Real Mode for
+L2, but then fail to unwind the related metadata when synthesizing a
+nested VM-Exit back to L1 (which has unrestricted guest enabled).
+
+Opportunistically fix a benign typo in the prototype for is_valid_cr4().
+
+Cc: stable@vger.kernel.org
+Reported-by: syzbot+5feef0b9ee9c8e9e5689@syzkaller.appspotmail.com
+Closes: https://lore.kernel.org/all/000000000000f316b705fdf6e2b4@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20230613203037.1968489-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/kvm-x86-ops.h |  1 +
+ arch/x86/include/asm/kvm_host.h    |  3 ++-
+ arch/x86/kvm/svm/svm.c             |  6 ++++++
+ arch/x86/kvm/vmx/vmx.c             | 28 ++++++++++++++++++------
+ arch/x86/kvm/x86.c                 | 34 +++++++++++++++++++-----------
+ 5 files changed, 52 insertions(+), 20 deletions(-)
+
+diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
+index 23ea8a25cbbeb..4bdcb91478a51 100644
+--- a/arch/x86/include/asm/kvm-x86-ops.h
++++ b/arch/x86/include/asm/kvm-x86-ops.h
+@@ -34,6 +34,7 @@ KVM_X86_OP(get_segment)
+ KVM_X86_OP(get_cpl)
+ KVM_X86_OP(set_segment)
+ KVM_X86_OP_NULL(get_cs_db_l_bits)
++KVM_X86_OP(is_valid_cr0)
+ KVM_X86_OP(set_cr0)
+ KVM_X86_OP(is_valid_cr4)
+ KVM_X86_OP(set_cr4)
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 9e800d4d323c6..08cfc26ee7c67 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -1333,8 +1333,9 @@ struct kvm_x86_ops {
+       void (*set_segment)(struct kvm_vcpu *vcpu,
+                           struct kvm_segment *var, int seg);
+       void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
++      bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
+       void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
+-      bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
++      bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+       void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
+       int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
+       void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 0611dac70c25c..302a4669c5a15 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -1734,6 +1734,11 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt)
+       vmcb_mark_dirty(svm->vmcb, VMCB_DT);
+ }
+ 
++static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
++{
++      return true;
++}
++
+ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+@@ -4596,6 +4601,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
+       .set_segment = svm_set_segment,
+       .get_cpl = svm_get_cpl,
+       .get_cs_db_l_bits = kvm_get_cs_db_l_bits,
++      .is_valid_cr0 = svm_is_valid_cr0,
+       .set_cr0 = svm_set_cr0,
+       .is_valid_cr4 = svm_is_valid_cr4,
+       .set_cr4 = svm_set_cr4,
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index 0841f9a34d1c2..89744ee06101a 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -2894,6 +2894,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+       struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
+ 
++      /*
++       * KVM should never use VM86 to virtualize Real Mode when L2 is active,
++       * as using VM86 is unnecessary if unrestricted guest is enabled, and
++       * if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
++       * should VM-Fail and KVM should reject userspace attempts to stuff
++       * CR0.PG=0 when L2 is active.
++       */
++      WARN_ON_ONCE(is_guest_mode(vcpu));
++
+       vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
+       vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
+       vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
+@@ -3084,6 +3093,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
+ #define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
+                         CPU_BASED_CR3_STORE_EXITING)
+ 
++static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
++{
++      if (is_guest_mode(vcpu))
++              return nested_guest_cr0_valid(vcpu, cr0);
++
++      if (to_vmx(vcpu)->nested.vmxon)
++              return nested_host_cr0_valid(vcpu, cr0);
++
++      return true;
++}
++
+ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ {
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
+@@ -5027,18 +5047,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
+               val = (val & ~vmcs12->cr0_guest_host_mask) |
+                       (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
+ 
+-              if (!nested_guest_cr0_valid(vcpu, val))
+-                      return 1;
+-
+               if (kvm_set_cr0(vcpu, val))
+                       return 1;
+               vmcs_writel(CR0_READ_SHADOW, orig_val);
+               return 0;
+       } else {
+-              if (to_vmx(vcpu)->nested.vmxon &&
+-                  !nested_host_cr0_valid(vcpu, val))
+-                      return 1;
+-
+               return kvm_set_cr0(vcpu, val);
+       }
+ }
+@@ -7744,6 +7757,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
+       .set_segment = vmx_set_segment,
+       .get_cpl = vmx_get_cpl,
+       .get_cs_db_l_bits = vmx_get_cs_db_l_bits,
++      .is_valid_cr0 = vmx_is_valid_cr0,
+       .set_cr0 = vmx_set_cr0,
+       .is_valid_cr4 = vmx_is_valid_cr4,
+       .set_cr4 = vmx_set_cr4,
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 7e1e3bc745622..285ba12be8ce3 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -876,6 +876,22 @@ int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3)
+ }
+ EXPORT_SYMBOL_GPL(load_pdptrs);
+ 
++static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
++{
++#ifdef CONFIG_X86_64
++      if (cr0 & 0xffffffff00000000UL)
++              return false;
++#endif
++
++      if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
++              return false;
++
++      if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
++              return false;
++
++      return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
++}
++
+ void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
+ {
+       if ((cr0 ^ old_cr0) & X86_CR0_PG) {
+@@ -898,20 +914,13 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+       unsigned long old_cr0 = kvm_read_cr0(vcpu);
+       unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG;
+ 
+-      cr0 |= X86_CR0_ET;
+-
+-#ifdef CONFIG_X86_64
+-      if (cr0 & 0xffffffff00000000UL)
++      if (!kvm_is_valid_cr0(vcpu, cr0))
+               return 1;
+-#endif
+-
+-      cr0 &= ~CR0_RESERVED_BITS;
+ 
+-      if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
+-              return 1;
++      cr0 |= X86_CR0_ET;
+ 
+-      if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
+-              return 1;
+      /* Writes to CR0 reserved bits are ignored, even on Intel. */
++      cr0 &= ~CR0_RESERVED_BITS;
+ 
+ #ifdef CONFIG_X86_64
+       if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
+@@ -10643,7 +10652,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+                       return false;
+       }
+ 
+-      return kvm_is_valid_cr4(vcpu, sregs->cr4);
++      return kvm_is_valid_cr4(vcpu, sregs->cr4) &&
++             kvm_is_valid_cr0(vcpu, sregs->cr0);
+ }
+ 
+ static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
+-- 
+2.40.1
+
diff --git a/queue-5.15/locking-rtmutex-fix-task-pi_waiters-integrity.patch b/queue-5.15/locking-rtmutex-fix-task-pi_waiters-integrity.patch

new file mode 100644 (file)

index 0000000..eb30da1
--- /dev/null
+++ b/queue-5.15/locking-rtmutex-fix-task-pi_waiters-integrity.patch
@@ -0,0 +1,614 @@
+From bd702e237b948c07f454a125338b2813b785f3d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 16:19:09 +0200
+Subject: locking/rtmutex: Fix task->pi_waiters integrity
+
+From: Peter Zijlstra <peterz@infradead.org>
+
+[ Upstream commit f7853c34241807bb97673a5e97719123be39a09e ]
+
+Henry reported that rt_mutex_adjust_prio_chain() has an ordering
+problem and puts the lie to the comment in [7]. Sharing the sort key
+between lock->waiters and owner->pi_waiters *does* create problems,
+since unlike what the comment claims, holding [L] is insufficient.
+
+Notably, consider:
+
+       A
+      /   \
+     M1   M2
+     |     |
+     B     C
+
+That is, task A owns both M1 and M2, B and C block on them. In this
+case a concurrent chain walk (B & C) will modify their resp. sort keys
+in [7] while holding M1->wait_lock and M2->wait_lock. So holding [L]
+is meaningless, they're different Ls.
+
+This then gives rise to a race condition between [7] and [11], where
+the requeue of pi_waiters will observe an inconsistent tree order.
+
+       B                               C
+
+  (holds M1->wait_lock,                (holds M2->wait_lock,
+   holds B->pi_lock)            holds A->pi_lock)
+
+  [7]
+  waiter_update_prio();
+  ...
+  [8]
+  raw_spin_unlock(B->pi_lock);
+  ...
+  [10]
+  raw_spin_lock(A->pi_lock);
+
+                               [11]
+                               rt_mutex_enqueue_pi();
+                               // observes inconsistent A->pi_waiters
+                               // tree order
+
+Fixing this means either extending the range of the owner lock from
+[10-13] to [6-13], with the immediate problem that this means [6-8]
+hold both blocked and owner locks, or duplicating the sort key.
+
+Since the locking in chain walk is horrible enough without having to
+consider pi_lock nesting rules, duplicate the sort key instead.
+
+By giving each tree their own sort key, the above race becomes
+harmless, if C sees B at the old location, then B will correct things
+(if they need correcting) when it walks up the chain and reaches A.
+
+Fixes: fb00aca47440 ("rtmutex: Turn the plist into an rb-tree")
+Reported-by: Henry Wu <triangletrap12@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Thomas Gleixner <tglx@linutronix.de>
+Tested-by: Henry Wu <triangletrap12@gmail.com>
+Link: https://lkml.kernel.org/r/20230707161052.GF2883469%40hirez.programming.kicks-ass.net
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/locking/rtmutex.c        | 170 +++++++++++++++++++++-----------
+ kernel/locking/rtmutex_api.c    |   2 +-
+ kernel/locking/rtmutex_common.h |  47 ++++++---
+ kernel/locking/ww_mutex.h       |  12 +--
+ 4 files changed, 155 insertions(+), 76 deletions(-)
+
+diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
+index b7fa3ee3aa1de..ee5be1dda0c40 100644
+--- a/kernel/locking/rtmutex.c
++++ b/kernel/locking/rtmutex.c
+@@ -331,21 +331,43 @@ static __always_inline int __waiter_prio(struct task_struct *task)
+       return prio;
+ }
+ 
++/*
++ * Update the waiter->tree copy of the sort keys.
++ */
+ static __always_inline void
+ waiter_update_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
+ {
+-      waiter->prio = __waiter_prio(task);
+-      waiter->deadline = task->dl.deadline;
++      lockdep_assert_held(&waiter->lock->wait_lock);
++      lockdep_assert(RB_EMPTY_NODE(&waiter->tree.entry));
++
++      waiter->tree.prio = __waiter_prio(task);
++      waiter->tree.deadline = task->dl.deadline;
++}
++
++/*
++ * Update the waiter->pi_tree copy of the sort keys (from the tree copy).
++ */
++static __always_inline void
++waiter_clone_prio(struct rt_mutex_waiter *waiter, struct task_struct *task)
++{
++      lockdep_assert_held(&waiter->lock->wait_lock);
++      lockdep_assert_held(&task->pi_lock);
++      lockdep_assert(RB_EMPTY_NODE(&waiter->pi_tree.entry));
++
++      waiter->pi_tree.prio = waiter->tree.prio;
++      waiter->pi_tree.deadline = waiter->tree.deadline;
+ }
+ 
+ /*
+- * Only use with rt_mutex_waiter_{less,equal}()
++ * Only use with rt_waiter_node_{less,equal}()
+  */
++#define task_to_waiter_node(p)        \
++      &(struct rt_waiter_node){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
+ #define task_to_waiter(p)     \
+-      &(struct rt_mutex_waiter){ .prio = __waiter_prio(p), .deadline = (p)->dl.deadline }
++      &(struct rt_mutex_waiter){ .tree = *task_to_waiter_node(p) }
+ 
+-static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+-                                              struct rt_mutex_waiter *right)
++static __always_inline int rt_waiter_node_less(struct rt_waiter_node *left,
++                                             struct rt_waiter_node *right)
+ {
+       if (left->prio < right->prio)
+               return 1;
+@@ -362,8 +384,8 @@ static __always_inline int rt_mutex_waiter_less(struct rt_mutex_waiter *left,
+       return 0;
+ }
+ 
+-static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
+-                                               struct rt_mutex_waiter *right)
++static __always_inline int rt_waiter_node_equal(struct rt_waiter_node *left,
++                                               struct rt_waiter_node *right)
+ {
+       if (left->prio != right->prio)
+               return 0;
+@@ -383,7 +405,7 @@ static __always_inline int rt_mutex_waiter_equal(struct rt_mutex_waiter *left,
+ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
+                                 struct rt_mutex_waiter *top_waiter)
+ {
+-      if (rt_mutex_waiter_less(waiter, top_waiter))
++      if (rt_waiter_node_less(&waiter->tree, &top_waiter->tree))
+               return true;
+ 
+ #ifdef RT_MUTEX_BUILD_SPINLOCKS
+@@ -391,30 +413,30 @@ static inline bool rt_mutex_steal(struct rt_mutex_waiter *waiter,
+        * Note that RT tasks are excluded from same priority (lateral)
+        * steals to prevent the introduction of an unbounded latency.
+        */
+-      if (rt_prio(waiter->prio) || dl_prio(waiter->prio))
++      if (rt_prio(waiter->tree.prio) || dl_prio(waiter->tree.prio))
+               return false;
+ 
+-      return rt_mutex_waiter_equal(waiter, top_waiter);
++      return rt_waiter_node_equal(&waiter->tree, &top_waiter->tree);
+ #else
+       return false;
+ #endif
+ }
+ 
+ #define __node_2_waiter(node) \
+-      rb_entry((node), struct rt_mutex_waiter, tree_entry)
++      rb_entry((node), struct rt_mutex_waiter, tree.entry)
+ 
+ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_node *b)
+ {
+       struct rt_mutex_waiter *aw = __node_2_waiter(a);
+       struct rt_mutex_waiter *bw = __node_2_waiter(b);
+ 
+-      if (rt_mutex_waiter_less(aw, bw))
++      if (rt_waiter_node_less(&aw->tree, &bw->tree))
+               return 1;
+ 
+       if (!build_ww_mutex())
+               return 0;
+ 
+-      if (rt_mutex_waiter_less(bw, aw))
++      if (rt_waiter_node_less(&bw->tree, &aw->tree))
+               return 0;
+ 
+       /* NOTE: relies on waiter->ww_ctx being set before insertion */
+@@ -432,48 +454,58 @@ static __always_inline bool __waiter_less(struct rb_node *a, const struct rb_nod
+ static __always_inline void
+ rt_mutex_enqueue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
+ {
+-      rb_add_cached(&waiter->tree_entry, &lock->waiters, __waiter_less);
++      lockdep_assert_held(&lock->wait_lock);
++
++      rb_add_cached(&waiter->tree.entry, &lock->waiters, __waiter_less);
+ }
+ 
+ static __always_inline void
+ rt_mutex_dequeue(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter)
+ {
+-      if (RB_EMPTY_NODE(&waiter->tree_entry))
++      lockdep_assert_held(&lock->wait_lock);
++
++      if (RB_EMPTY_NODE(&waiter->tree.entry))
+               return;
+ 
+-      rb_erase_cached(&waiter->tree_entry, &lock->waiters);
+-      RB_CLEAR_NODE(&waiter->tree_entry);
++      rb_erase_cached(&waiter->tree.entry, &lock->waiters);
++      RB_CLEAR_NODE(&waiter->tree.entry);
+ }
+ 
+-#define __node_2_pi_waiter(node) \
+-      rb_entry((node), struct rt_mutex_waiter, pi_tree_entry)
++#define __node_2_rt_node(node) \
++      rb_entry((node), struct rt_waiter_node, entry)
+ 
+-static __always_inline bool
+-__pi_waiter_less(struct rb_node *a, const struct rb_node *b)
++static __always_inline bool __pi_waiter_less(struct rb_node *a, const struct rb_node *b)
+ {
+-      return rt_mutex_waiter_less(__node_2_pi_waiter(a), __node_2_pi_waiter(b));
++      return rt_waiter_node_less(__node_2_rt_node(a), __node_2_rt_node(b));
+ }
+ 
+ static __always_inline void
+ rt_mutex_enqueue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
+ {
+-      rb_add_cached(&waiter->pi_tree_entry, &task->pi_waiters, __pi_waiter_less);
++      lockdep_assert_held(&task->pi_lock);
++
++      rb_add_cached(&waiter->pi_tree.entry, &task->pi_waiters, __pi_waiter_less);
+ }
+ 
+ static __always_inline void
+ rt_mutex_dequeue_pi(struct task_struct *task, struct rt_mutex_waiter *waiter)
+ {
+-      if (RB_EMPTY_NODE(&waiter->pi_tree_entry))
++      lockdep_assert_held(&task->pi_lock);
++
++      if (RB_EMPTY_NODE(&waiter->pi_tree.entry))
+               return;
+ 
+-      rb_erase_cached(&waiter->pi_tree_entry, &task->pi_waiters);
+-      RB_CLEAR_NODE(&waiter->pi_tree_entry);
++      rb_erase_cached(&waiter->pi_tree.entry, &task->pi_waiters);
++      RB_CLEAR_NODE(&waiter->pi_tree.entry);
+ }
+ 
+-static __always_inline void rt_mutex_adjust_prio(struct task_struct *p)
++static __always_inline void rt_mutex_adjust_prio(struct rt_mutex_base *lock,
++                                               struct task_struct *p)
+ {
+       struct task_struct *pi_task = NULL;
+ 
++      lockdep_assert_held(&lock->wait_lock);
++      lockdep_assert(rt_mutex_owner(lock) == p);
+       lockdep_assert_held(&p->pi_lock);
+ 
+       if (task_has_pi_waiters(p))
+@@ -562,9 +594,14 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
+  * Chain walk basics and protection scope
+  *
+  * [R] refcount on task
+- * [P] task->pi_lock held
++ * [Pn] task->pi_lock held
+  * [L] rtmutex->wait_lock held
+  *
++ * Normal locking order:
++ *
++ *   rtmutex->wait_lock
++ *     task->pi_lock
++ *
+  * Step       Description                             Protected by
+  *    function arguments:
+  *    @task                                   [R]
+@@ -579,27 +616,32 @@ static __always_inline struct rt_mutex_base *task_blocked_on_lock(struct task_st
+  *    again:
+  *      loop_sanity_check();
+  *    retry:
+- * [1]          lock(task->pi_lock);                  [R] acquire [P]
+- * [2]          waiter = task->pi_blocked_on;         [P]
+- * [3]          check_exit_conditions_1();            [P]
+- * [4]          lock = waiter->lock;                  [P]
+- * [5]          if (!try_lock(lock->wait_lock)) {     [P] try to acquire [L]
+- *        unlock(task->pi_lock);              release [P]
++ * [1]          lock(task->pi_lock);                  [R] acquire [P1]
++ * [2]          waiter = task->pi_blocked_on;         [P1]
++ * [3]          check_exit_conditions_1();            [P1]
++ * [4]          lock = waiter->lock;                  [P1]
++ * [5]          if (!try_lock(lock->wait_lock)) {     [P1] try to acquire [L]
++ *        unlock(task->pi_lock);              release [P1]
+  *        goto retry;
+  *      }
+- * [6]          check_exit_conditions_2();            [P] + [L]
+- * [7]          requeue_lock_waiter(lock, waiter);    [P] + [L]
+- * [8]          unlock(task->pi_lock);                release [P]
++ * [6]          check_exit_conditions_2();            [P1] + [L]
++ * [7]          requeue_lock_waiter(lock, waiter);    [P1] + [L]
++ * [8]          unlock(task->pi_lock);                release [P1]
+  *      put_task_struct(task);                release [R]
+  * [9]          check_exit_conditions_3();            [L]
+  * [10]         task = owner(lock);                   [L]
+  *      get_task_struct(task);                [L] acquire [R]
+- *      lock(task->pi_lock);                  [L] acquire [P]
+- * [11]         requeue_pi_waiter(tsk, waiters(lock));[P] + [L]
+- * [12]         check_exit_conditions_4();            [P] + [L]
+- * [13]         unlock(task->pi_lock);                release [P]
++ *      lock(task->pi_lock);                  [L] acquire [P2]
++ * [11]         requeue_pi_waiter(tsk, waiters(lock));[P2] + [L]
++ * [12]         check_exit_conditions_4();            [P2] + [L]
++ * [13]         unlock(task->pi_lock);                release [P2]
+  *      unlock(lock->wait_lock);              release [L]
+  *      goto again;
++ *
++ * Where P1 is the blocking task and P2 is the lock owner; going up one step
++ * the owner becomes the next blocked task, etc.
++ *
++ *
+  */
+ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+                                             enum rtmutex_chainwalk chwalk,
+@@ -747,7 +789,7 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+        * enabled we continue, but stop the requeueing in the chain
+        * walk.
+        */
+-      if (rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
++      if (rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
+               if (!detect_deadlock)
+                       goto out_unlock_pi;
+               else
+@@ -755,13 +797,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+       }
+ 
+       /*
+-       * [4] Get the next lock
++       * [4] Get the next lock; per holding task->pi_lock we can't unblock
++       * and guarantee @lock's existence.
+        */
+       lock = waiter->lock;
+       /*
+        * [5] We need to trylock here as we are holding task->pi_lock,
+        * which is the reverse lock order versus the other rtmutex
+        * operations.
++       *
++       * Per the above, holding task->pi_lock guarantees lock exists, so
++       * inverting this lock order is infeasible from a life-time
++       * perspective.
+        */
+       if (!raw_spin_trylock(&lock->wait_lock)) {
+               raw_spin_unlock_irq(&task->pi_lock);
+@@ -865,17 +912,18 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+        * or
+        *
+        *   DL CBS enforcement advancing the effective deadline.
+-       *
+-       * Even though pi_waiters also uses these fields, and that tree is only
+-       * updated in [11], we can do this here, since we hold [L], which
+-       * serializes all pi_waiters access and rb_erase() does not care about
+-       * the values of the node being removed.
+        */
+       waiter_update_prio(waiter, task);
+ 
+       rt_mutex_enqueue(lock, waiter);
+ 
+-      /* [8] Release the task */
++      /*
++       * [8] Release the (blocking) task in preparation for
++       * taking the owner task in [10].
++       *
++       * Since we hold lock->wait_lock, task cannot unblock, even if we
++       * release task->pi_lock.
++       */
+       raw_spin_unlock(&task->pi_lock);
+       put_task_struct(task);
+ 
+@@ -899,7 +947,12 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+               return 0;
+       }
+ 
+-      /* [10] Grab the next task, i.e. the owner of @lock */
++      /*
++       * [10] Grab the next task, i.e. the owner of @lock
++       *
++       * Per holding lock->wait_lock and checking for !owner above, there
++       * must be an owner and it cannot go away.
++       */
+       task = get_task_struct(rt_mutex_owner(lock));
+       raw_spin_lock(&task->pi_lock);
+ 
+@@ -912,8 +965,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+                * and adjust the priority of the owner.
+                */
+               rt_mutex_dequeue_pi(task, prerequeue_top_waiter);
++              waiter_clone_prio(waiter, task);
+               rt_mutex_enqueue_pi(task, waiter);
+-              rt_mutex_adjust_prio(task);
++              rt_mutex_adjust_prio(lock, task);
+ 
+       } else if (prerequeue_top_waiter == waiter) {
+               /*
+@@ -928,8 +982,9 @@ static int __sched rt_mutex_adjust_prio_chain(struct task_struct *task,
+                */
+               rt_mutex_dequeue_pi(task, waiter);
+               waiter = rt_mutex_top_waiter(lock);
++              waiter_clone_prio(waiter, task);
+               rt_mutex_enqueue_pi(task, waiter);
+-              rt_mutex_adjust_prio(task);
++              rt_mutex_adjust_prio(lock, task);
+       } else {
+               /*
+                * Nothing changed. No need to do any priority
+@@ -1142,6 +1197,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
+       waiter->task = task;
+       waiter->lock = lock;
+       waiter_update_prio(waiter, task);
++      waiter_clone_prio(waiter, task);
+ 
+       /* Get the top priority waiter on the lock */
+       if (rt_mutex_has_waiters(lock))
+@@ -1175,7 +1231,7 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock,
+               rt_mutex_dequeue_pi(owner, top_waiter);
+               rt_mutex_enqueue_pi(owner, waiter);
+ 
+-              rt_mutex_adjust_prio(owner);
++              rt_mutex_adjust_prio(lock, owner);
+               if (owner->pi_blocked_on)
+                       chain_walk = 1;
+       } else if (rt_mutex_cond_detect_deadlock(waiter, chwalk)) {
+@@ -1222,6 +1278,8 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
+ {
+       struct rt_mutex_waiter *waiter;
+ 
++      lockdep_assert_held(&lock->wait_lock);
++
+       raw_spin_lock(&current->pi_lock);
+ 
+       waiter = rt_mutex_top_waiter(lock);
+@@ -1234,7 +1292,7 @@ static void __sched mark_wakeup_next_waiter(struct rt_wake_q_head *wqh,
+        * task unblocks.
+        */
+       rt_mutex_dequeue_pi(current, waiter);
+-      rt_mutex_adjust_prio(current);
++      rt_mutex_adjust_prio(lock, current);
+ 
+       /*
+        * As we are waking up the top waiter, and the waiter stays
+@@ -1471,7 +1529,7 @@ static void __sched remove_waiter(struct rt_mutex_base *lock,
+       if (rt_mutex_has_waiters(lock))
+               rt_mutex_enqueue_pi(owner, rt_mutex_top_waiter(lock));
+ 
+-      rt_mutex_adjust_prio(owner);
++      rt_mutex_adjust_prio(lock, owner);
+ 
+       /* Store the lock on which owner is blocked or NULL */
+       next_lock = task_blocked_on_lock(owner);
+diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c
+index a461be2f873db..56d1938cb52a1 100644
+--- a/kernel/locking/rtmutex_api.c
++++ b/kernel/locking/rtmutex_api.c
+@@ -437,7 +437,7 @@ void __sched rt_mutex_adjust_pi(struct task_struct *task)
+       raw_spin_lock_irqsave(&task->pi_lock, flags);
+ 
+       waiter = task->pi_blocked_on;
+-      if (!waiter || rt_mutex_waiter_equal(waiter, task_to_waiter(task))) {
++      if (!waiter || rt_waiter_node_equal(&waiter->tree, task_to_waiter_node(task))) {
+               raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+               return;
+       }
+diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h
+index c47e8361bfb5c..1162e07cdaea1 100644
+--- a/kernel/locking/rtmutex_common.h
++++ b/kernel/locking/rtmutex_common.h
+@@ -17,27 +17,44 @@
+ #include <linux/rtmutex.h>
+ #include <linux/sched/wake_q.h>
+ 
++
++/*
++ * This is a helper for the struct rt_mutex_waiter below. A waiter goes in two
++ * separate trees and they need their own copy of the sort keys because of
++ * different locking requirements.
++ *
++ * @entry:            rbtree node to enqueue into the waiters tree
++ * @prio:             Priority of the waiter
++ * @deadline:         Deadline of the waiter if applicable
++ *
++ * See rt_waiter_node_less() and waiter_*_prio().
++ */
++struct rt_waiter_node {
++      struct rb_node  entry;
++      int             prio;
++      u64             deadline;
++};
++
+ /*
+  * This is the control structure for tasks blocked on a rt_mutex,
+  * which is allocated on the kernel stack on of the blocked task.
+  *
+- * @tree_entry:               pi node to enqueue into the mutex waiters tree
+- * @pi_tree_entry:    pi node to enqueue into the mutex owner waiters tree
++ * @tree:             node to enqueue into the mutex waiters tree
++ * @pi_tree:          node to enqueue into the mutex owner waiters tree
+  * @task:             task reference to the blocked task
+  * @lock:             Pointer to the rt_mutex on which the waiter blocks
+  * @wake_state:               Wakeup state to use (TASK_NORMAL or TASK_RTLOCK_WAIT)
+- * @prio:             Priority of the waiter
+- * @deadline:         Deadline of the waiter if applicable
+  * @ww_ctx:           WW context pointer
++ *
++ * @tree is ordered by @lock->wait_lock
++ * @pi_tree is ordered by rt_mutex_owner(@lock)->pi_lock
+  */
+ struct rt_mutex_waiter {
+-      struct rb_node          tree_entry;
+-      struct rb_node          pi_tree_entry;
++      struct rt_waiter_node   tree;
++      struct rt_waiter_node   pi_tree;
+       struct task_struct      *task;
+       struct rt_mutex_base    *lock;
+       unsigned int            wake_state;
+-      int                     prio;
+-      u64                     deadline;
+       struct ww_acquire_ctx   *ww_ctx;
+ };
+ 
+@@ -105,7 +122,7 @@ static inline bool rt_mutex_waiter_is_top_waiter(struct rt_mutex_base *lock,
+ {
+       struct rb_node *leftmost = rb_first_cached(&lock->waiters);
+ 
+-      return rb_entry(leftmost, struct rt_mutex_waiter, tree_entry) == waiter;
++      return rb_entry(leftmost, struct rt_mutex_waiter, tree.entry) == waiter;
+ }
+ 
+ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *lock)
+@@ -113,8 +130,10 @@ static inline struct rt_mutex_waiter *rt_mutex_top_waiter(struct rt_mutex_base *
+       struct rb_node *leftmost = rb_first_cached(&lock->waiters);
+       struct rt_mutex_waiter *w = NULL;
+ 
++      lockdep_assert_held(&lock->wait_lock);
++
+       if (leftmost) {
+-              w = rb_entry(leftmost, struct rt_mutex_waiter, tree_entry);
++              w = rb_entry(leftmost, struct rt_mutex_waiter, tree.entry);
+               BUG_ON(w->lock != lock);
+       }
+       return w;
+@@ -127,8 +146,10 @@ static inline int task_has_pi_waiters(struct task_struct *p)
+ 
+ static inline struct rt_mutex_waiter *task_top_pi_waiter(struct task_struct *p)
+ {
++      lockdep_assert_held(&p->pi_lock);
++
+       return rb_entry(p->pi_waiters.rb_leftmost, struct rt_mutex_waiter,
+-                      pi_tree_entry);
++                      pi_tree.entry);
+ }
+ 
+ #define RT_MUTEX_HAS_WAITERS  1UL
+@@ -190,8 +211,8 @@ static inline void debug_rt_mutex_free_waiter(struct rt_mutex_waiter *waiter)
+ static inline void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter)
+ {
+       debug_rt_mutex_init_waiter(waiter);
+-      RB_CLEAR_NODE(&waiter->pi_tree_entry);
+-      RB_CLEAR_NODE(&waiter->tree_entry);
++      RB_CLEAR_NODE(&waiter->pi_tree.entry);
++      RB_CLEAR_NODE(&waiter->tree.entry);
+       waiter->wake_state = TASK_NORMAL;
+       waiter->task = NULL;
+ }
+diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h
+index 56f139201f246..3ad2cc4823e59 100644
+--- a/kernel/locking/ww_mutex.h
++++ b/kernel/locking/ww_mutex.h
+@@ -96,25 +96,25 @@ __ww_waiter_first(struct rt_mutex *lock)
+       struct rb_node *n = rb_first(&lock->rtmutex.waiters.rb_root);
+       if (!n)
+               return NULL;
+-      return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++      return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+ 
+ static inline struct rt_mutex_waiter *
+ __ww_waiter_next(struct rt_mutex *lock, struct rt_mutex_waiter *w)
+ {
+-      struct rb_node *n = rb_next(&w->tree_entry);
++      struct rb_node *n = rb_next(&w->tree.entry);
+       if (!n)
+               return NULL;
+-      return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++      return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+ 
+ static inline struct rt_mutex_waiter *
+ __ww_waiter_prev(struct rt_mutex *lock, struct rt_mutex_waiter *w)
+ {
+-      struct rb_node *n = rb_prev(&w->tree_entry);
++      struct rb_node *n = rb_prev(&w->tree.entry);
+       if (!n)
+               return NULL;
+-      return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++      return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+ 
+ static inline struct rt_mutex_waiter *
+@@ -123,7 +123,7 @@ __ww_waiter_last(struct rt_mutex *lock)
+       struct rb_node *n = rb_last(&lock->rtmutex.waiters.rb_root);
+       if (!n)
+               return NULL;
+-      return rb_entry(n, struct rt_mutex_waiter, tree_entry);
++      return rb_entry(n, struct rt_mutex_waiter, tree.entry);
+ }
+ 
+ static inline void
+-- 
+2.40.1
+
diff --git a/queue-5.15/series b/queue-5.15/series

index f9a7f1df5f087085c90a2e39eb7d7570fda53bbb..b01fc80157f1b52cf9cc5a4d5294f54df545dbec 100644 (file)
--- a/queue-5.15/series
+++ b/queue-5.15/series
@@ -133,3 +133,7 @@ btrfs-check-for-commit-error-at-btrfs_attach_transaction_barrier.patch
  file-always-lock-position-for-fmode_atomic_pos.patch
  nfsd-remove-incorrect-check-in-nfsd4_validate_stateid.patch
  tpm_tis-explicitly-check-for-error-code.patch
+irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch
+irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch
+locking-rtmutex-fix-task-pi_waiters-integrity.patch
+kvm-x86-disallow-kvm_set_sregs-2-if-incoming-cr0-is-.patch
author	Sasha Levin <sashal@kernel.org>
	Tue, 1 Aug 2023 01:06:55 +0000 (21:06 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Tue, 1 Aug 2023 01:06:55 +0000 (21:06 -0400)
queue-5.15/irq-bcm6345-l1-do-not-assume-a-fixed-block-to-cpu-ma.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/irqchip-gic-v4.1-properly-lock-vpes-when-doing-a-dir.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/kvm-x86-disallow-kvm_set_sregs-2-if-incoming-cr0-is-.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/locking-rtmutex-fix-task-pi_waiters-integrity.patch	[new file with mode: 0644]	patch \| blob
queue-5.15/series		patch \| blob \| blame \| history