From: Greg Kroah-Hartman Date: Tue, 12 May 2020 13:28:18 +0000 (+0200) Subject: 5.6-stable patches X-Git-Tag: v4.19.123~18 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=b72c210e1f369faae9d490d2210ca95a0feb19d8;p=thirdparty%2Fkernel%2Fstable-queue.git 5.6-stable patches added patches: arch-x86-kvm-svm-sev.c-change-flag-passed-to-gup-fast-in-sev_pin_memory.patch iommu-virtio-reverse-arguments-to-list_add.patch kvm-ioapic-restrict-lazy-eoi-update-to-edge-triggered-interrupts.patch kvm-x86-fixes-posted-interrupt-check-for-irqs-delivery-modes.patch kvm-x86-use-kvm-cpu-capabilities-to-determine-cr4-reserved-bits.patch mm-memcg-fix-error-return-value-of-mem_cgroup_css_alloc.patch netfilter-nat-never-update-the-udp-checksum-when-it-s-0.patch netfilter-nf_osf-avoid-passing-pointer-to-local-var.patch objtool-fix-stack-offset-tracking-for-indirect-cfas.patch scripts-decodecode-fix-trapping-instruction-formatting.patch x86-entry-64-fix-unwind-hints-in-__switch_to_asm.patch x86-entry-64-fix-unwind-hints-in-kernel-exit-path.patch x86-entry-64-fix-unwind-hints-in-register-clearing-code.patch x86-entry-64-fix-unwind-hints-in-rewind_stack_do_exit.patch x86-mm-cpa-flush-direct-map-alias-during-cpa.patch x86-unwind-orc-don-t-skip-the-first-frame-for-inactive-tasks.patch x86-unwind-orc-fix-error-path-for-bad-orc-entry-type.patch x86-unwind-orc-fix-premature-unwind-stoppage-due-to-iret-frames.patch x86-unwind-orc-prevent-unwinding-before-orc-initialization.patch --- diff --git a/queue-5.6/arch-x86-kvm-svm-sev.c-change-flag-passed-to-gup-fast-in-sev_pin_memory.patch b/queue-5.6/arch-x86-kvm-svm-sev.c-change-flag-passed-to-gup-fast-in-sev_pin_memory.patch new file mode 100644 index 00000000000..a86808c1d3a --- /dev/null +++ b/queue-5.6/arch-x86-kvm-svm-sev.c-change-flag-passed-to-gup-fast-in-sev_pin_memory.patch @@ -0,0 +1,53 @@ +From 996ed22c7a5251d76dcdfe5026ef8230e90066d9 Mon Sep 17 00:00:00 2001 +From: Janakarajan Natarajan +Date: Thu, 7 May 2020 18:35:56 -0700 
+Subject: arch/x86/kvm/svm/sev.c: change flag passed to GUP fast in sev_pin_memory() + +From: Janakarajan Natarajan + +commit 996ed22c7a5251d76dcdfe5026ef8230e90066d9 upstream. + +When trying to lock read-only pages, sev_pin_memory() fails because +FOLL_WRITE is used as the flag for get_user_pages_fast(). + +Commit 73b0140bf0fe ("mm/gup: change GUP fast to use flags rather than a +write 'bool'") updated the get_user_pages_fast() call sites to use +flags, but incorrectly updated the call in sev_pin_memory(). As the +original coding of this call was correct, revert the change made by that +commit. + +Fixes: 73b0140bf0fe ("mm/gup: change GUP fast to use flags rather than a write 'bool'") +Signed-off-by: Janakarajan Natarajan +Signed-off-by: Andrew Morton +Reviewed-by: Ira Weiny +Cc: Paolo Bonzini +Cc: Sean Christopherson +Cc: Vitaly Kuznetsov +Cc: Wanpeng Li +Cc: Jim Mattson +Cc: Joerg Roedel +Cc: Thomas Gleixner +Cc: Ingo Molnar +Cc: Borislav Petkov +Cc: "H . Peter Anvin" +Cc: Mike Marshall +Cc: Brijesh Singh +Link: http://lkml.kernel.org/r/20200423152419.87202-1-Janakarajan.Natarajan@amd.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/svm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -1886,7 +1886,7 @@ static struct page **sev_pin_memory(stru + return NULL; + + /* Pin the user virtual address. */ +- npinned = get_user_pages_fast(uaddr, npages, FOLL_WRITE, pages); ++ npinned = get_user_pages_fast(uaddr, npages, write ? 
FOLL_WRITE : 0, pages); + if (npinned != npages) { + pr_err("SEV: Failure locking %lu pages.\n", npages); + goto err; diff --git a/queue-5.6/ext4-don-t-set-dioread_nolock-by-default-for-blocksi.patch b/queue-5.6/ext4-don-t-set-dioread_nolock-by-default-for-blocksi.patch index b3f0ce575ee..fd3b4534e47 100644 --- a/queue-5.6/ext4-don-t-set-dioread_nolock-by-default-for-blocksi.patch +++ b/queue-5.6/ext4-don-t-set-dioread_nolock-by-default-for-blocksi.patch @@ -5,6 +5,8 @@ Subject: ext4: don't set dioread_nolock by default for blocksize < pagesize From: Ritesh Harjani +commit 626b035b816b61a7a7b4d2205a6807e2f11a18c1 upstream. + Currently on calling echo 3 > drop_caches on host machine, we see FS corruption in the guest. This happens on Power machine where blocksize < pagesize. @@ -19,6 +21,7 @@ Reported-by: Aneesh Kumar K.V Signed-off-by: Ritesh Harjani Link: https://lore.kernel.org/r/20200327200744.12473-1-riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o +Signed-off-by: Greg Kroah-Hartman --- fs/ext4/super.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/queue-5.6/iommu-virtio-reverse-arguments-to-list_add.patch b/queue-5.6/iommu-virtio-reverse-arguments-to-list_add.patch new file mode 100644 index 00000000000..9f1411bad97 --- /dev/null +++ b/queue-5.6/iommu-virtio-reverse-arguments-to-list_add.patch @@ -0,0 +1,38 @@ +From fb3637a113349f53830f7d6ca45891b7192cd28f Mon Sep 17 00:00:00 2001 +From: Julia Lawall +Date: Tue, 5 May 2020 20:47:47 +0200 +Subject: iommu/virtio: Reverse arguments to list_add + +From: Julia Lawall + +commit fb3637a113349f53830f7d6ca45891b7192cd28f upstream. + +Elsewhere in the file, there is a list_for_each_entry with +&vdev->resv_regions as the second argument, suggesting that +&vdev->resv_regions is the list head. So exchange the +arguments on the list_add call to put the list head in the +second argument. 
+ +Fixes: 2a5a31487445 ("iommu/virtio: Add probe request") +Signed-off-by: Julia Lawall +Signed-off-by: Greg Kroah-Hartman + +Reviewed-by: Jean-Philippe Brucker +Link: https://lore.kernel.org/r/1588704467-13431-1-git-send-email-Julia.Lawall@inria.fr +Signed-off-by: Joerg Roedel + +--- + drivers/iommu/virtio-iommu.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/iommu/virtio-iommu.c ++++ b/drivers/iommu/virtio-iommu.c +@@ -453,7 +453,7 @@ static int viommu_add_resv_mem(struct vi + if (!region) + return -ENOMEM; + +- list_add(&vdev->resv_regions, &region->list); ++ list_add(&region->list, &vdev->resv_regions); + return 0; + } + diff --git a/queue-5.6/kvm-ioapic-restrict-lazy-eoi-update-to-edge-triggered-interrupts.patch b/queue-5.6/kvm-ioapic-restrict-lazy-eoi-update-to-edge-triggered-interrupts.patch new file mode 100644 index 00000000000..08b4fe89810 --- /dev/null +++ b/queue-5.6/kvm-ioapic-restrict-lazy-eoi-update-to-edge-triggered-interrupts.patch @@ -0,0 +1,79 @@ +From 8be8f932e3db5fe4ed178b8892eeffeab530273a Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Mon, 4 May 2020 12:19:45 -0400 +Subject: kvm: ioapic: Restrict lazy EOI update to edge-triggered interrupts + +From: Paolo Bonzini + +commit 8be8f932e3db5fe4ed178b8892eeffeab530273a upstream. + +Commit f458d039db7e ("kvm: ioapic: Lazy update IOAPIC EOI") introduces +the following infinite loop: + +BUG: stack guard page was hit at 000000008f595917 \ +(stack is 00000000bdefe5a4..00000000ae2b06f5) +kernel stack overflow (double-fault): 0000 [#1] SMP NOPTI +RIP: 0010:kvm_set_irq+0x51/0x160 [kvm] +Call Trace: + irqfd_resampler_ack+0x32/0x90 [kvm] + kvm_notify_acked_irq+0x62/0xd0 [kvm] + kvm_ioapic_update_eoi_one.isra.0+0x30/0x120 [kvm] + ioapic_set_irq+0x20e/0x240 [kvm] + kvm_ioapic_set_irq+0x5c/0x80 [kvm] + kvm_set_irq+0xbb/0x160 [kvm] + ? 
kvm_hv_set_sint+0x20/0x20 [kvm] + irqfd_resampler_ack+0x32/0x90 [kvm] + kvm_notify_acked_irq+0x62/0xd0 [kvm] + kvm_ioapic_update_eoi_one.isra.0+0x30/0x120 [kvm] + ioapic_set_irq+0x20e/0x240 [kvm] + kvm_ioapic_set_irq+0x5c/0x80 [kvm] + kvm_set_irq+0xbb/0x160 [kvm] + ? kvm_hv_set_sint+0x20/0x20 [kvm] +.... + +The re-entrancy happens because the irq state is the OR of +the interrupt state and the resamplefd state. That is, we don't +want to show the state as 0 until we've had a chance to set the +resamplefd. But if the interrupt has _not_ gone low then +ioapic_set_irq is invoked again, causing an infinite loop. + +This can only happen for a level-triggered interrupt, otherwise +irqfd_inject would immediately set the KVM_USERSPACE_IRQ_SOURCE_ID high +and then low. Fortunately, in the case of level-triggered interrupts the VMEXIT already happens because +TMR is set. Thus, fix the bug by restricting the lazy invocation +of the ack notifier to edge-triggered interrupts, the only ones that +need it. + +Tested-by: Suravee Suthikulpanit +Reported-by: borisvk@bstnet.org +Suggested-by: Paolo Bonzini +Link: https://www.spinics.net/lists/kvm/msg213512.html +Fixes: f458d039db7e ("kvm: ioapic: Lazy update IOAPIC EOI") +Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=207489 +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kvm/ioapic.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +--- a/arch/x86/kvm/ioapic.c ++++ b/arch/x86/kvm/ioapic.c +@@ -225,12 +225,12 @@ static int ioapic_set_irq(struct kvm_ioa + } + + /* +- * AMD SVM AVIC accelerate EOI write and do not trap, +- * in-kernel IOAPIC will not be able to receive the EOI. +- * In this case, we do lazy update of the pending EOI when +- * trying to set IOAPIC irq. ++ * AMD SVM AVIC accelerate EOI write iff the interrupt is edge ++ * triggered, in which case the in-kernel IOAPIC will not be able ++ * to receive the EOI. 
In this case, we do a lazy update of the ++ * pending EOI when trying to set IOAPIC irq. + */ +- if (kvm_apicv_activated(ioapic->kvm)) ++ if (edge && kvm_apicv_activated(ioapic->kvm)) + ioapic_lazy_update_eoi(ioapic, irq); + + /* diff --git a/queue-5.6/kvm-x86-fixes-posted-interrupt-check-for-irqs-delivery-modes.patch b/queue-5.6/kvm-x86-fixes-posted-interrupt-check-for-irqs-delivery-modes.patch new file mode 100644 index 00000000000..eb85354adc1 --- /dev/null +++ b/queue-5.6/kvm-x86-fixes-posted-interrupt-check-for-irqs-delivery-modes.patch @@ -0,0 +1,41 @@ +From 637543a8d61c6afe4e9be64bfb43c78701a83375 Mon Sep 17 00:00:00 2001 +From: Suravee Suthikulpanit +Date: Tue, 7 Apr 2020 01:13:09 -0500 +Subject: KVM: x86: Fixes posted interrupt check for IRQs delivery modes + +From: Suravee Suthikulpanit + +commit 637543a8d61c6afe4e9be64bfb43c78701a83375 upstream. + +Current logic incorrectly uses the enum ioapic_irq_destination_types +to check the posted interrupt destination types. However, the value was +set using APIC_DM_XXX macros, which are left-shifted by 8 bits. + +Fixes by using the APIC_DM_FIXED and APIC_DM_LOWEST instead. 
+ +Fixes: (fdcf75621375 'KVM: x86: Disable posted interrupts for non-standard IRQs delivery modes') +Cc: Alexander Graf +Signed-off-by: Suravee Suthikulpanit +Message-Id: <1586239989-58305-1-git-send-email-suravee.suthikulpanit@amd.com> +Reviewed-by: Maxim Levitsky +Tested-by: Maxim Levitsky +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/include/asm/kvm_host.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -1664,8 +1664,8 @@ void kvm_set_msi_irq(struct kvm *kvm, st + static inline bool kvm_irq_is_postable(struct kvm_lapic_irq *irq) + { + /* We can only post Fixed and LowPrio IRQs */ +- return (irq->delivery_mode == dest_Fixed || +- irq->delivery_mode == dest_LowestPrio); ++ return (irq->delivery_mode == APIC_DM_FIXED || ++ irq->delivery_mode == APIC_DM_LOWEST); + } + + static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) diff --git a/queue-5.6/kvm-x86-use-kvm-cpu-capabilities-to-determine-cr4-reserved-bits.patch b/queue-5.6/kvm-x86-use-kvm-cpu-capabilities-to-determine-cr4-reserved-bits.patch new file mode 100644 index 00000000000..a3ec5c80b28 --- /dev/null +++ b/queue-5.6/kvm-x86-use-kvm-cpu-capabilities-to-determine-cr4-reserved-bits.patch @@ -0,0 +1,67 @@ +From 139f7425fdf54f054463e7524b9f54c41af8407f Mon Sep 17 00:00:00 2001 +From: Paolo Bonzini +Date: Tue, 5 May 2020 09:40:46 -0400 +Subject: kvm: x86: Use KVM CPU capabilities to determine CR4 reserved bits + +From: Paolo Bonzini + +commit 139f7425fdf54f054463e7524b9f54c41af8407f upstream. + +Using CPUID data can be useful for the processor compatibility +check, but that's it. Using it to compute guest-reserved bits +can have both false positives (such as LA57 and UMIP which we +are already handling) and false negatives: in particular, with +this patch we don't allow anymore a KVM guest to set CR4.PKE +when CR4.PKE is clear on the host. 
+ +Fixes: b9dd21e104bc ("KVM: x86: simplify handling of PKRU") +Reported-by: Jim Mattson +Tested-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Greg Kroah-Hartman + +diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c +index c5835f9cb9ad..8d296e3d0d56 100644 +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -926,19 +926,6 @@ EXPORT_SYMBOL_GPL(kvm_set_xcr); + __reserved_bits; \ + }) + +-static u64 kvm_host_cr4_reserved_bits(struct cpuinfo_x86 *c) +-{ +- u64 reserved_bits = __cr4_reserved_bits(cpu_has, c); +- +- if (kvm_cpu_cap_has(X86_FEATURE_LA57)) +- reserved_bits &= ~X86_CR4_LA57; +- +- if (kvm_cpu_cap_has(X86_FEATURE_UMIP)) +- reserved_bits &= ~X86_CR4_UMIP; +- +- return reserved_bits; +-} +- + static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) + { + if (cr4 & cr4_reserved_bits) +@@ -9675,7 +9662,9 @@ int kvm_arch_hardware_setup(void *opaque) + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES)) + supported_xss = 0; + +- cr4_reserved_bits = kvm_host_cr4_reserved_bits(&boot_cpu_data); ++#define __kvm_cpu_cap_has(UNUSED_, f) kvm_cpu_cap_has(f) ++ cr4_reserved_bits = __cr4_reserved_bits(__kvm_cpu_cap_has, UNUSED_); ++#undef __kvm_cpu_cap_has + + if (kvm_has_tsc_control) { + /* +@@ -9707,7 +9696,8 @@ int kvm_arch_check_processor_compat(void *opaque) + + WARN_ON(!irqs_disabled()); + +- if (kvm_host_cr4_reserved_bits(c) != cr4_reserved_bits) ++ if (__cr4_reserved_bits(cpu_has, c) != ++ __cr4_reserved_bits(cpu_has, &boot_cpu_data)) + return -EIO; + + return ops->check_processor_compatibility(); diff --git a/queue-5.6/mm-memcg-fix-error-return-value-of-mem_cgroup_css_alloc.patch b/queue-5.6/mm-memcg-fix-error-return-value-of-mem_cgroup_css_alloc.patch new file mode 100644 index 00000000000..325cd7810a8 --- /dev/null +++ b/queue-5.6/mm-memcg-fix-error-return-value-of-mem_cgroup_css_alloc.patch @@ -0,0 +1,99 @@ +From 11d6761218d19ca06ae5387f4e3692c4fa9e7493 Mon Sep 17 00:00:00 2001 +From: Yafang Shao +Date: Thu, 7 May 2020 18:35:43 -0700 
+Subject: mm, memcg: fix error return value of mem_cgroup_css_alloc() + +From: Yafang Shao + +commit 11d6761218d19ca06ae5387f4e3692c4fa9e7493 upstream. + +When I run my memcg testcase which creates lots of memcgs, I found +there're unexpected out of memory logs while there're still enough +available free memory. The error log is + + mkdir: cannot create directory 'foo.65533': Cannot allocate memory + +The reason is when we try to create more than MEM_CGROUP_ID_MAX memcgs, +an -ENOMEM errno will be set by mem_cgroup_css_alloc(), but the right +errno should be -ENOSPC "No space left on device", which is an +appropriate errno for userspace's failed mkdir. + +As the errno really misled me, we should make it right. After this +patch, the error log will be + + mkdir: cannot create directory 'foo.65533': No space left on device + +[akpm@linux-foundation.org: s/EBUSY/ENOSPC/, per Michal] +[akpm@linux-foundation.org: s/EBUSY/ENOSPC/, per Michal] +Fixes: 73f576c04b94 ("mm: memcontrol: fix cgroup creation failure after many small jobs") +Suggested-by: Matthew Wilcox +Signed-off-by: Yafang Shao +Signed-off-by: Andrew Morton +Acked-by: Michal Hocko +Acked-by: Johannes Weiner +Cc: Vladimir Davydov +Link: http://lkml.kernel.org/r/20200407063621.GA18914@dhcp22.suse.cz +Link: http://lkml.kernel.org/r/1586192163-20099-1-git-send-email-laoar.shao@gmail.com +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + mm/memcontrol.c | 15 +++++++++------ + 1 file changed, 9 insertions(+), 6 deletions(-) + +--- a/mm/memcontrol.c ++++ b/mm/memcontrol.c +@@ -4977,19 +4977,22 @@ static struct mem_cgroup *mem_cgroup_all + unsigned int size; + int node; + int __maybe_unused i; ++ long error = -ENOMEM; + + size = sizeof(struct mem_cgroup); + size += nr_node_ids * sizeof(struct mem_cgroup_per_node *); + + memcg = kzalloc(size, GFP_KERNEL); + if (!memcg) +- return NULL; ++ return ERR_PTR(error); + + memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, + 1, MEM_CGROUP_ID_MAX, + 
GFP_KERNEL); +- if (memcg->id.id < 0) ++ if (memcg->id.id < 0) { ++ error = memcg->id.id; + goto fail; ++ } + + memcg->vmstats_local = alloc_percpu(struct memcg_vmstats_percpu); + if (!memcg->vmstats_local) +@@ -5033,7 +5036,7 @@ static struct mem_cgroup *mem_cgroup_all + fail: + mem_cgroup_id_remove(memcg); + __mem_cgroup_free(memcg); +- return NULL; ++ return ERR_PTR(error); + } + + static struct cgroup_subsys_state * __ref +@@ -5044,8 +5047,8 @@ mem_cgroup_css_alloc(struct cgroup_subsy + long error = -ENOMEM; + + memcg = mem_cgroup_alloc(); +- if (!memcg) +- return ERR_PTR(error); ++ if (IS_ERR(memcg)) ++ return ERR_CAST(memcg); + + memcg->high = PAGE_COUNTER_MAX; + memcg->soft_limit = PAGE_COUNTER_MAX; +@@ -5095,7 +5098,7 @@ mem_cgroup_css_alloc(struct cgroup_subsy + fail: + mem_cgroup_id_remove(memcg); + mem_cgroup_free(memcg); +- return ERR_PTR(-ENOMEM); ++ return ERR_PTR(error); + } + + static int mem_cgroup_css_online(struct cgroup_subsys_state *css) diff --git a/queue-5.6/netfilter-nat-never-update-the-udp-checksum-when-it-s-0.patch b/queue-5.6/netfilter-nat-never-update-the-udp-checksum-when-it-s-0.patch new file mode 100644 index 00000000000..94c286cd963 --- /dev/null +++ b/queue-5.6/netfilter-nat-never-update-the-udp-checksum-when-it-s-0.patch @@ -0,0 +1,67 @@ +From ea64d8d6c675c0bb712689b13810301de9d8f77a Mon Sep 17 00:00:00 2001 +From: Guillaume Nault +Date: Tue, 21 Apr 2020 02:42:19 +0200 +Subject: netfilter: nat: never update the UDP checksum when it's 0 + +From: Guillaume Nault + +commit ea64d8d6c675c0bb712689b13810301de9d8f77a upstream. + +If the UDP header of a local VXLAN endpoint is NAT-ed, and the VXLAN +device has disabled UDP checksums and enabled Tx checksum offloading, +then the skb passed to udp_manip_pkt() has hdr->check == 0 (outer +checksum disabled) and skb->ip_summed == CHECKSUM_PARTIAL (inner packet +checksum offloaded). 
+ +Because of the ->ip_summed value, udp_manip_pkt() tries to update the +outer checksum with the new address and port, leading to an invalid +checksum sent on the wire, as the original null checksum obviously +didn't take the old address and port into account. + +So, we can't take ->ip_summed into account in udp_manip_pkt(), as it +might not refer to the checksum we're acting on. Instead, we can base +the decision to update the UDP checksum entirely on the value of +hdr->check, because it's null if and only if checksum is disabled: + + * A fully computed checksum can't be 0, since a 0 checksum is + represented by the CSUM_MANGLED_0 value instead. + + * A partial checksum can't be 0, since the pseudo-header always adds + at least one non-zero value (the UDP protocol type 0x11) and adding + more values to the sum can't make it wrap to 0 as the carry is then + added to the wrapped number. + + * A disabled checksum uses the special value 0. + +The problem seems to be there from day one, although it was probably +not visible before UDP tunnels were implemented. 
+ +Fixes: 5b1158e909ec ("[NETFILTER]: Add NAT support for nf_conntrack") +Signed-off-by: Guillaume Nault +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/nf_nat_proto.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +--- a/net/netfilter/nf_nat_proto.c ++++ b/net/netfilter/nf_nat_proto.c +@@ -68,15 +68,13 @@ static bool udp_manip_pkt(struct sk_buff + enum nf_nat_manip_type maniptype) + { + struct udphdr *hdr; +- bool do_csum; + + if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct udphdr *)(skb->data + hdroff); +- do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; ++ __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check); + +- __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum); + return true; + } + diff --git a/queue-5.6/netfilter-nf_osf-avoid-passing-pointer-to-local-var.patch b/queue-5.6/netfilter-nf_osf-avoid-passing-pointer-to-local-var.patch new file mode 100644 index 00000000000..1923ac6f1aa --- /dev/null +++ b/queue-5.6/netfilter-nf_osf-avoid-passing-pointer-to-local-var.patch @@ -0,0 +1,75 @@ +From c165d57b552aaca607fa5daf3fb524a6efe3c5a3 Mon Sep 17 00:00:00 2001 +From: Arnd Bergmann +Date: Wed, 29 Apr 2020 21:00:41 +0200 +Subject: netfilter: nf_osf: avoid passing pointer to local var + +From: Arnd Bergmann + +commit c165d57b552aaca607fa5daf3fb524a6efe3c5a3 upstream. + +gcc-10 points out that a code path exists where a pointer to a stack +variable may be passed back to the caller: + +net/netfilter/nfnetlink_osf.c: In function 'nf_osf_hdr_ctx_init': +cc1: warning: function may return address of local variable [-Wreturn-local-addr] +net/netfilter/nfnetlink_osf.c:171:16: note: declared here + 171 | struct tcphdr _tcph; + | ^~~~~ + +I am not sure whether this can happen in practice, but moving the +variable declaration into the callers avoids the problem. 
+ +Fixes: 31a9c29210e2 ("netfilter: nf_osf: add struct nf_osf_hdr_ctx") +Signed-off-by: Arnd Bergmann +Reviewed-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Greg Kroah-Hartman + +--- + net/netfilter/nfnetlink_osf.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/net/netfilter/nfnetlink_osf.c ++++ b/net/netfilter/nfnetlink_osf.c +@@ -165,12 +165,12 @@ static bool nf_osf_match_one(const struc + static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx, + const struct sk_buff *skb, + const struct iphdr *ip, +- unsigned char *opts) ++ unsigned char *opts, ++ struct tcphdr *_tcph) + { + const struct tcphdr *tcp; +- struct tcphdr _tcph; + +- tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); ++ tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), _tcph); + if (!tcp) + return NULL; + +@@ -205,10 +205,11 @@ nf_osf_match(const struct sk_buff *skb, + int fmatch = FMATCH_WRONG; + struct nf_osf_hdr_ctx ctx; + const struct tcphdr *tcp; ++ struct tcphdr _tcph; + + memset(&ctx, 0, sizeof(ctx)); + +- tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); ++ tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph); + if (!tcp) + return false; + +@@ -265,10 +266,11 @@ bool nf_osf_find(const struct sk_buff *s + const struct nf_osf_finger *kf; + struct nf_osf_hdr_ctx ctx; + const struct tcphdr *tcp; ++ struct tcphdr _tcph; + + memset(&ctx, 0, sizeof(ctx)); + +- tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); ++ tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph); + if (!tcp) + return false; + diff --git a/queue-5.6/objtool-fix-stack-offset-tracking-for-indirect-cfas.patch b/queue-5.6/objtool-fix-stack-offset-tracking-for-indirect-cfas.patch new file mode 100644 index 00000000000..5b4a59a100a --- /dev/null +++ b/queue-5.6/objtool-fix-stack-offset-tracking-for-indirect-cfas.patch @@ -0,0 +1,50 @@ +From d8dd25a461e4eec7190cb9d66616aceacc5110ad Mon Sep 17 00:00:00 2001 +From: Josh 
Poimboeuf +Date: Sat, 25 Apr 2020 05:03:00 -0500 +Subject: objtool: Fix stack offset tracking for indirect CFAs + +From: Josh Poimboeuf + +commit d8dd25a461e4eec7190cb9d66616aceacc5110ad upstream. + +When the current frame address (CFA) is stored on the stack (i.e., +cfa->base == CFI_SP_INDIRECT), objtool neglects to adjust the stack +offset when there are subsequent pushes or pops. This results in bad +ORC data at the end of the ENTER_IRQ_STACK macro, when it puts the +previous stack pointer on the stack and does a subsequent push. + +This fixes the following unwinder warning: + + WARNING: can't dereference registers at 00000000f0a6bdba for ip interrupt_entry+0x9f/0xa0 + +Fixes: 627fce14809b ("objtool: Add ORC unwind table generation") +Reported-by: Vince Weaver +Reported-by: Dave Jones +Reported-by: Steven Rostedt +Reported-by: Vegard Nossum +Reported-by: Joe Mario +Reviewed-by: Miroslav Benes +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Cc: Andy Lutomirski +Cc: Jann Horn +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: https://lore.kernel.org/r/853d5d691b29e250333332f09b8e27410b2d9924.1587808742.git.jpoimboe@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + tools/objtool/check.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/tools/objtool/check.c ++++ b/tools/objtool/check.c +@@ -1403,7 +1403,7 @@ static int update_insn_state_regs(struct + struct cfi_reg *cfa = &state->cfa; + struct stack_op *op = &insn->stack_op; + +- if (cfa->base != CFI_SP) ++ if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT) + return 0; + + /* push */ diff --git a/queue-5.6/scripts-decodecode-fix-trapping-instruction-formatting.patch b/queue-5.6/scripts-decodecode-fix-trapping-instruction-formatting.patch new file mode 100644 index 00000000000..56a7c2bbed0 --- /dev/null +++ b/queue-5.6/scripts-decodecode-fix-trapping-instruction-formatting.patch @@ -0,0 +1,46 @@ +From e08df079b23e2e982df15aa340bfbaf50f297504 Mon Sep 17 00:00:00 2001 +From: Ivan 
Delalande +Date: Thu, 7 May 2020 18:35:53 -0700 +Subject: scripts/decodecode: fix trapping instruction formatting + +From: Ivan Delalande + +commit e08df079b23e2e982df15aa340bfbaf50f297504 upstream. + +If the trapping instruction contains a ':', for a memory access through +segment registers for example, the sed substitution will insert the '*' +marker in the middle of the instruction instead of the line address: + + 2b: 65 48 0f c7 0f cmpxchg16b %gs:*(%rdi) <-- trapping instruction + +I started to think I had forgotten some quirk of the assembly syntax +before noticing that it was actually coming from the script. Fix it to +add the address marker at the right place for these instructions: + + 28: 49 8b 06 mov (%r14),%rax + 2b:* 65 48 0f c7 0f cmpxchg16b %gs:(%rdi) <-- trapping instruction + 30: 0f 94 c0 sete %al + +Fixes: 18ff44b189e2 ("scripts/decodecode: make faulting insn ptr more robust") +Signed-off-by: Ivan Delalande +Signed-off-by: Andrew Morton +Reviewed-by: Borislav Petkov +Link: http://lkml.kernel.org/r/20200419223653.GA31248@visor +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman + +--- + scripts/decodecode | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/scripts/decodecode ++++ b/scripts/decodecode +@@ -126,7 +126,7 @@ faultlinenum=$(( $(wc -l $T.oo | cut -d + faultline=`cat $T.dis | head -1 | cut -d":" -f2-` + faultline=`echo "$faultline" | sed -e 's/\[/\\\[/g; s/\]/\\\]/g'` + +-cat $T.oo | sed -e "${faultlinenum}s/^\(.*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/" ++cat $T.oo | sed -e "${faultlinenum}s/^\([^:]*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/" + echo + cat $T.aa + cleanup diff --git a/queue-5.6/series b/queue-5.6/series index 1eb025a80e5..c8e68384b9a 100644 --- a/queue-5.6/series +++ b/queue-5.6/series @@ -93,3 +93,22 @@ batman-adv-fix-batadv_nc_random_weight_tq.patch batman-adv-fix-refcnt-leak-in-batadv_show_throughput_override.patch batman-adv-fix-refcnt-leak-in-batadv_store_throughput_override.patch 
batman-adv-fix-refcnt-leak-in-batadv_v_ogm_process.patch +x86-mm-cpa-flush-direct-map-alias-during-cpa.patch +x86-entry-64-fix-unwind-hints-in-register-clearing-code.patch +x86-entry-64-fix-unwind-hints-in-kernel-exit-path.patch +x86-entry-64-fix-unwind-hints-in-__switch_to_asm.patch +x86-entry-64-fix-unwind-hints-in-rewind_stack_do_exit.patch +x86-unwind-orc-don-t-skip-the-first-frame-for-inactive-tasks.patch +x86-unwind-orc-prevent-unwinding-before-orc-initialization.patch +x86-unwind-orc-fix-error-path-for-bad-orc-entry-type.patch +x86-unwind-orc-fix-premature-unwind-stoppage-due-to-iret-frames.patch +kvm-x86-fixes-posted-interrupt-check-for-irqs-delivery-modes.patch +arch-x86-kvm-svm-sev.c-change-flag-passed-to-gup-fast-in-sev_pin_memory.patch +netfilter-nat-never-update-the-udp-checksum-when-it-s-0.patch +netfilter-nf_osf-avoid-passing-pointer-to-local-var.patch +kvm-ioapic-restrict-lazy-eoi-update-to-edge-triggered-interrupts.patch +objtool-fix-stack-offset-tracking-for-indirect-cfas.patch +iommu-virtio-reverse-arguments-to-list_add.patch +scripts-decodecode-fix-trapping-instruction-formatting.patch +mm-memcg-fix-error-return-value-of-mem_cgroup_css_alloc.patch +kvm-x86-use-kvm-cpu-capabilities-to-determine-cr4-reserved-bits.patch diff --git a/queue-5.6/x86-entry-64-fix-unwind-hints-in-__switch_to_asm.patch b/queue-5.6/x86-entry-64-fix-unwind-hints-in-__switch_to_asm.patch new file mode 100644 index 00000000000..82562b93a25 --- /dev/null +++ b/queue-5.6/x86-entry-64-fix-unwind-hints-in-__switch_to_asm.patch @@ -0,0 +1,56 @@ +From 96c64806b4bf35f5edb465cafa6cec490e424a30 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf +Date: Sat, 25 Apr 2020 05:03:03 -0500 +Subject: x86/entry/64: Fix unwind hints in __switch_to_asm() + +From: Josh Poimboeuf + +commit 96c64806b4bf35f5edb465cafa6cec490e424a30 upstream. + +UNWIND_HINT_FUNC has some limitations: specifically, it doesn't reset +all the registers to undefined. 
This causes objtool to get confused +about the RBP push in __switch_to_asm(), resulting in bad ORC data. + +While __switch_to_asm() does do some stack magic, it's otherwise a +normal callable-from-C function, so just annotate it as a function, +which makes objtool happy and allows it to produce the correct hints +automatically. + +Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations") +Reviewed-by: Miroslav Benes +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Cc: Andy Lutomirski +Cc: Dave Jones +Cc: Jann Horn +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lore.kernel.org/r/03d0411920d10f7418f2e909210d8e9a3b2ab081.1587808742.git.jpoimboe@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64) + * %rdi: prev task + * %rsi: next task + */ +-SYM_CODE_START(__switch_to_asm) +- UNWIND_HINT_FUNC ++SYM_FUNC_START(__switch_to_asm) + /* + * Save callee-saved registers + * This must match the order in inactive_task_frame +@@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm) + popq %rbp + + jmp __switch_to +-SYM_CODE_END(__switch_to_asm) ++SYM_FUNC_END(__switch_to_asm) + + /* + * A newly forked process directly context switches into this address. diff --git a/queue-5.6/x86-entry-64-fix-unwind-hints-in-kernel-exit-path.patch b/queue-5.6/x86-entry-64-fix-unwind-hints-in-kernel-exit-path.patch new file mode 100644 index 00000000000..446684f2af3 --- /dev/null +++ b/queue-5.6/x86-entry-64-fix-unwind-hints-in-kernel-exit-path.patch @@ -0,0 +1,69 @@ +From 1fb143634a38095b641a3a21220774799772dc4c Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf +Date: Sat, 25 Apr 2020 05:03:02 -0500 +Subject: x86/entry/64: Fix unwind hints in kernel exit path + +From: Josh Poimboeuf + +commit 1fb143634a38095b641a3a21220774799772dc4c upstream. 
+ +In swapgs_restore_regs_and_return_to_usermode, after the stack is +switched to the trampoline stack, the existing UNWIND_HINT_REGS hint is +no longer valid, which can result in the following ORC unwinder warning: + + WARNING: can't dereference registers at 000000003aeb0cdd for ip swapgs_restore_regs_and_return_to_usermode+0x93/0xa0 + +For full correctness, we could try to add complicated unwind hints so +the unwinder could continue to find the registers, but when it's +this close to kernel exit, unwind hints aren't really needed anymore and +it's fine to just use an empty hint which tells the unwinder to stop. + +For consistency, also move the UNWIND_HINT_EMPTY in +entry_SYSCALL_64_after_hwframe to a similar location. + +Fixes: 3e3b9293d392 ("x86/entry/64: Return to userspace from the trampoline stack") +Reported-by: Vince Weaver +Reported-by: Dave Jones +Reported-by: Dr. David Alan Gilbert +Reported-by: Joe Mario +Reported-by: Jann Horn +Reported-by: Linus Torvalds +Reviewed-by: Miroslav Benes +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Cc: Andy Lutomirski +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Link: https://lore.kernel.org/r/60ea8f562987ed2d9ace2977502fe481c0d7c9a0.1587808742.git.jpoimboe@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_h + */ + syscall_return_via_sysret: + /* rcx and r11 are already restored (see code above) */ +- UNWIND_HINT_EMPTY + POP_REGS pop_rdi=0 skip_r11rcx=1 + + /* +@@ -258,6 +257,7 @@ syscall_return_via_sysret: + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp ++ UNWIND_HINT_EMPTY + + pushq RSP-RDI(%rdi) /* RSP */ + pushq (%rdi) /* RDI */ +@@ -637,6 +637,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_ + */ + movq %rsp, %rdi + movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp ++ 
UNWIND_HINT_EMPTY + + /* Copy the IRET frame to the trampoline stack. */ + pushq 6*8(%rdi) /* SS */ diff --git a/queue-5.6/x86-entry-64-fix-unwind-hints-in-register-clearing-code.patch b/queue-5.6/x86-entry-64-fix-unwind-hints-in-register-clearing-code.patch new file mode 100644 index 00000000000..5d65eb030f0 --- /dev/null +++ b/queue-5.6/x86-entry-64-fix-unwind-hints-in-register-clearing-code.patch @@ -0,0 +1,110 @@ +From 06a9750edcffa808494d56da939085c35904e618 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf +Date: Sat, 25 Apr 2020 05:03:01 -0500 +Subject: x86/entry/64: Fix unwind hints in register clearing code + +From: Josh Poimboeuf + +commit 06a9750edcffa808494d56da939085c35904e618 upstream. + +The PUSH_AND_CLEAR_REGS macro zeroes each register immediately after +pushing it. If an NMI or exception hits after a register is cleared, +but before the UNWIND_HINT_REGS annotation, the ORC unwinder will +wrongly think the previous value of the register was zero. This can +confuse the unwinding process and cause it to exit early. + +Because ORC is simpler than DWARF, there are a limited number of unwind +annotation states, so it's not possible to add an individual unwind hint +after each push/clear combination. Instead, the register clearing +instructions need to be consolidated and moved to after the +UNWIND_HINT_REGS annotation. 
+ +Fixes: 3f01daecd545 ("x86/entry/64: Introduce the PUSH_AND_CLEAN_REGS macro") +Reviewed-by: Miroslav Benes +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Cc: Andy Lutomirski +Cc: Dave Jones +Cc: Jann Horn +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lore.kernel.org/r/68fd3d0bc92ae2d62ff7879d15d3684217d51f08.1587808742.git.jpoimboe@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/calling.h | 40 +++++++++++++++++++++------------------- + 1 file changed, 21 insertions(+), 19 deletions(-) + +--- a/arch/x86/entry/calling.h ++++ b/arch/x86/entry/calling.h +@@ -98,13 +98,6 @@ For 32-bit we have the following convent + #define SIZEOF_PTREGS 21*8 + + .macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0 +- /* +- * Push registers and sanitize registers of values that a +- * speculation attack might otherwise want to exploit. The +- * lower registers are likely clobbered well before they +- * could be put to use in a speculative execution gadget. 
+- * Interleave XOR with PUSH for better uop scheduling: +- */ + .if \save_ret + pushq %rsi /* pt_regs->si */ + movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */ +@@ -114,34 +107,43 @@ For 32-bit we have the following convent + pushq %rsi /* pt_regs->si */ + .endif + pushq \rdx /* pt_regs->dx */ +- xorl %edx, %edx /* nospec dx */ + pushq %rcx /* pt_regs->cx */ +- xorl %ecx, %ecx /* nospec cx */ + pushq \rax /* pt_regs->ax */ + pushq %r8 /* pt_regs->r8 */ +- xorl %r8d, %r8d /* nospec r8 */ + pushq %r9 /* pt_regs->r9 */ +- xorl %r9d, %r9d /* nospec r9 */ + pushq %r10 /* pt_regs->r10 */ +- xorl %r10d, %r10d /* nospec r10 */ + pushq %r11 /* pt_regs->r11 */ +- xorl %r11d, %r11d /* nospec r11*/ + pushq %rbx /* pt_regs->rbx */ +- xorl %ebx, %ebx /* nospec rbx*/ + pushq %rbp /* pt_regs->rbp */ +- xorl %ebp, %ebp /* nospec rbp*/ + pushq %r12 /* pt_regs->r12 */ +- xorl %r12d, %r12d /* nospec r12*/ + pushq %r13 /* pt_regs->r13 */ +- xorl %r13d, %r13d /* nospec r13*/ + pushq %r14 /* pt_regs->r14 */ +- xorl %r14d, %r14d /* nospec r14*/ + pushq %r15 /* pt_regs->r15 */ +- xorl %r15d, %r15d /* nospec r15*/ + UNWIND_HINT_REGS ++ + .if \save_ret + pushq %rsi /* return address on top of stack */ + .endif ++ ++ /* ++ * Sanitize registers of values that a speculation attack might ++ * otherwise want to exploit. The lower registers are likely clobbered ++ * well before they could be put to use in a speculative execution ++ * gadget. 
++ */ ++ xorl %edx, %edx /* nospec dx */ ++ xorl %ecx, %ecx /* nospec cx */ ++ xorl %r8d, %r8d /* nospec r8 */ ++ xorl %r9d, %r9d /* nospec r9 */ ++ xorl %r10d, %r10d /* nospec r10 */ ++ xorl %r11d, %r11d /* nospec r11 */ ++ xorl %ebx, %ebx /* nospec rbx */ ++ xorl %ebp, %ebp /* nospec rbp */ ++ xorl %r12d, %r12d /* nospec r12 */ ++ xorl %r13d, %r13d /* nospec r13 */ ++ xorl %r14d, %r14d /* nospec r14 */ ++ xorl %r15d, %r15d /* nospec r15 */ ++ + .endm + + .macro POP_REGS pop_rdi=1 skip_r11rcx=0 diff --git a/queue-5.6/x86-entry-64-fix-unwind-hints-in-rewind_stack_do_exit.patch b/queue-5.6/x86-entry-64-fix-unwind-hints-in-rewind_stack_do_exit.patch new file mode 100644 index 00000000000..778ca85f277 --- /dev/null +++ b/queue-5.6/x86-entry-64-fix-unwind-hints-in-rewind_stack_do_exit.patch @@ -0,0 +1,41 @@ +From f977df7b7ca45a4ac4b66d30a8931d0434c394b1 Mon Sep 17 00:00:00 2001 +From: Jann Horn +Date: Sat, 25 Apr 2020 05:03:04 -0500 +Subject: x86/entry/64: Fix unwind hints in rewind_stack_do_exit() + +From: Jann Horn + +commit f977df7b7ca45a4ac4b66d30a8931d0434c394b1 upstream. + +The LEAQ instruction in rewind_stack_do_exit() moves the stack pointer +directly below the pt_regs at the top of the task stack before calling +do_exit(). Tell the unwinder to expect pt_regs. 
+ +Fixes: 8c1f75587a18 ("x86/entry/64: Add unwind hint annotations") +Reviewed-by: Miroslav Benes +Signed-off-by: Jann Horn +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Cc: Andy Lutomirski +Cc: Dave Jones +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lore.kernel.org/r/68c33e17ae5963854916a46f522624f8e1d264f2.1587808742.git.jpoimboe@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit) + + movq PER_CPU_VAR(cpu_current_top_of_stack), %rax + leaq -PTREGS_SIZE(%rax), %rsp +- UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE ++ UNWIND_HINT_REGS + + call do_exit + SYM_CODE_END(rewind_stack_do_exit) diff --git a/queue-5.6/x86-mm-cpa-flush-direct-map-alias-during-cpa.patch b/queue-5.6/x86-mm-cpa-flush-direct-map-alias-during-cpa.patch new file mode 100644 index 00000000000..df7283a223e --- /dev/null +++ b/queue-5.6/x86-mm-cpa-flush-direct-map-alias-during-cpa.patch @@ -0,0 +1,76 @@ +From ab5130186d7476dcee0d4e787d19a521ca552ce9 Mon Sep 17 00:00:00 2001 +From: Rick Edgecombe +Date: Wed, 22 Apr 2020 20:13:55 -0700 +Subject: x86/mm/cpa: Flush direct map alias during cpa + +From: Rick Edgecombe + +commit ab5130186d7476dcee0d4e787d19a521ca552ce9 upstream. + +As an optimization, cpa_flush() was changed to optionally only flush +the range in @cpa if it was small enough. However, this range does +not include any direct map aliases changed in cpa_process_alias(). So +small set_memory_() calls that touch that alias don't get the direct +map changes flushed. This situation can happen when the virtual +address taking variants are passed an address in vmalloc or modules +space. + +In these cases, force a full TLB flush. 
+ +Note this issue does not extend to cases where the set_memory_() calls are +passed a direct map address, or page array, etc, as the primary target. In +those cases the direct map would be flushed. + +Fixes: 935f5839827e ("x86/mm/cpa: Optimize cpa_flush_array() TLB invalidation") +Signed-off-by: Rick Edgecombe +Signed-off-by: Peter Zijlstra (Intel) +Link: https://lkml.kernel.org/r/20200424105343.GA20730@hirez.programming.kicks-ass.net +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/mm/pat/set_memory.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +--- a/arch/x86/mm/pat/set_memory.c ++++ b/arch/x86/mm/pat/set_memory.c +@@ -42,7 +42,8 @@ struct cpa_data { + unsigned long pfn; + unsigned int flags; + unsigned int force_split : 1, +- force_static_prot : 1; ++ force_static_prot : 1, ++ force_flush_all : 1; + struct page **pages; + }; + +@@ -352,10 +353,10 @@ static void cpa_flush(struct cpa_data *d + return; + } + +- if (cpa->numpages <= tlb_single_page_flush_ceiling) +- on_each_cpu(__cpa_flush_tlb, cpa, 1); +- else ++ if (cpa->force_flush_all || cpa->numpages > tlb_single_page_flush_ceiling) + flush_tlb_all(); ++ else ++ on_each_cpu(__cpa_flush_tlb, cpa, 1); + + if (!cache) + return; +@@ -1595,6 +1596,8 @@ static int cpa_process_alias(struct cpa_ + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + alias_cpa.curpage = 0; + ++ cpa->force_flush_all = 1; ++ + ret = __change_page_attr_set_clr(&alias_cpa, 0); + if (ret) + return ret; +@@ -1615,6 +1618,7 @@ static int cpa_process_alias(struct cpa_ + alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY); + alias_cpa.curpage = 0; + ++ cpa->force_flush_all = 1; + /* + * The high mapping range is imprecise, so ignore the + * return value. 
diff --git a/queue-5.6/x86-unwind-orc-don-t-skip-the-first-frame-for-inactive-tasks.patch b/queue-5.6/x86-unwind-orc-don-t-skip-the-first-frame-for-inactive-tasks.patch new file mode 100644 index 00000000000..ab937591fde --- /dev/null +++ b/queue-5.6/x86-unwind-orc-don-t-skip-the-first-frame-for-inactive-tasks.patch @@ -0,0 +1,48 @@ +From f1d9a2abff66aa8156fbc1493abed468db63ea48 Mon Sep 17 00:00:00 2001 +From: Miroslav Benes +Date: Sat, 25 Apr 2020 05:03:07 -0500 +Subject: x86/unwind/orc: Don't skip the first frame for inactive tasks + +From: Miroslav Benes + +commit f1d9a2abff66aa8156fbc1493abed468db63ea48 upstream. + +When unwinding an inactive task, the ORC unwinder skips the first frame +by default. If both the 'regs' and 'first_frame' parameters of +unwind_start() are NULL, 'state->sp' and 'first_frame' are later +initialized to the same value for an inactive task. Given there is a +"less than or equal to" comparison used at the end of __unwind_start() +for skipping stack frames, the first frame is skipped. + +Drop the equal part of the comparison and make the behavior equivalent +to the frame pointer unwinder. 
+ +Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") +Reviewed-by: Miroslav Benes +Signed-off-by: Miroslav Benes +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Cc: Andy Lutomirski +Cc: Dave Jones +Cc: Jann Horn +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lore.kernel.org/r/7f08db872ab59e807016910acdbe82f744de7065.1587808742.git.jpoimboe@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/unwind_orc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/unwind_orc.c ++++ b/arch/x86/kernel/unwind_orc.c +@@ -651,7 +651,7 @@ void __unwind_start(struct unwind_state + /* Otherwise, skip ahead to the user-specified starting frame: */ + while (!unwind_done(state) && + (!on_stack(&state->stack_info, first_frame, sizeof(long)) || +- state->sp <= (unsigned long)first_frame)) ++ state->sp < (unsigned long)first_frame)) + unwind_next_frame(state); + + return; diff --git a/queue-5.6/x86-unwind-orc-fix-error-path-for-bad-orc-entry-type.patch b/queue-5.6/x86-unwind-orc-fix-error-path-for-bad-orc-entry-type.patch new file mode 100644 index 00000000000..08f76c52dda --- /dev/null +++ b/queue-5.6/x86-unwind-orc-fix-error-path-for-bad-orc-entry-type.patch @@ -0,0 +1,41 @@ +From a0f81bf26888048100bf017fadf438a5bdffa8d8 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf +Date: Sat, 25 Apr 2020 05:06:13 -0500 +Subject: x86/unwind/orc: Fix error path for bad ORC entry type + +From: Josh Poimboeuf + +commit a0f81bf26888048100bf017fadf438a5bdffa8d8 upstream. + +If the ORC entry type is unknown, nothing else can be done other than +reporting an error. Exit the function instead of breaking out of the +switch statement. 
+ +Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") +Reviewed-by: Miroslav Benes +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Cc: Andy Lutomirski +Cc: Dave Jones +Cc: Jann Horn +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lore.kernel.org/r/a7fa668ca6eabbe81ab18b2424f15adbbfdc810a.1587808742.git.jpoimboe@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/unwind_orc.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/arch/x86/kernel/unwind_orc.c ++++ b/arch/x86/kernel/unwind_orc.c +@@ -531,7 +531,7 @@ bool unwind_next_frame(struct unwind_sta + default: + orc_warn("unknown .orc_unwind entry type %d for ip %pB\n", + orc->type, (void *)orig_ip); +- break; ++ goto err; + } + + /* Find BP: */ diff --git a/queue-5.6/x86-unwind-orc-fix-premature-unwind-stoppage-due-to-iret-frames.patch b/queue-5.6/x86-unwind-orc-fix-premature-unwind-stoppage-due-to-iret-frames.patch new file mode 100644 index 00000000000..e492a97a317 --- /dev/null +++ b/queue-5.6/x86-unwind-orc-fix-premature-unwind-stoppage-due-to-iret-frames.patch @@ -0,0 +1,215 @@ +From 81b67439d147677d844d492fcbd03712ea438f42 Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf +Date: Sat, 25 Apr 2020 05:06:14 -0500 +Subject: x86/unwind/orc: Fix premature unwind stoppage due to IRET frames + +From: Josh Poimboeuf + +commit 81b67439d147677d844d492fcbd03712ea438f42 upstream. + +The following execution path is possible: + + fsnotify() + [ realign the stack and store previous SP in R10 ] + <IRQ> + [ only IRET regs saved ] + common_interrupt() + interrupt_entry() + <NMI> + [ full pt_regs saved ] + ... + [ unwind stack ] + +When the unwinder goes through the NMI and the IRQ on the stack, and +then sees fsnotify(), it doesn't have access to the value of R10, +because it only has the five IRET registers. So the unwind stops +prematurely. 
+ +However, because the interrupt_entry() code is careful not to clobber +R10 before saving the full regs, the unwinder should be able to read R10 +from the previously saved full pt_regs associated with the NMI. + +Handle this case properly. When encountering an IRET regs frame +immediately after a full pt_regs frame, use the pt_regs as a backup +which can be used to get the C register values. + +Also, note that a call frame resets the 'prev_regs' value, because a +function is free to clobber the registers. For this fix to work, the +IRET and full regs frames must be adjacent, with no FUNC frames in +between. So replace the FUNC hint in interrupt_entry() with an +IRET_REGS hint. + +Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") +Reviewed-by: Miroslav Benes +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Cc: Andy Lutomirski +Cc: Dave Jones +Cc: Jann Horn +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lore.kernel.org/r/97a408167cc09f1cfa0de31a7b70dd88868d743f.1587808742.git.jpoimboe@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/entry/entry_64.S | 4 +-- + arch/x86/include/asm/unwind.h | 2 - + arch/x86/kernel/unwind_orc.c | 51 ++++++++++++++++++++++++++++++++---------- + 3 files changed, 43 insertions(+), 14 deletions(-) + +--- a/arch/x86/entry/entry_64.S ++++ b/arch/x86/entry/entry_64.S +@@ -511,7 +511,7 @@ SYM_CODE_END(spurious_entries_start) + * +----------------------------------------------------+ + */ + SYM_CODE_START(interrupt_entry) +- UNWIND_HINT_FUNC ++ UNWIND_HINT_IRET_REGS offset=16 + ASM_CLAC + cld + +@@ -543,9 +543,9 @@ SYM_CODE_START(interrupt_entry) + pushq 5*8(%rdi) /* regs->eflags */ + pushq 4*8(%rdi) /* regs->cs */ + pushq 3*8(%rdi) /* regs->ip */ ++ UNWIND_HINT_IRET_REGS + pushq 2*8(%rdi) /* regs->orig_ax */ + pushq 8(%rdi) /* return address */ +- UNWIND_HINT_FUNC + + movq (%rdi), %rdi + jmp 2f +--- a/arch/x86/include/asm/unwind.h ++++ b/arch/x86/include/asm/unwind.h +@@ -19,7 +19,7 
@@ struct unwind_state { + #if defined(CONFIG_UNWINDER_ORC) + bool signal, full_regs; + unsigned long sp, bp, ip; +- struct pt_regs *regs; ++ struct pt_regs *regs, *prev_regs; + #elif defined(CONFIG_UNWINDER_FRAME_POINTER) + bool got_irq; + unsigned long *bp, *orig_sp, ip; +--- a/arch/x86/kernel/unwind_orc.c ++++ b/arch/x86/kernel/unwind_orc.c +@@ -378,9 +378,38 @@ static bool deref_stack_iret_regs(struct + return true; + } + ++/* ++ * If state->regs is non-NULL, and points to a full pt_regs, just get the reg ++ * value from state->regs. ++ * ++ * Otherwise, if state->regs just points to IRET regs, and the previous frame ++ * had full regs, it's safe to get the value from the previous regs. This can ++ * happen when early/late IRQ entry code gets interrupted by an NMI. ++ */ ++static bool get_reg(struct unwind_state *state, unsigned int reg_off, ++ unsigned long *val) ++{ ++ unsigned int reg = reg_off/8; ++ ++ if (!state->regs) ++ return false; ++ ++ if (state->full_regs) { ++ *val = ((unsigned long *)state->regs)[reg]; ++ return true; ++ } ++ ++ if (state->prev_regs) { ++ *val = ((unsigned long *)state->prev_regs)[reg]; ++ return true; ++ } ++ ++ return false; ++} ++ + bool unwind_next_frame(struct unwind_state *state) + { +- unsigned long ip_p, sp, orig_ip = state->ip, prev_sp = state->sp; ++ unsigned long ip_p, sp, tmp, orig_ip = state->ip, prev_sp = state->sp; + enum stack_type prev_type = state->stack_info.type; + struct orc_entry *orc; + bool indirect = false; +@@ -442,39 +471,35 @@ bool unwind_next_frame(struct unwind_sta + break; + + case ORC_REG_R10: +- if (!state->regs || !state->full_regs) { ++ if (!get_reg(state, offsetof(struct pt_regs, r10), &sp)) { + orc_warn("missing regs for base reg R10 at ip %pB\n", + (void *)state->ip); + goto err; + } +- sp = state->regs->r10; + break; + + case ORC_REG_R13: +- if (!state->regs || !state->full_regs) { ++ if (!get_reg(state, offsetof(struct pt_regs, r13), &sp)) { + orc_warn("missing regs for base reg R13 at ip 
%pB\n", + (void *)state->ip); + goto err; + } +- sp = state->regs->r13; + break; + + case ORC_REG_DI: +- if (!state->regs || !state->full_regs) { ++ if (!get_reg(state, offsetof(struct pt_regs, di), &sp)) { + orc_warn("missing regs for base reg DI at ip %pB\n", + (void *)state->ip); + goto err; + } +- sp = state->regs->di; + break; + + case ORC_REG_DX: +- if (!state->regs || !state->full_regs) { ++ if (!get_reg(state, offsetof(struct pt_regs, dx), &sp)) { + orc_warn("missing regs for base reg DX at ip %pB\n", + (void *)state->ip); + goto err; + } +- sp = state->regs->dx; + break; + + default: +@@ -501,6 +526,7 @@ bool unwind_next_frame(struct unwind_sta + + state->sp = sp; + state->regs = NULL; ++ state->prev_regs = NULL; + state->signal = false; + break; + +@@ -512,6 +538,7 @@ bool unwind_next_frame(struct unwind_sta + } + + state->regs = (struct pt_regs *)sp; ++ state->prev_regs = NULL; + state->full_regs = true; + state->signal = true; + break; +@@ -523,6 +550,8 @@ bool unwind_next_frame(struct unwind_sta + goto err; + } + ++ if (state->full_regs) ++ state->prev_regs = state->regs; + state->regs = (void *)sp - IRET_FRAME_OFFSET; + state->full_regs = false; + state->signal = true; +@@ -537,8 +566,8 @@ bool unwind_next_frame(struct unwind_sta + /* Find BP: */ + switch (orc->bp_reg) { + case ORC_REG_UNDEFINED: +- if (state->regs && state->full_regs) +- state->bp = state->regs->bp; ++ if (get_reg(state, offsetof(struct pt_regs, bp), &tmp)) ++ state->bp = tmp; + break; + + case ORC_REG_PREV_SP: diff --git a/queue-5.6/x86-unwind-orc-prevent-unwinding-before-orc-initialization.patch b/queue-5.6/x86-unwind-orc-prevent-unwinding-before-orc-initialization.patch new file mode 100644 index 00000000000..8925e14e2f5 --- /dev/null +++ b/queue-5.6/x86-unwind-orc-prevent-unwinding-before-orc-initialization.patch @@ -0,0 +1,55 @@ +From 98d0c8ebf77e0ba7c54a9ae05ea588f0e9e3f46e Mon Sep 17 00:00:00 2001 +From: Josh Poimboeuf +Date: Sat, 25 Apr 2020 05:03:08 -0500 +Subject: 
x86/unwind/orc: Prevent unwinding before ORC initialization + +From: Josh Poimboeuf + +commit 98d0c8ebf77e0ba7c54a9ae05ea588f0e9e3f46e upstream. + +If the unwinder is called before the ORC data has been initialized, +orc_find() returns NULL, and it tries to fall back to using frame +pointers. This can cause some unexpected warnings during boot. + +Move the 'orc_init' check from orc_find() to __unwind_start(), so that it +doesn't even try to unwind from an uninitialized state. + +Fixes: ee9f8fce9964 ("x86/unwind: Add the ORC unwinder") +Reviewed-by: Miroslav Benes +Signed-off-by: Josh Poimboeuf +Signed-off-by: Ingo Molnar +Cc: Andy Lutomirski +Cc: Dave Jones +Cc: Jann Horn +Cc: Peter Zijlstra +Cc: Thomas Gleixner +Cc: Vince Weaver +Link: https://lore.kernel.org/r/069d1499ad606d85532eb32ce39b2441679667d5.1587808742.git.jpoimboe@redhat.com +Signed-off-by: Greg Kroah-Hartman + +--- + arch/x86/kernel/unwind_orc.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/arch/x86/kernel/unwind_orc.c ++++ b/arch/x86/kernel/unwind_orc.c +@@ -142,9 +142,6 @@ static struct orc_entry *orc_find(unsign + { + static struct orc_entry *orc; + +- if (!orc_init) +- return NULL; +- + if (ip == 0) + return &null_orc_entry; + +@@ -585,6 +582,9 @@ EXPORT_SYMBOL_GPL(unwind_next_frame); + void __unwind_start(struct unwind_state *state, struct task_struct *task, + struct pt_regs *regs, unsigned long *first_frame) + { ++ if (!orc_init) ++ goto done; ++ + memset(state, 0, sizeof(*state)); + state->task = task; +