]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.12-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 16 May 2021 09:30:01 +0000 (11:30 +0200)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Sun, 16 May 2021 09:30:01 +0000 (11:30 +0200)
added patches:
arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch
arc-mm-pae-use-40-bit-physical-page-mask.patch
arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch
arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch
arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch
blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch
btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch
btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch
btrfs-initialize-return-variable-in-cleanup_free_space_cache_v1.patch
btrfs-zoned-fix-silent-data-loss-after-failure-splitting-ordered-extent.patch
btrfs-zoned-sanity-check-zone-type.patch
drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch
drm-i915-avoid-div-by-zero-on-gen2.patch
drm-i915-dp-use-slow-and-wide-link-training-for-everything.patch
drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch
hfsplus-prevent-corruption-in-shrinking-truncate.patch
kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch
kvm-exit-halt-polling-on-need_resched-as-well.patch
mm-hugetlb-fix-cow-where-page-writtable-in-child.patch
mm-hugetlb-fix-f_seal_future_write.patch
powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch
powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch
sh-remove-unused-variable.patch
squashfs-fix-divide-error-in-calculate_skip.patch
userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch
x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch

27 files changed:
queue-5.12/arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch [new file with mode: 0644]
queue-5.12/arc-mm-pae-use-40-bit-physical-page-mask.patch [new file with mode: 0644]
queue-5.12/arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch [new file with mode: 0644]
queue-5.12/arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch [new file with mode: 0644]
queue-5.12/arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch [new file with mode: 0644]
queue-5.12/blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch [new file with mode: 0644]
queue-5.12/btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch [new file with mode: 0644]
queue-5.12/btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch [new file with mode: 0644]
queue-5.12/btrfs-initialize-return-variable-in-cleanup_free_space_cache_v1.patch [new file with mode: 0644]
queue-5.12/btrfs-zoned-fix-silent-data-loss-after-failure-splitting-ordered-extent.patch [new file with mode: 0644]
queue-5.12/btrfs-zoned-sanity-check-zone-type.patch [new file with mode: 0644]
queue-5.12/drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch [new file with mode: 0644]
queue-5.12/drm-i915-avoid-div-by-zero-on-gen2.patch [new file with mode: 0644]
queue-5.12/drm-i915-dp-use-slow-and-wide-link-training-for-everything.patch [new file with mode: 0644]
queue-5.12/drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch [new file with mode: 0644]
queue-5.12/hfsplus-prevent-corruption-in-shrinking-truncate.patch [new file with mode: 0644]
queue-5.12/kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch [new file with mode: 0644]
queue-5.12/kvm-exit-halt-polling-on-need_resched-as-well.patch [new file with mode: 0644]
queue-5.12/mm-hugetlb-fix-cow-where-page-writtable-in-child.patch [new file with mode: 0644]
queue-5.12/mm-hugetlb-fix-f_seal_future_write.patch [new file with mode: 0644]
queue-5.12/powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch [new file with mode: 0644]
queue-5.12/powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch [new file with mode: 0644]
queue-5.12/series
queue-5.12/sh-remove-unused-variable.patch [new file with mode: 0644]
queue-5.12/squashfs-fix-divide-error-in-calculate_skip.patch [new file with mode: 0644]
queue-5.12/userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch [new file with mode: 0644]
queue-5.12/x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch [new file with mode: 0644]

diff --git a/queue-5.12/arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch b/queue-5.12/arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch
new file mode 100644 (file)
index 0000000..1790674
--- /dev/null
@@ -0,0 +1,51 @@
+From 3433adc8bd09fc9f29b8baddf33b4ecd1ecd2cdc Mon Sep 17 00:00:00 2001
+From: Vineet Gupta <vgupta@synopsys.com>
+Date: Fri, 23 Apr 2021 12:16:25 -0700
+Subject: ARC: entry: fix off-by-one error in syscall number validation
+
+From: Vineet Gupta <vgupta@synopsys.com>
+
+commit 3433adc8bd09fc9f29b8baddf33b4ecd1ecd2cdc upstream.
+
+We have NR_syscall syscalls from [0 .. NR_syscall-1].
+However the check for invalid syscall number is "> NR_syscall" as
+opposed to >=. This off-by-one error erroneously allows "NR_syscall"
+to be treated as a valid syscall, causing an out-of-bounds access into the
+syscall table and an ensuing crash (holes within the syscall table have an
+invalid-entry handler but this is beyond the array implementing the
+table).
+
+This problem showed up on v5.6 kernel when testing glibc 2.33 (v5.10
+kernel capable, including faccessat2 syscall 439). The v5.6 kernel has
+NR_syscalls=439 (0 to 438). Due to the bug, 439 passed by glibc was
+not handled as -ENOSYS but processed leading to a crash.
+
+Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/48
+Reported-by: Shahab Vahedi <shahab@synopsys.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arc/kernel/entry.S |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arc/kernel/entry.S
++++ b/arch/arc/kernel/entry.S
+@@ -177,7 +177,7 @@ tracesys:
+       ; Do the Sys Call as we normally would.
+       ; Validate the Sys Call number
+-      cmp     r8,  NR_syscalls
++      cmp     r8,  NR_syscalls - 1
+       mov.hi  r0, -ENOSYS
+       bhi     tracesys_exit
+@@ -255,7 +255,7 @@ ENTRY(EV_Trap)
+       ;============ Normal syscall case
+       ; syscall num shd not exceed the total system calls avail
+-      cmp     r8,  NR_syscalls
++      cmp     r8,  NR_syscalls - 1
+       mov.hi  r0, -ENOSYS
+       bhi     .Lret_from_system_call
diff --git a/queue-5.12/arc-mm-pae-use-40-bit-physical-page-mask.patch b/queue-5.12/arc-mm-pae-use-40-bit-physical-page-mask.patch
new file mode 100644 (file)
index 0000000..17e5bfb
--- /dev/null
@@ -0,0 +1,133 @@
+From c5f756d8c6265ebb1736a7787231f010a3b782e5 Mon Sep 17 00:00:00 2001
+From: Vladimir Isaev <isaev@synopsys.com>
+Date: Tue, 27 Apr 2021 15:12:37 +0300
+Subject: ARC: mm: PAE: use 40-bit physical page mask
+
+From: Vladimir Isaev <isaev@synopsys.com>
+
+commit c5f756d8c6265ebb1736a7787231f010a3b782e5 upstream.
+
+32-bit PAGE_MASK can not be used as a mask for physical addresses
+when PAE is enabled. PAGE_MASK_PHYS must be used for physical
+addresses instead of PAGE_MASK.
+
+Without this, init gets SIGSEGV if pte_modify was called:
+
+| potentially unexpected fatal signal 11.
+| Path: /bin/busybox
+| CPU: 0 PID: 1 Comm: init Not tainted 5.12.0-rc5-00003-g1e43c377a79f-dirty
+| Insn could not be fetched
+|     @No matching VMA found
+|  ECR: 0x00040000 EFA: 0x00000000 ERET: 0x00000000
+| STAT: 0x80080082 [IE U     ]   BTA: 0x00000000
+|  SP: 0x5f9ffe44  FP: 0x00000000 BLK: 0xaf3d4
+| LPS: 0x000d093e LPE: 0x000d0950 LPC: 0x00000000
+| r00: 0x00000002 r01: 0x5f9fff14 r02: 0x5f9fff20
+| ...
+| Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
+
+Signed-off-by: Vladimir Isaev <isaev@synopsys.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Cc: Vineet Gupta <vgupta@synopsys.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arc/include/asm/page.h      |   12 ++++++++++++
+ arch/arc/include/asm/pgtable.h   |   12 +++---------
+ arch/arc/include/uapi/asm/page.h |    1 -
+ arch/arc/mm/ioremap.c            |    5 +++--
+ arch/arc/mm/tlb.c                |    2 +-
+ 5 files changed, 19 insertions(+), 13 deletions(-)
+
+--- a/arch/arc/include/asm/page.h
++++ b/arch/arc/include/asm/page.h
+@@ -7,6 +7,18 @@
+ #include <uapi/asm/page.h>
++#ifdef CONFIG_ARC_HAS_PAE40
++
++#define MAX_POSSIBLE_PHYSMEM_BITS     40
++#define PAGE_MASK_PHYS                        (0xff00000000ull | PAGE_MASK)
++
++#else /* CONFIG_ARC_HAS_PAE40 */
++
++#define MAX_POSSIBLE_PHYSMEM_BITS     32
++#define PAGE_MASK_PHYS                        PAGE_MASK
++
++#endif /* CONFIG_ARC_HAS_PAE40 */
++
+ #ifndef __ASSEMBLY__
+ #define clear_page(paddr)             memset((paddr), 0, PAGE_SIZE)
+--- a/arch/arc/include/asm/pgtable.h
++++ b/arch/arc/include/asm/pgtable.h
+@@ -107,8 +107,8 @@
+ #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
+ /* Set of bits not changed in pte_modify */
+-#define _PAGE_CHG_MASK        (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
+-
++#define _PAGE_CHG_MASK        (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
++                                                         _PAGE_SPECIAL)
+ /* More Abbrevaited helpers */
+ #define PAGE_U_NONE     __pgprot(___DEF)
+ #define PAGE_U_R        __pgprot(___DEF | _PAGE_READ)
+@@ -132,13 +132,7 @@
+ #define PTE_BITS_IN_PD0               (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
+ #define PTE_BITS_RWX          (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
+-#ifdef CONFIG_ARC_HAS_PAE40
+-#define PTE_BITS_NON_RWX_IN_PD1       (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
+-#define MAX_POSSIBLE_PHYSMEM_BITS 40
+-#else
+-#define PTE_BITS_NON_RWX_IN_PD1       (PAGE_MASK | _PAGE_CACHEABLE)
+-#define MAX_POSSIBLE_PHYSMEM_BITS 32
+-#endif
++#define PTE_BITS_NON_RWX_IN_PD1       (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
+ /**************************************************************************
+  * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
+--- a/arch/arc/include/uapi/asm/page.h
++++ b/arch/arc/include/uapi/asm/page.h
+@@ -33,5 +33,4 @@
+ #define PAGE_MASK     (~(PAGE_SIZE-1))
+-
+ #endif /* _UAPI__ASM_ARC_PAGE_H */
+--- a/arch/arc/mm/ioremap.c
++++ b/arch/arc/mm/ioremap.c
+@@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap);
+ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
+                          unsigned long flags)
+ {
++      unsigned int off;
+       unsigned long vaddr;
+       struct vm_struct *area;
+-      phys_addr_t off, end;
++      phys_addr_t end;
+       pgprot_t prot = __pgprot(flags);
+       /* Don't allow wraparound, zero size */
+@@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t p
+       /* Mappings have to be page-aligned */
+       off = paddr & ~PAGE_MASK;
+-      paddr &= PAGE_MASK;
++      paddr &= PAGE_MASK_PHYS;
+       size = PAGE_ALIGN(end + 1) - paddr;
+       /*
+--- a/arch/arc/mm/tlb.c
++++ b/arch/arc/mm/tlb.c
+@@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_str
+                     pte_t *ptep)
+ {
+       unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
+-      phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
++      phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
+       struct page *page = pfn_to_page(pte_pfn(*ptep));
+       create_tlb(vma, vaddr, ptep);
diff --git a/queue-5.12/arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch b/queue-5.12/arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch
new file mode 100644 (file)
index 0000000..062c31d
--- /dev/null
@@ -0,0 +1,56 @@
+From 1d5e4640e5df15252398c1b621f6bd432f2d7f17 Mon Sep 17 00:00:00 2001
+From: Vladimir Isaev <isaev@synopsys.com>
+Date: Tue, 27 Apr 2021 15:13:54 +0300
+Subject: ARC: mm: Use max_high_pfn as a HIGHMEM zone border
+
+From: Vladimir Isaev <isaev@synopsys.com>
+
+commit 1d5e4640e5df15252398c1b621f6bd432f2d7f17 upstream.
+
+Commit 4af22ded0ecf ("arc: fix memory initialization for systems
+with two memory banks") fixed highmem, but for the PAE case it causes
+bug messages:
+
+| BUG: Bad page state in process swapper  pfn:80000
+| page:(ptrval) refcount:0 mapcount:1 mapping:00000000 index:0x0 pfn:0x80000 flags: 0x0()
+| raw: 00000000 00000100 00000122 00000000 00000000 00000000 00000000 00000000
+| raw: 00000000
+| page dumped because: nonzero mapcount
+| Modules linked in:
+| CPU: 0 PID: 0 Comm: swapper Not tainted 5.12.0-rc5-00003-g1e43c377a79f #1
+
+This is because the fix expects highmem to be always less than
+lowmem and uses min_low_pfn as an upper zone border for highmem.
+
+max_high_pfn should be ok for both highmem and highmem+PAE cases.
+
+Fixes: 4af22ded0ecf ("arc: fix memory initialization for systems with two memory banks")
+Signed-off-by: Vladimir Isaev <isaev@synopsys.com>
+Cc: Mike Rapoport <rppt@linux.ibm.com>
+Cc: stable@vger.kernel.org  #5.8 onwards
+Signed-off-by: Vineet Gupta <vgupta@synopsys.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arc/mm/init.c |   11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+--- a/arch/arc/mm/init.c
++++ b/arch/arc/mm/init.c
+@@ -157,7 +157,16 @@ void __init setup_arch_memory(void)
+       min_high_pfn = PFN_DOWN(high_mem_start);
+       max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
+-      max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn;
++      /*
++       * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE.
++       * For HIGHMEM without PAE max_high_pfn should be less than
++       * min_low_pfn to guarantee that these two regions don't overlap.
++       * For PAE case highmem is greater than lowmem, so it is natural
++       * to use max_high_pfn.
++       *
++       * In both cases, holes should be handled by pfn_valid().
++       */
++      max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn;
+       high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
diff --git a/queue-5.12/arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch b/queue-5.12/arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch
new file mode 100644 (file)
index 0000000..5dbb8c6
--- /dev/null
@@ -0,0 +1,53 @@
+From 588a513d34257fdde95a9f0df0202e31998e85c6 Mon Sep 17 00:00:00 2001
+From: Catalin Marinas <catalin.marinas@arm.com>
+Date: Fri, 14 May 2021 10:50:01 +0100
+Subject: arm64: Fix race condition on PG_dcache_clean in __sync_icache_dcache()
+
+From: Catalin Marinas <catalin.marinas@arm.com>
+
+commit 588a513d34257fdde95a9f0df0202e31998e85c6 upstream.
+
+To ensure that instructions are observable in a new mapping, the arm64
+set_pte_at() implementation cleans the D-cache and invalidates the
+I-cache to the PoU. As an optimisation, this is only done on executable
+mappings and the PG_dcache_clean page flag is set to avoid future cache
+maintenance on the same page.
+
+When two different processes map the same page (e.g. private executable
+file or shared mapping) there's a potential race on checking and setting
+PG_dcache_clean via set_pte_at() -> __sync_icache_dcache(). While on the
+fault paths the page is locked (PG_locked), mprotect() does not take the
+page lock. The result is that one process may see the PG_dcache_clean
+flag set but the I/D cache maintenance not yet performed.
+
+Avoid test_and_set_bit(PG_dcache_clean) in favour of separate test_bit()
+and set_bit(). In the rare event of a race, the cache maintenance is
+done twice.
+
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Cc: <stable@vger.kernel.org>
+Cc: Will Deacon <will@kernel.org>
+Cc: Steven Price <steven.price@arm.com>
+Reviewed-by: Steven Price <steven.price@arm.com>
+Acked-by: Will Deacon <will@kernel.org>
+Link: https://lore.kernel.org/r/20210514095001.13236-1-catalin.marinas@arm.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/mm/flush.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/arm64/mm/flush.c
++++ b/arch/arm64/mm/flush.c
+@@ -55,8 +55,10 @@ void __sync_icache_dcache(pte_t pte)
+ {
+       struct page *page = pte_page(pte);
+-      if (!test_and_set_bit(PG_dcache_clean, &page->flags))
++      if (!test_bit(PG_dcache_clean, &page->flags)) {
+               sync_icache_aliases(page_address(page), page_size(page));
++              set_bit(PG_dcache_clean, &page->flags);
++      }
+ }
+ EXPORT_SYMBOL_GPL(__sync_icache_dcache);
diff --git a/queue-5.12/arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch b/queue-5.12/arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch
new file mode 100644 (file)
index 0000000..6187aa6
--- /dev/null
@@ -0,0 +1,51 @@
+From 37a8024d265564eba680575df6421f19db21dfce Mon Sep 17 00:00:00 2001
+From: Peter Collingbourne <pcc@google.com>
+Date: Fri, 7 May 2021 11:59:05 -0700
+Subject: arm64: mte: initialize RGSR_EL1.SEED in __cpu_setup
+
+From: Peter Collingbourne <pcc@google.com>
+
+commit 37a8024d265564eba680575df6421f19db21dfce upstream.
+
+A valid implementation choice for the ChooseRandomNonExcludedTag()
+pseudocode function used by IRG is to behave in the same way as with
+GCR_EL1.RRND=0. This would mean that RGSR_EL1.SEED is used as an LFSR
+which must have a non-zero value in order for IRG to properly produce
+pseudorandom numbers. However, RGSR_EL1 is reset to an UNKNOWN value
+on soft reset and thus may reset to 0. Therefore we must initialize
+RGSR_EL1.SEED to a non-zero value in order to ensure that IRG behaves
+as expected.
+
+Signed-off-by: Peter Collingbourne <pcc@google.com>
+Fixes: 3b714d24ef17 ("arm64: mte: CPU feature detection and initial sysreg configuration")
+Cc: <stable@vger.kernel.org> # 5.10
+Link: https://linux-review.googlesource.com/id/I2b089b6c7d6f17ee37e2f0db7df5ad5bcc04526c
+Acked-by: Mark Rutland <mark.rutland@arm.com>
+Link: https://lore.kernel.org/r/20210507185905.1745402-1-pcc@google.com
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/mm/proc.S |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/arch/arm64/mm/proc.S
++++ b/arch/arm64/mm/proc.S
+@@ -444,6 +444,18 @@ SYM_FUNC_START(__cpu_setup)
+       mov     x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
+       msr_s   SYS_GCR_EL1, x10
++      /*
++       * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
++       * RGSR_EL1.SEED must be non-zero for IRG to produce
++       * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
++       * must initialize it.
++       */
++      mrs     x10, CNTVCT_EL0
++      ands    x10, x10, #SYS_RGSR_EL1_SEED_MASK
++      csinc   x10, x10, xzr, ne
++      lsl     x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
++      msr_s   SYS_RGSR_EL1, x10
++
+       /* clear any pending tag check faults in TFSR*_EL1 */
+       msr_s   SYS_TFSR_EL1, xzr
+       msr_s   SYS_TFSRE0_EL1, xzr
diff --git a/queue-5.12/blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch b/queue-5.12/blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch
new file mode 100644 (file)
index 0000000..7aeee23
--- /dev/null
@@ -0,0 +1,90 @@
+From e9f4eee9a0023ba22db9560d4cc6ee63f933dae8 Mon Sep 17 00:00:00 2001
+From: Tejun Heo <tj@kernel.org>
+Date: Tue, 11 May 2021 21:38:36 -0400
+Subject: blk-iocost: fix weight updates of inner active iocgs
+
+From: Tejun Heo <tj@kernel.org>
+
+commit e9f4eee9a0023ba22db9560d4cc6ee63f933dae8 upstream.
+
+When the weight of an active iocg is updated, weight_updated() is called
+which in turn calls __propagate_weights() to update the active and inuse
+weights so that the effective hierarchical weights are updated accordingly.
+
+The current implementation is incorrect for inner active nodes. For an
+active leaf iocg, inuse can be any value between 1 and active and the
+difference represents how much the iocg is donating. When weight is updated,
+as long as inuse is clamped between 1 and the new weight, we're alright and
+this is what __propagate_weights() currently implements.
+
+However, that's not how an active inner node's inuse is set. An inner node's
+inuse is solely determined by the ratio between the sums of inuse's and
+active's of its children - ie. they're results of propagating the leaves'
+active and inuse weights upwards. __propagate_weights() incorrectly applies
+the same clamping as for a leaf when an active inner node's weight is
+updated. Consider a hierarchy which looks like the following with saturating
+workloads in AA and BB.
+
+     R
+   /   \
+  A     B
+  |     |
+ AA     BB
+
+1. For both A and B, active=100, inuse=100, hwa=0.5, hwi=0.5.
+
+2. echo 200 > A/io.weight
+
+3. __propagate_weights() updates A's active to 200 and leaves inuse at 100 as
+   it's already between 1 and the new active, making A:active=200,
+   A:inuse=100. As R's active_sum is updated along with A's active,
+   A:hwa=2/3, B:hwa=1/3. However, because the inuses didn't change, the
+   hwi's remain unchanged at 0.5.
+
+4. The weight of A is now twice that of B but AA and BB still have the same
+   hwi of 0.5 and thus are doing the same amount of IOs.
+
+Fix it by making __propagate_weights() always calculate the inuse of an
+active inner iocg based on the ratio of child_inuse_sum to child_active_sum.
+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Reported-by: Dan Schatzberg <dschatzberg@fb.com>
+Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
+Cc: stable@vger.kernel.org # v5.4+
+Link: https://lore.kernel.org/r/YJsxnLZV1MnBcqjj@slm.duckdns.org
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ block/blk-iocost.c |   14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -1073,7 +1073,17 @@ static void __propagate_weights(struct i
+       lockdep_assert_held(&ioc->lock);
+-      inuse = clamp_t(u32, inuse, 1, active);
++      /*
++       * For an active leaf node, its inuse shouldn't be zero or exceed
++       * @active. An active internal node's inuse is solely determined by the
++       * inuse to active ratio of its children regardless of @inuse.
++       */
++      if (list_empty(&iocg->active_list) && iocg->child_active_sum) {
++              inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum,
++                                         iocg->child_active_sum);
++      } else {
++              inuse = clamp_t(u32, inuse, 1, active);
++      }
+       iocg->last_inuse = iocg->inuse;
+       if (save)
+@@ -1090,7 +1100,7 @@ static void __propagate_weights(struct i
+               /* update the level sums */
+               parent->child_active_sum += (s32)(active - child->active);
+               parent->child_inuse_sum += (s32)(inuse - child->inuse);
+-              /* apply the udpates */
++              /* apply the updates */
+               child->active = active;
+               child->inuse = inuse;
diff --git a/queue-5.12/btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch b/queue-5.12/btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch
new file mode 100644 (file)
index 0000000..44bc56b
--- /dev/null
@@ -0,0 +1,229 @@
+From f9baa501b4fd6962257853d46ddffbc21f27e344 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Thu, 22 Apr 2021 12:08:05 +0100
+Subject: btrfs: fix deadlock when cloning inline extents and using qgroups
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit f9baa501b4fd6962257853d46ddffbc21f27e344 upstream.
+
+There are a few exceptional cases where cloning an inline extent needs to
+copy the inline extent data into a page of the destination inode.
+
+When this happens, we end up starting a transaction while having a dirty
+page for the destination inode and while having the range locked in the
+destination's inode iotree too. Because when reserving metadata space
+for a transaction we may need to flush existing delalloc in case there is
+not enough free space, we have a mechanism in place to prevent a deadlock,
+which was introduced in commit 3d45f221ce627d ("btrfs: fix deadlock when
+cloning inline extent and low on free metadata space").
+
+However when using qgroups, a transaction also reserves metadata qgroup
+space, which can also result in flushing delalloc in case there is not
+enough available space at the moment. When this happens we deadlock, since
+flushing delalloc requires locking the file range in the inode's iotree
+and the range was already locked at the very beginning of the clone
+operation, before attempting to start the transaction.
+
+When this issue happens, stack traces like the following are reported:
+
+  [72747.556262] task:kworker/u81:9   state:D stack:    0 pid:  225 ppid:     2 flags:0x00004000
+  [72747.556268] Workqueue: writeback wb_workfn (flush-btrfs-1142)
+  [72747.556271] Call Trace:
+  [72747.556273]  __schedule+0x296/0x760
+  [72747.556277]  schedule+0x3c/0xa0
+  [72747.556279]  io_schedule+0x12/0x40
+  [72747.556284]  __lock_page+0x13c/0x280
+  [72747.556287]  ? generic_file_readonly_mmap+0x70/0x70
+  [72747.556325]  extent_write_cache_pages+0x22a/0x440 [btrfs]
+  [72747.556331]  ? __set_page_dirty_nobuffers+0xe7/0x160
+  [72747.556358]  ? set_extent_buffer_dirty+0x5e/0x80 [btrfs]
+  [72747.556362]  ? update_group_capacity+0x25/0x210
+  [72747.556366]  ? cpumask_next_and+0x1a/0x20
+  [72747.556391]  extent_writepages+0x44/0xa0 [btrfs]
+  [72747.556394]  do_writepages+0x41/0xd0
+  [72747.556398]  __writeback_single_inode+0x39/0x2a0
+  [72747.556403]  writeback_sb_inodes+0x1ea/0x440
+  [72747.556407]  __writeback_inodes_wb+0x5f/0xc0
+  [72747.556410]  wb_writeback+0x235/0x2b0
+  [72747.556414]  ? get_nr_inodes+0x35/0x50
+  [72747.556417]  wb_workfn+0x354/0x490
+  [72747.556420]  ? newidle_balance+0x2c5/0x3e0
+  [72747.556424]  process_one_work+0x1aa/0x340
+  [72747.556426]  worker_thread+0x30/0x390
+  [72747.556429]  ? create_worker+0x1a0/0x1a0
+  [72747.556432]  kthread+0x116/0x130
+  [72747.556435]  ? kthread_park+0x80/0x80
+  [72747.556438]  ret_from_fork+0x1f/0x30
+
+  [72747.566958] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs]
+  [72747.566961] Call Trace:
+  [72747.566964]  __schedule+0x296/0x760
+  [72747.566968]  ? finish_wait+0x80/0x80
+  [72747.566970]  schedule+0x3c/0xa0
+  [72747.566995]  wait_extent_bit.constprop.68+0x13b/0x1c0 [btrfs]
+  [72747.566999]  ? finish_wait+0x80/0x80
+  [72747.567024]  lock_extent_bits+0x37/0x90 [btrfs]
+  [72747.567047]  btrfs_invalidatepage+0x299/0x2c0 [btrfs]
+  [72747.567051]  ? find_get_pages_range_tag+0x2cd/0x380
+  [72747.567076]  __extent_writepage+0x203/0x320 [btrfs]
+  [72747.567102]  extent_write_cache_pages+0x2bb/0x440 [btrfs]
+  [72747.567106]  ? update_load_avg+0x7e/0x5f0
+  [72747.567109]  ? enqueue_entity+0xf4/0x6f0
+  [72747.567134]  extent_writepages+0x44/0xa0 [btrfs]
+  [72747.567137]  ? enqueue_task_fair+0x93/0x6f0
+  [72747.567140]  do_writepages+0x41/0xd0
+  [72747.567144]  __filemap_fdatawrite_range+0xc7/0x100
+  [72747.567167]  btrfs_run_delalloc_work+0x17/0x40 [btrfs]
+  [72747.567195]  btrfs_work_helper+0xc2/0x300 [btrfs]
+  [72747.567200]  process_one_work+0x1aa/0x340
+  [72747.567202]  worker_thread+0x30/0x390
+  [72747.567205]  ? create_worker+0x1a0/0x1a0
+  [72747.567208]  kthread+0x116/0x130
+  [72747.567211]  ? kthread_park+0x80/0x80
+  [72747.567214]  ret_from_fork+0x1f/0x30
+
+  [72747.569686] task:fsstress        state:D stack:    0 pid:841421 ppid:841417 flags:0x00000000
+  [72747.569689] Call Trace:
+  [72747.569691]  __schedule+0x296/0x760
+  [72747.569694]  schedule+0x3c/0xa0
+  [72747.569721]  try_flush_qgroup+0x95/0x140 [btrfs]
+  [72747.569725]  ? finish_wait+0x80/0x80
+  [72747.569753]  btrfs_qgroup_reserve_data+0x34/0x50 [btrfs]
+  [72747.569781]  btrfs_check_data_free_space+0x5f/0xa0 [btrfs]
+  [72747.569804]  btrfs_buffered_write+0x1f7/0x7f0 [btrfs]
+  [72747.569810]  ? path_lookupat.isra.48+0x97/0x140
+  [72747.569833]  btrfs_file_write_iter+0x81/0x410 [btrfs]
+  [72747.569836]  ? __kmalloc+0x16a/0x2c0
+  [72747.569839]  do_iter_readv_writev+0x160/0x1c0
+  [72747.569843]  do_iter_write+0x80/0x1b0
+  [72747.569847]  vfs_writev+0x84/0x140
+  [72747.569869]  ? btrfs_file_llseek+0x38/0x270 [btrfs]
+  [72747.569873]  do_writev+0x65/0x100
+  [72747.569876]  do_syscall_64+0x33/0x40
+  [72747.569879]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+  [72747.569899] task:fsstress        state:D stack:    0 pid:841424 ppid:841417 flags:0x00004000
+  [72747.569903] Call Trace:
+  [72747.569906]  __schedule+0x296/0x760
+  [72747.569909]  schedule+0x3c/0xa0
+  [72747.569936]  try_flush_qgroup+0x95/0x140 [btrfs]
+  [72747.569940]  ? finish_wait+0x80/0x80
+  [72747.569967]  __btrfs_qgroup_reserve_meta+0x36/0x50 [btrfs]
+  [72747.569989]  start_transaction+0x279/0x580 [btrfs]
+  [72747.570014]  clone_copy_inline_extent+0x332/0x490 [btrfs]
+  [72747.570041]  btrfs_clone+0x5b7/0x7a0 [btrfs]
+  [72747.570068]  ? lock_extent_bits+0x64/0x90 [btrfs]
+  [72747.570095]  btrfs_clone_files+0xfc/0x150 [btrfs]
+  [72747.570122]  btrfs_remap_file_range+0x3d8/0x4a0 [btrfs]
+  [72747.570126]  do_clone_file_range+0xed/0x200
+  [72747.570131]  vfs_clone_file_range+0x37/0x110
+  [72747.570134]  ioctl_file_clone+0x7d/0xb0
+  [72747.570137]  do_vfs_ioctl+0x138/0x630
+  [72747.570140]  __x64_sys_ioctl+0x62/0xc0
+  [72747.570143]  do_syscall_64+0x33/0x40
+  [72747.570146]  entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+So fix this by skipping the flush of delalloc for an inode that is
+flagged with BTRFS_INODE_NO_DELALLOC_FLUSH, meaning it is currently under
+such a special case of cloning an inline extent, when flushing delalloc
+during qgroup metadata reservation.
+
+The special cases for cloning inline extents were added in kernel 5.7 by
+commit 05a5a7621ce66c ("Btrfs: implement full reflink support for
+inline extents"), while having qgroup metadata space reservation flushing
+delalloc when low on space was added in kernel 5.9 by commit
+c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get
+-EDQUOT"). So use a "Fixes:" tag for the later commit to ease stable
+kernel backports.
+
+Reported-by: Wang Yugui <wangyugui@e16-tech.com>
+Link: https://lore.kernel.org/linux-btrfs/20210421083137.31E3.409509F4@e16-tech.com/
+Fixes: c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT")
+CC: stable@vger.kernel.org # 5.9+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ctree.h  |    2 +-
+ fs/btrfs/inode.c  |    4 ++--
+ fs/btrfs/ioctl.c  |    2 +-
+ fs/btrfs/qgroup.c |    2 +-
+ fs/btrfs/send.c   |    4 ++--
+ 5 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -3110,7 +3110,7 @@ int btrfs_truncate_inode_items(struct bt
+                              struct btrfs_inode *inode, u64 new_size,
+                              u32 min_type);
+-int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
++int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
+ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
+                              bool in_reclaim_context);
+ int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -9672,7 +9672,7 @@ out:
+       return ret;
+ }
+-int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
++int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
+ {
+       struct writeback_control wbc = {
+               .nr_to_write = LONG_MAX,
+@@ -9685,7 +9685,7 @@ int btrfs_start_delalloc_snapshot(struct
+       if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
+               return -EROFS;
+-      return start_delalloc_inodes(root, &wbc, true, false);
++      return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
+ }
+ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -1046,7 +1046,7 @@ static noinline int btrfs_mksnapshot(con
+        */
+       btrfs_drew_read_lock(&root->snapshot_lock);
+-      ret = btrfs_start_delalloc_snapshot(root);
++      ret = btrfs_start_delalloc_snapshot(root, false);
+       if (ret)
+               goto out;
+--- a/fs/btrfs/qgroup.c
++++ b/fs/btrfs/qgroup.c
+@@ -3579,7 +3579,7 @@ static int try_flush_qgroup(struct btrfs
+               return 0;
+       }
+-      ret = btrfs_start_delalloc_snapshot(root);
++      ret = btrfs_start_delalloc_snapshot(root, true);
+       if (ret < 0)
+               goto out;
+       btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
+--- a/fs/btrfs/send.c
++++ b/fs/btrfs/send.c
+@@ -7139,7 +7139,7 @@ static int flush_delalloc_roots(struct s
+       int i;
+       if (root) {
+-              ret = btrfs_start_delalloc_snapshot(root);
++              ret = btrfs_start_delalloc_snapshot(root, false);
+               if (ret)
+                       return ret;
+               btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
+@@ -7147,7 +7147,7 @@ static int flush_delalloc_roots(struct s
+       for (i = 0; i < sctx->clone_roots_cnt; i++) {
+               root = sctx->clone_roots[i].root;
+-              ret = btrfs_start_delalloc_snapshot(root);
++              ret = btrfs_start_delalloc_snapshot(root, false);
+               if (ret)
+                       return ret;
+               btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
diff --git a/queue-5.12/btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch b/queue-5.12/btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch
new file mode 100644 (file)
index 0000000..638fe0a
--- /dev/null
@@ -0,0 +1,271 @@
+From 626e9f41f7c281ba3e02843702f68471706aa6d9 Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Tue, 27 Apr 2021 11:27:20 +0100
+Subject: btrfs: fix race leading to unpersisted data and metadata on fsync
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit 626e9f41f7c281ba3e02843702f68471706aa6d9 upstream.
+
+When doing a fast fsync on a file, there is a race which can result in the
+fsync returning success to user space without logging the inode and without
+durably persisting new data.
+
+The following example shows one possible scenario for this:
+
+   $ mkfs.btrfs -f /dev/sdc
+   $ mount /dev/sdc /mnt
+
+   $ touch /mnt/bar
+   $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/baz
+
+   # Now we have:
+   # file bar == inode 257
+   # file baz == inode 258
+
+   $ mv /mnt/baz /mnt/foo
+
+   # Now we have:
+   # file bar == inode 257
+   # file foo == inode 258
+
+   $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo
+
+   # fsync bar before foo, it is important to trigger the race.
+   $ xfs_io -c "fsync" /mnt/bar
+   $ xfs_io -c "fsync" /mnt/foo
+
+   # After this:
+   # inode 257, file bar, is empty
+   # inode 258, file foo, has 1M filled with 0xcd
+
+   <power failure>
+
+   # Replay the log:
+   $ mount /dev/sdc /mnt
+
+   # After this point file foo should have 1M filled with 0xcd and not 0xab
+
+The following steps explain how the race happens:
+
+1) Before the first fsync of inode 258, when it has the "baz" name, its
+   ->logged_trans is 0, ->last_sub_trans is 0 and ->last_log_commit is -1.
+   The inode also has the full sync flag set;
+
+2) After the first fsync, we set inode 258 ->logged_trans to 6, which is
+   the generation of the current transaction, and set ->last_log_commit
+   to 0, which is the current value of ->last_sub_trans (done at
+   btrfs_log_inode()).
+
+   The full sync flag is cleared from the inode during the fsync.
+
+   The log sub transaction that was committed had an ID of 0 and when we
+   synced the log, at btrfs_sync_log(), we incremented root->log_transid
+   from 0 to 1;
+
+3) During the rename:
+
+   We update inode 258, through btrfs_update_inode(), and that causes its
+   ->last_sub_trans to be set to 1 (the current log transaction ID), and
+   ->last_log_commit remains with a value of 0.
+
+   After updating inode 258, because we have previously logged the inode
+   in the previous fsync, we log again the inode through the call to
+   btrfs_log_new_name(). This results in updating the inode's
+   ->last_log_commit from 0 to 1 (the current value of its
+   ->last_sub_trans).
+
+   The ->last_sub_trans of inode 257 is updated to 1, which is the ID of
+   the next log transaction;
+
+4) Then a buffered write against inode 258 is made. This leaves the value
+   of ->last_sub_trans as 1 (the ID of the current log transaction, stored
+   at root->log_transid);
+
+5) Then an fsync against inode 257 (or any other inode other than 258),
+   happens. This results in committing the log transaction with ID 1,
+   which results in updating root->last_log_commit to 1 and bumping
+   root->log_transid from 1 to 2;
+
+6) Then an fsync against inode 258 starts. We flush delalloc and wait only
+   for writeback to complete, since the full sync flag is not set in the
+   inode's runtime flags - we do not wait for ordered extents to complete.
+
+   Then, at btrfs_sync_file(), we call btrfs_inode_in_log() before the
+   ordered extent completes. The call returns true:
+
+     static inline bool btrfs_inode_in_log(...)
+     {
+         bool ret = false;
+
+         spin_lock(&inode->lock);
+         if (inode->logged_trans == generation &&
+             inode->last_sub_trans <= inode->last_log_commit &&
+             inode->last_sub_trans <= inode->root->last_log_commit)
+                 ret = true;
+         spin_unlock(&inode->lock);
+         return ret;
+     }
+
+   generation has a value of 6 (fs_info->generation), ->logged_trans also
+   has a value of 6 (set when we logged the inode during the first fsync
+   and when logging it during the rename), ->last_sub_trans has a value
+   of 1, set during the rename (step 3), ->last_log_commit also has a
+   value of 1 (set in step 3) and root->last_log_commit has a value of 1,
+   which was set in step 5 when fsyncing inode 257.
+
+   As a consequence we don't log the inode, any new extents and do not
+   sync the log, resulting in a data loss if a power failure happens
+   after the fsync and before the current transaction commits.
+   Also, because we do not log the inode, after a power failure the mtime
+   and ctime of the inode do not match those we had before.
+
+   When the ordered extent completes before we call btrfs_inode_in_log(),
+   then the call returns false and we log the inode and sync the log,
+   since at the end of ordered extent completion we update the inode and
+   set ->last_sub_trans to 2 (the value of root->log_transid) and
+   ->last_log_commit to 1.
+
+This problem is found after removing the check for the emptiness of the
+inode's list of modified extents in the recent commit 209ecbb8585bf6
+("btrfs: remove stale comment and logic from btrfs_inode_in_log()"),
+added in the 5.13 merge window. However checking the emptiness of the
+list is not really the way to solve this problem, and was never intended
+to, because while that solves the problem for COW writes, the problem
+persists for NOCOW writes because in that case the list is always empty.
+
+In the case of NOCOW writes, even though we wait for the writeback to
+complete before returning from btrfs_sync_file(), we end up not logging
+the inode, which has a new mtime/ctime, and because we don't sync the log,
+we never issue disk barriers (send REQ_PREFLUSH to the device) since that
+only happens when we sync the log (when we write super blocks at
+btrfs_sync_log()). So effectively, for a NOCOW case, when we return from
+btrfs_sync_file() to user space, we are not guaranteeing that the data is
+durably persisted on disk.
+
+Also, while the example above uses a rename exchange to show how the
+problem happens, it is not the only way to trigger it. An alternative
+could be adding a new hard link to inode 258, since that also results
+in calling btrfs_log_new_name() and updating the inode in the log.
+An example reproducer using the addition of a hard link instead of a
+rename operation:
+
+  $ mkfs.btrfs -f /dev/sdc
+  $ mount /dev/sdc /mnt
+
+  $ touch /mnt/bar
+  $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/foo
+
+  $ ln /mnt/foo /mnt/foo_link
+  $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo
+
+  $ xfs_io -c "fsync" /mnt/bar
+  $ xfs_io -c "fsync" /mnt/foo
+
+  <power failure>
+
+  # Replay the log:
+  $ mount /dev/sdc /mnt
+
+  # After this point file foo often has 1M filled with 0xab and not 0xcd
+
+The reasons leading to the final fsync of file foo, inode 258, not
+persisting the new data are the same as for the previous example with
+a rename operation.
+
+So fix by never skipping logging and log syncing when there are still any
+ordered extents in flight. To avoid making the conditional if statement
+that checks if logging an inode is needed harder to read, place all the
+logic into a helper function with separate if statements to make it more
+manageable and easier to read.
+
+A test case for fstests will follow soon.
+
+For NOCOW writes, the problem existed before commit b5e6c3e170b770
+("btrfs: always wait on ordered extents at fsync time"), introduced in
+kernel 4.19, then it went away with that commit since we started to always
+wait for ordered extent completion before logging.
+
+The problem came back again once the fast fsync path was changed again to
+avoid waiting for ordered extent completion, in commit 487781796d3022
+("btrfs: make fast fsyncs wait only for writeback"), added in kernel 5.10.
+
+However, for COW writes, the race only happens after the recent
+commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from
+btrfs_inode_in_log()"), introduced in the 5.13 merge window. For NOCOW
+writes, the bug existed before that commit. So tag 5.10+ as the release
+for stable backports.
+
+CC: stable@vger.kernel.org # 5.10+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/file.c     |   35 +++++++++++++++++++++++++----------
+ fs/btrfs/tree-log.c |    3 ++-
+ 2 files changed, 27 insertions(+), 11 deletions(-)
+
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2067,6 +2067,30 @@ static int start_ordered_ops(struct inod
+       return ret;
+ }
++static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
++{
++      struct btrfs_inode *inode = BTRFS_I(ctx->inode);
++      struct btrfs_fs_info *fs_info = inode->root->fs_info;
++
++      if (btrfs_inode_in_log(inode, fs_info->generation) &&
++          list_empty(&ctx->ordered_extents))
++              return true;
++
++      /*
++       * If we are doing a fast fsync we can not bail out if the inode's
++       * last_trans is <= then the last committed transaction, because we only
++       * update the last_trans of the inode during ordered extent completion,
++       * and for a fast fsync we don't wait for that, we only wait for the
++       * writeback to complete.
++       */
++      if (inode->last_trans <= fs_info->last_trans_committed &&
++          (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||
++           list_empty(&ctx->ordered_extents)))
++              return true;
++
++      return false;
++}
++
+ /*
+  * fsync call for both files and directories.  This logs the inode into
+  * the tree log instead of forcing full commits whenever possible.
+@@ -2185,17 +2209,8 @@ int btrfs_sync_file(struct file *file, l
+       atomic_inc(&root->log_batch);
+-      /*
+-       * If we are doing a fast fsync we can not bail out if the inode's
+-       * last_trans is <= then the last committed transaction, because we only
+-       * update the last_trans of the inode during ordered extent completion,
+-       * and for a fast fsync we don't wait for that, we only wait for the
+-       * writeback to complete.
+-       */
+       smp_mb();
+-      if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
+-          (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed &&
+-           (full_sync || list_empty(&ctx.ordered_extents)))) {
++      if (skip_inode_logging(&ctx)) {
+               /*
+                * We've had everything committed since the last time we were
+                * modified so clear this flag in case it was set for whatever
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -6060,7 +6060,8 @@ static int btrfs_log_inode_parent(struct
+        * (since logging them is pointless, a link count of 0 means they
+        * will never be accessible).
+        */
+-      if (btrfs_inode_in_log(inode, trans->transid) ||
++      if ((btrfs_inode_in_log(inode, trans->transid) &&
++           list_empty(&ctx->ordered_extents)) ||
+           inode->vfs_inode.i_nlink == 0) {
+               ret = BTRFS_NO_LOG_SYNC;
+               goto end_no_trans;
diff --git a/queue-5.12/btrfs-initialize-return-variable-in-cleanup_free_space_cache_v1.patch b/queue-5.12/btrfs-initialize-return-variable-in-cleanup_free_space_cache_v1.patch
new file mode 100644 (file)
index 0000000..d2c7cda
--- /dev/null
@@ -0,0 +1,40 @@
+From 77364faf21b4105ee5adbb4844fdfb461334d249 Mon Sep 17 00:00:00 2001
+From: Tom Rix <trix@redhat.com>
+Date: Fri, 30 Apr 2021 11:06:55 -0700
+Subject: btrfs: initialize return variable in cleanup_free_space_cache_v1
+
+From: Tom Rix <trix@redhat.com>
+
+commit 77364faf21b4105ee5adbb4844fdfb461334d249 upstream.
+
+Static analysis reports this problem
+
+  free-space-cache.c:3965:2: warning: Undefined or garbage value returned
+    return ret;
+    ^~~~~~~~~~
+
+ret is set in the node handling loop.  Treat doing nothing as a success
+and initialize ret to 0, although it's unlikely the loop would be
+skipped. We always have block groups, but as it could lead to
+transaction abort in the caller it's better to be safe.
+
+CC: stable@vger.kernel.org # 5.12+
+Signed-off-by: Tom Rix <trix@redhat.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/free-space-cache.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/free-space-cache.c
++++ b/fs/btrfs/free-space-cache.c
+@@ -3942,7 +3942,7 @@ static int cleanup_free_space_cache_v1(s
+ {
+       struct btrfs_block_group *block_group;
+       struct rb_node *node;
+-      int ret;
++      int ret = 0;
+       btrfs_info(fs_info, "cleaning free space cache v1");
diff --git a/queue-5.12/btrfs-zoned-fix-silent-data-loss-after-failure-splitting-ordered-extent.patch b/queue-5.12/btrfs-zoned-fix-silent-data-loss-after-failure-splitting-ordered-extent.patch
new file mode 100644 (file)
index 0000000..e465159
--- /dev/null
@@ -0,0 +1,54 @@
+From adbd914dcde0b03bfc08ffe40b81f31b0457833f Mon Sep 17 00:00:00 2001
+From: Filipe Manana <fdmanana@suse.com>
+Date: Wed, 21 Apr 2021 14:31:50 +0100
+Subject: btrfs: zoned: fix silent data loss after failure splitting ordered extent
+
+From: Filipe Manana <fdmanana@suse.com>
+
+commit adbd914dcde0b03bfc08ffe40b81f31b0457833f upstream.
+
+On a zoned filesystem, sometimes we need to split an ordered extent into 3
+different ordered extents. The original ordered extent is shortened, at
+the front and at the rear, and we create two other new ordered extents to
+represent the trimmed parts of the original ordered extent.
+
+After adjusting the original ordered extent, we create an ordered extent
+to represent the pre-range, and that may fail with ENOMEM for example.
+After that we always try to create the ordered extent for the post-range,
+and if that happens to succeed we end up returning success to the caller
+as we overwrite the 'ret' variable which contained the previous error.
+
+This means we end up with a file range for which there is no ordered
+extent, which results in the range never getting a new file extent item
+pointing to the new data location. And since the split operation did
+not return an error, writeback does not fail and the inode's mapping is
+not flagged with an error, resulting in a subsequent fsync not reporting
+an error either.
+
+It's possibly very unlikely to have the creation of the post-range ordered
+extent succeed after the creation of the pre-range ordered extent failed,
+but it's not impossible.
+
+So fix this by making sure we only create the post-range ordered extent
+if there was no error creating the ordered extent for the pre-range.
+
+Fixes: d22002fd37bd97 ("btrfs: zoned: split ordered extent when bio is sent")
+CC: stable@vger.kernel.org # 5.12+
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/ordered-data.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -995,7 +995,7 @@ int btrfs_split_ordered_extent(struct bt
+       if (pre)
+               ret = clone_ordered_extent(ordered, 0, pre);
+-      if (post)
++      if (ret == 0 && post)
+               ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes,
+                                          post);
diff --git a/queue-5.12/btrfs-zoned-sanity-check-zone-type.patch b/queue-5.12/btrfs-zoned-sanity-check-zone-type.patch
new file mode 100644 (file)
index 0000000..386d7bc
--- /dev/null
@@ -0,0 +1,79 @@
+From 784daf2b9628f2d0117f1f0b578cfe5ab6634919 Mon Sep 17 00:00:00 2001
+From: Naohiro Aota <naohiro.aota@wdc.com>
+Date: Fri, 30 Apr 2021 15:34:17 +0200
+Subject: btrfs: zoned: sanity check zone type
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+commit 784daf2b9628f2d0117f1f0b578cfe5ab6634919 upstream.
+
+The fstests test case generic/475 creates a dm-linear device that gets
+changed to a dm-error device. This leads to errors in loading the block
+group's zone information when running on a zoned file system, ultimately
+resulting in a list corruption. When running on a kernel with list
+debugging enabled this leads to the following crash.
+
+ BTRFS: error (device dm-2) in cleanup_transaction:1953: errno=-5 IO failure
+ kernel BUG at lib/list_debug.c:54!
+ invalid opcode: 0000 [#1] SMP PTI
+ CPU: 1 PID: 2433 Comm: umount Tainted: G        W         5.12.0+ #1018
+ RIP: 0010:__list_del_entry_valid.cold+0x1d/0x47
+ RSP: 0018:ffffc90001473df0 EFLAGS: 00010296
+ RAX: 0000000000000054 RBX: ffff8881038fd000 RCX: ffffc90001473c90
+ RDX: 0000000100001a31 RSI: 0000000000000003 RDI: 0000000000000003
+ RBP: ffff888308871108 R08: 0000000000000003 R09: 0000000000000001
+ R10: 3961373532383838 R11: 6666666620736177 R12: ffff888308871000
+ R13: ffff8881038fd088 R14: ffff8881038fdc78 R15: dead000000000100
+ FS:  00007f353c9b1540(0000) GS:ffff888627d00000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 00007f353cc2c710 CR3: 000000018e13c000 CR4: 00000000000006a0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+  btrfs_free_block_groups+0xc9/0x310 [btrfs]
+  close_ctree+0x2ee/0x31a [btrfs]
+  ? call_rcu+0x8f/0x270
+  ? mutex_lock+0x1c/0x40
+  generic_shutdown_super+0x67/0x100
+  kill_anon_super+0x14/0x30
+  btrfs_kill_super+0x12/0x20 [btrfs]
+  deactivate_locked_super+0x31/0x90
+  cleanup_mnt+0x13e/0x1b0
+  task_work_run+0x63/0xb0
+  exit_to_user_mode_loop+0xd9/0xe0
+  exit_to_user_mode_prepare+0x3e/0x60
+  syscall_exit_to_user_mode+0x1d/0x50
+  entry_SYSCALL_64_after_hwframe+0x44/0xae
+
+As dm-error has no support for zones, btrfs will run its zone emulation
+mode on this device. The zone emulation mode emulates conventional zones,
+so bail out if the zone bitmap that gets populated on mount sees the zone
+as sequential while we're thinking it's a conventional zone when creating
+a block group.
+
+Note: this scenario is unlikely in a real world application and can only
+happen by this (ab)use of device-mapper targets.
+
+CC: stable@vger.kernel.org # 5.12+
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/btrfs/zoned.c |    5 +++++
+ 1 file changed, 5 insertions(+)
+
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1126,6 +1126,11 @@ int btrfs_load_block_group_zone_info(str
+                       goto out;
+               }
++              if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
++                      ret = -EIO;
++                      goto out;
++              }
++
+               switch (zone.cond) {
+               case BLK_ZONE_COND_OFFLINE:
+               case BLK_ZONE_COND_READONLY:
diff --git a/queue-5.12/drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch b/queue-5.12/drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch
new file mode 100644 (file)
index 0000000..6b588c0
--- /dev/null
@@ -0,0 +1,38 @@
+From fe1c97d008f86f672f0e9265f180c22451ca3b9f Mon Sep 17 00:00:00 2001
+From: David Ward <david.ward@gatech.edu>
+Date: Mon, 10 May 2021 05:30:39 -0400
+Subject: drm/amd/display: Initialize attribute for hdcp_srm sysfs file
+
+From: David Ward <david.ward@gatech.edu>
+
+commit fe1c97d008f86f672f0e9265f180c22451ca3b9f upstream.
+
+It is stored in dynamically allocated memory, so sysfs_bin_attr_init() must
+be called to initialize it. (Note: "initialization" only sets the .attr.key
+member in this struct; it does not change the value of any other members.)
+
+Otherwise, when CONFIG_DEBUG_LOCK_ALLOC=y this message appears during boot:
+
+    BUG: key ffff9248900cd148 has not been registered!
+
+Fixes: 9037246bb2da ("drm/amd/display: Add sysfs interface for set/get srm")
+Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1586
+Reported-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
+Signed-off-by: David Ward <david.ward@gatech.edu>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+@@ -644,6 +644,7 @@ struct hdcp_workqueue *hdcp_create_workq
+       /* File created at /sys/class/drm/card0/device/hdcp_srm*/
+       hdcp_work[0].attr = data_attr;
++      sysfs_bin_attr_init(&hdcp_work[0].attr);
+       if (sysfs_create_bin_file(&adev->dev->kobj, &hdcp_work[0].attr))
+               DRM_WARN("Failed to create device file hdcp_srm");
diff --git a/queue-5.12/drm-i915-avoid-div-by-zero-on-gen2.patch b/queue-5.12/drm-i915-avoid-div-by-zero-on-gen2.patch
new file mode 100644 (file)
index 0000000..f006543
--- /dev/null
@@ -0,0 +1,47 @@
+From 4819d16d91145966ce03818a95169df1fd56b299 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= <ville.syrjala@linux.intel.com>
+Date: Wed, 21 Apr 2021 18:33:58 +0300
+Subject: drm/i915: Avoid div-by-zero on gen2
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Ville Syrjälä <ville.syrjala@linux.intel.com>
+
+commit 4819d16d91145966ce03818a95169df1fd56b299 upstream.
+
+Gen2 tiles are 2KiB in size so i915_gem_object_get_tile_row_size()
+can in fact return <4KiB, which leads to div-by-zero here.
+Avoid that.
+
+Not sure i915_gem_object_get_tile_row_size() is entirely
+sane anyway since it doesn't account for the different tile
+layouts on i8xx/i915...
+
+I'm not able to hit this before commit 6846895fde05 ("drm/i915:
+Replace PIN_NONFAULT with calls to PIN_NOEVICT") and it looks
+like I also need to run a recent version of Mesa. With those in
+place xonotic trips on this quite easily on my 85x.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
+Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210421153401.13847-2-ville.syrjala@linux.intel.com
+(cherry picked from commit ed52c62d386f764194e0184fdb905d5f24194cae)
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gem/i915_gem_mman.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+@@ -189,7 +189,7 @@ compute_partial_view(const struct drm_i9
+       struct i915_ggtt_view view;
+       if (i915_gem_object_is_tiled(obj))
+-              chunk = roundup(chunk, tile_row_pages(obj));
++              chunk = roundup(chunk, tile_row_pages(obj) ?: 1);
+       view.type = I915_GGTT_VIEW_PARTIAL;
+       view.partial.offset = rounddown(page_offset, chunk);
diff --git a/queue-5.12/drm-i915-dp-use-slow-and-wide-link-training-for-everything.patch b/queue-5.12/drm-i915-dp-use-slow-and-wide-link-training-for-everything.patch
new file mode 100644 (file)
index 0000000..8fee111
--- /dev/null
@@ -0,0 +1,116 @@
+From a5c936add6a23c15c6ae538ab7a12f80751fdf0f Mon Sep 17 00:00:00 2001
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Date: Wed, 21 Apr 2021 13:20:31 +0800
+Subject: drm/i915/dp: Use slow and wide link training for everything
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+
+commit a5c936add6a23c15c6ae538ab7a12f80751fdf0f upstream.
+
+Screen flickers on Innolux eDP 1.3 panel when clock rate 540000 is in use.
+
+According to the panel vendor, although clock rate 540000 is advertised,
+the max clock rate it really supports is 270000.
+
+Ville Syrjälä mentioned that fast and narrow also breaks some eDP 1.4
+panels, so use slow and wide training for all panels to resolve the
+issue.
+
+User also confirmed that the new strategy doesn't introduce any
+regression on XPS 9380.
+
+v2:
+ - Use slow and wide for everything.
+
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/3384
+References: https://gitlab.freedesktop.org/drm/intel/-/issues/272
+Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20210421052054.1434718-1-kai.heng.feng@canonical.com
+(cherry picked from commit acca7762eb71bc05a8f28d29320d193150051f79)
+Fixes: 2bbd6dba84d4 ("drm/i915: Try to use fast+narrow link on eDP again and fall back to the old max strategy on failure")
+Cc: <stable@vger.kernel.org> # v5.12+
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/intel_dp.c |   59 ++------------------------------
+ 1 file changed, 5 insertions(+), 54 deletions(-)
+
+--- a/drivers/gpu/drm/i915/display/intel_dp.c
++++ b/drivers/gpu/drm/i915/display/intel_dp.c
+@@ -1174,44 +1174,6 @@ intel_dp_compute_link_config_wide(struct
+       return -EINVAL;
+ }
+-/* Optimize link config in order: max bpp, min lanes, min clock */
+-static int
+-intel_dp_compute_link_config_fast(struct intel_dp *intel_dp,
+-                                struct intel_crtc_state *pipe_config,
+-                                const struct link_config_limits *limits)
+-{
+-      const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
+-      int bpp, clock, lane_count;
+-      int mode_rate, link_clock, link_avail;
+-
+-      for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) {
+-              int output_bpp = intel_dp_output_bpp(pipe_config->output_format, bpp);
+-
+-              mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock,
+-                                                 output_bpp);
+-
+-              for (lane_count = limits->min_lane_count;
+-                   lane_count <= limits->max_lane_count;
+-                   lane_count <<= 1) {
+-                      for (clock = limits->min_clock; clock <= limits->max_clock; clock++) {
+-                              link_clock = intel_dp->common_rates[clock];
+-                              link_avail = intel_dp_max_data_rate(link_clock,
+-                                                                  lane_count);
+-
+-                              if (mode_rate <= link_avail) {
+-                                      pipe_config->lane_count = lane_count;
+-                                      pipe_config->pipe_bpp = bpp;
+-                                      pipe_config->port_clock = link_clock;
+-
+-                                      return 0;
+-                              }
+-                      }
+-              }
+-      }
+-
+-      return -EINVAL;
+-}
+-
+ static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc)
+ {
+       int i, num_bpc;
+@@ -1461,22 +1423,11 @@ intel_dp_compute_link_config(struct inte
+           intel_dp_can_bigjoiner(intel_dp))
+               pipe_config->bigjoiner = true;
+-      if (intel_dp_is_edp(intel_dp))
+-              /*
+-               * Optimize for fast and narrow. eDP 1.3 section 3.3 and eDP 1.4
+-               * section A.1: "It is recommended that the minimum number of
+-               * lanes be used, using the minimum link rate allowed for that
+-               * lane configuration."
+-               *
+-               * Note that we fall back to the max clock and lane count for eDP
+-               * panels that fail with the fast optimal settings (see
+-               * intel_dp->use_max_params), in which case the fast vs. wide
+-               * choice doesn't matter.
+-               */
+-              ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config, &limits);
+-      else
+-              /* Optimize for slow and wide. */
+-              ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
++      /*
++       * Optimize for slow and wide for everything, because there are some
++       * eDP 1.3 and 1.4 panels don't work well with fast and narrow.
++       */
++      ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
+       /* enable compression if the mode doesn't fit available BW */
+       drm_dbg_kms(&i915->drm, "Force DSC en = %d\n", intel_dp->force_dsc_en);
diff --git a/queue-5.12/drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch b/queue-5.12/drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch
new file mode 100644 (file)
index 0000000..24c144f
--- /dev/null
@@ -0,0 +1,85 @@
+From 227545b9a08c68778ddd89428f99c351fc9315ac Mon Sep 17 00:00:00 2001
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Date: Fri, 30 Apr 2021 12:56:56 +0800
+Subject: drm/radeon/dpm: Disable sclk switching on Oland when two 4K 60Hz monitors are connected
+
+From: Kai-Heng Feng <kai.heng.feng@canonical.com>
+
+commit 227545b9a08c68778ddd89428f99c351fc9315ac upstream.
+
+Screen flickers rapidly when two 4K 60Hz monitors are in use. This issue
+doesn't happen when one monitor is 4K 60Hz (pixelclock 594MHz) and
+another one is 4K 30Hz (pixelclock 297MHz).
+
+The issue is gone after setting "power_dpm_force_performance_level" to
+"high". Following the indication, we found that the issue occurs when
+sclk is too low.
+
+So resolve the issue by disabling sclk switching when there are two
+monitors that require a high pixelclock (> 297MHz).
+
+v2:
+ - Only apply the fix to Oland.
+Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/radeon/radeon.h    |    1 +
+ drivers/gpu/drm/radeon/radeon_pm.c |    8 ++++++++
+ drivers/gpu/drm/radeon/si_dpm.c    |    3 +++
+ 3 files changed, 12 insertions(+)
+
+--- a/drivers/gpu/drm/radeon/radeon.h
++++ b/drivers/gpu/drm/radeon/radeon.h
+@@ -1558,6 +1558,7 @@ struct radeon_dpm {
+       void                    *priv;
+       u32                     new_active_crtcs;
+       int                     new_active_crtc_count;
++      int                     high_pixelclock_count;
+       u32                     current_active_crtcs;
+       int                     current_active_crtc_count;
+       bool single_display;
+--- a/drivers/gpu/drm/radeon/radeon_pm.c
++++ b/drivers/gpu/drm/radeon/radeon_pm.c
+@@ -1775,6 +1775,7 @@ static void radeon_pm_compute_clocks_dpm
+       struct drm_device *ddev = rdev->ddev;
+       struct drm_crtc *crtc;
+       struct radeon_crtc *radeon_crtc;
++      struct radeon_connector *radeon_connector;
+       if (!rdev->pm.dpm_enabled)
+               return;
+@@ -1784,6 +1785,7 @@ static void radeon_pm_compute_clocks_dpm
+       /* update active crtc counts */
+       rdev->pm.dpm.new_active_crtcs = 0;
+       rdev->pm.dpm.new_active_crtc_count = 0;
++      rdev->pm.dpm.high_pixelclock_count = 0;
+       if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) {
+               list_for_each_entry(crtc,
+                                   &ddev->mode_config.crtc_list, head) {
+@@ -1791,6 +1793,12 @@ static void radeon_pm_compute_clocks_dpm
+                       if (crtc->enabled) {
+                               rdev->pm.dpm.new_active_crtcs |= (1 << radeon_crtc->crtc_id);
+                               rdev->pm.dpm.new_active_crtc_count++;
++                              if (!radeon_crtc->connector)
++                                      continue;
++
++                              radeon_connector = to_radeon_connector(radeon_crtc->connector);
++                              if (radeon_connector->pixelclock_for_modeset > 297000)
++                                      rdev->pm.dpm.high_pixelclock_count++;
+                       }
+               }
+       }
+--- a/drivers/gpu/drm/radeon/si_dpm.c
++++ b/drivers/gpu/drm/radeon/si_dpm.c
+@@ -2979,6 +2979,9 @@ static void si_apply_state_adjust_rules(
+                   (rdev->pdev->device == 0x6605)) {
+                       max_sclk = 75000;
+               }
++
++              if (rdev->pm.dpm.high_pixelclock_count > 1)
++                      disable_sclk_switching = true;
+       }
+       if (rps->vce_active) {
diff --git a/queue-5.12/hfsplus-prevent-corruption-in-shrinking-truncate.patch b/queue-5.12/hfsplus-prevent-corruption-in-shrinking-truncate.patch
new file mode 100644 (file)
index 0000000..706a839
--- /dev/null
@@ -0,0 +1,89 @@
+From c3187cf32216313fb316084efac4dab3a8459b1d Mon Sep 17 00:00:00 2001
+From: Jouni Roivas <jouni.roivas@tuxera.com>
+Date: Fri, 14 May 2021 17:27:33 -0700
+Subject: hfsplus: prevent corruption in shrinking truncate
+
+From: Jouni Roivas <jouni.roivas@tuxera.com>
+
+commit c3187cf32216313fb316084efac4dab3a8459b1d upstream.
+
+I believe there are some issues introduced by commit 31651c607151
+("hfsplus: avoid deadlock on file truncation")
+
+HFS+ has extent records which always contain 8 extents.  In case the
+first extent record in the catalog file gets full, new ones are allocated
+from the extents overflow file.
+
+In case a shrinking truncate lands in the middle of an extent record that
+is located in the extents overflow file, the logic in hfsplus_file_truncate()
+was changed so that the call to hfs_brec_remove() is not guarded any more.
+
+Right action would be just freeing the extents that exceed the new size
+inside extent record by calling hfsplus_free_extents(), and then check if
+the whole extent record should be removed.  However since the guard
+(blk_cnt > start) is now after the call to hfs_brec_remove(), this has
+unfortunate effect that the last matching extent record is removed
+unconditionally.
+
+To reproduce this issue, create a file which has at least 10 extents, and
+then perform shrinking truncate into middle of the last extent record, so
+that the number of remaining extents is not under or divisible by 8.  This
+causes the last extent record (8 extents) to be removed totally instead of
+truncating into middle of it.  Thus this causes corruption, and lost data.
+
+The fix for this is simply checking if the new truncated end is below the
+start of this extent record, making it safe to remove the full extent
+record.  However, the call to hfs_brec_remove() can't be moved to its
+previous place since we're dropping ->tree_lock, which can cause a race
+condition and invalidate the cached info, possibly corrupting the node data.
+
+Another issue is related to this one.  When entering into the block
+(blk_cnt > start) we are not holding the ->tree_lock.  We break out from
+the loop not holding the lock, but hfs_find_exit() does unlock it.  Not
+sure if it's possible for someone else to take the lock under our feet,
+but it can cause hard to debug errors and premature unlocking.  Even if
+there's no real risk of it, the locking should still always be kept in
+balance.  Thus taking the lock now just before the check.
+
+Link: https://lkml.kernel.org/r/20210429165139.3082828-1-jouni.roivas@tuxera.com
+Fixes: 31651c607151f ("hfsplus: avoid deadlock on file truncation")
+Signed-off-by: Jouni Roivas <jouni.roivas@tuxera.com>
+Reviewed-by: Anton Altaparmakov <anton@tuxera.com>
+Cc: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
+Cc: Viacheslav Dubeyko <slava@dubeyko.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hfsplus/extents.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/hfsplus/extents.c
++++ b/fs/hfsplus/extents.c
+@@ -598,13 +598,15 @@ void hfsplus_file_truncate(struct inode
+               res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
+               if (res)
+                       break;
+-              hfs_brec_remove(&fd);
+-              mutex_unlock(&fd.tree->tree_lock);
+               start = hip->cached_start;
++              if (blk_cnt <= start)
++                      hfs_brec_remove(&fd);
++              mutex_unlock(&fd.tree->tree_lock);
+               hfsplus_free_extents(sb, hip->cached_extents,
+                                    alloc_cnt - start, alloc_cnt - blk_cnt);
+               hfsplus_dump_extent(hip->cached_extents);
++              mutex_lock(&fd.tree->tree_lock);
+               if (blk_cnt > start) {
+                       hip->extent_state |= HFSPLUS_EXT_DIRTY;
+                       break;
+@@ -612,7 +614,6 @@ void hfsplus_file_truncate(struct inode
+               alloc_cnt = start;
+               hip->cached_start = hip->cached_blocks = 0;
+               hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
+-              mutex_lock(&fd.tree->tree_lock);
+       }
+       hfs_find_exit(&fd);
diff --git a/queue-5.12/kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch b/queue-5.12/kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch
new file mode 100644 (file)
index 0000000..861fd64
--- /dev/null
@@ -0,0 +1,96 @@
+From f649dc0e0d7b509c75570ee403723660f5b72ec7 Mon Sep 17 00:00:00 2001
+From: Peter Collingbourne <pcc@google.com>
+Date: Fri, 14 May 2021 17:27:27 -0700
+Subject: kasan: fix unit tests with CONFIG_UBSAN_LOCAL_BOUNDS enabled
+
+From: Peter Collingbourne <pcc@google.com>
+
+commit f649dc0e0d7b509c75570ee403723660f5b72ec7 upstream.
+
+These tests deliberately access these arrays out of bounds, which will
+cause the dynamic local bounds checks inserted by
+CONFIG_UBSAN_LOCAL_BOUNDS to fail and panic the kernel.  To avoid this
+problem, access the arrays via volatile pointers, which will prevent the
+compiler from being able to determine the array bounds.
+
+These accesses use volatile pointers to char (char *volatile) rather than
+the more conventional pointers to volatile char (volatile char *) because
+we want to prevent the compiler from making inferences about the pointer
+itself (i.e.  its array bounds), not the data that it refers to.
+
+Link: https://lkml.kernel.org/r/20210507025915.1464056-1-pcc@google.com
+Link: https://linux-review.googlesource.com/id/I90b1713fbfa1bf68ff895aef099ea77b98a7c3b9
+Signed-off-by: Peter Collingbourne <pcc@google.com>
+Tested-by: Alexander Potapenko <glider@google.com>
+Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
+Cc: Peter Collingbourne <pcc@google.com>
+Cc: George Popescu <georgepope@android.com>
+Cc: Elena Petrova <lenaptr@google.com>
+Cc: Evgenii Stepanov <eugenis@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/test_kasan.c |   29 +++++++++++++++++++++++------
+ 1 file changed, 23 insertions(+), 6 deletions(-)
+
+--- a/lib/test_kasan.c
++++ b/lib/test_kasan.c
+@@ -646,8 +646,20 @@ static char global_array[10];
+ static void kasan_global_oob(struct kunit *test)
+ {
+-      volatile int i = 3;
+-      char *p = &global_array[ARRAY_SIZE(global_array) + i];
++      /*
++       * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS
++       * from failing here and panicing the kernel, access the array via a
++       * volatile pointer, which will prevent the compiler from being able to
++       * determine the array bounds.
++       *
++       * This access uses a volatile pointer to char (char *volatile) rather
++       * than the more conventional pointer to volatile char (volatile char *)
++       * because we want to prevent the compiler from making inferences about
++       * the pointer itself (i.e. its array bounds), not the data that it
++       * refers to.
++       */
++      char *volatile array = global_array;
++      char *p = &array[ARRAY_SIZE(global_array) + 3];
+       /* Only generic mode instruments globals. */
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
+@@ -695,8 +707,9 @@ static void ksize_uaf(struct kunit *test
+ static void kasan_stack_oob(struct kunit *test)
+ {
+       char stack_array[10];
+-      volatile int i = OOB_TAG_OFF;
+-      char *p = &stack_array[ARRAY_SIZE(stack_array) + i];
++      /* See comment in kasan_global_oob. */
++      char *volatile array = stack_array;
++      char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF];
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
+@@ -707,7 +720,9 @@ static void kasan_alloca_oob_left(struct
+ {
+       volatile int i = 10;
+       char alloca_array[i];
+-      char *p = alloca_array - 1;
++      /* See comment in kasan_global_oob. */
++      char *volatile array = alloca_array;
++      char *p = array - 1;
+       /* Only generic mode instruments dynamic allocas. */
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
+@@ -720,7 +735,9 @@ static void kasan_alloca_oob_right(struc
+ {
+       volatile int i = 10;
+       char alloca_array[i];
+-      char *p = alloca_array + i;
++      /* See comment in kasan_global_oob. */
++      char *volatile array = alloca_array;
++      char *p = array + i;
+       /* Only generic mode instruments dynamic allocas. */
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
diff --git a/queue-5.12/kvm-exit-halt-polling-on-need_resched-as-well.patch b/queue-5.12/kvm-exit-halt-polling-on-need_resched-as-well.patch
new file mode 100644 (file)
index 0000000..4b57069
--- /dev/null
@@ -0,0 +1,38 @@
+From 262de4102c7bb8e59f26a967a8ffe8cce85cc537 Mon Sep 17 00:00:00 2001
+From: Benjamin Segall <bsegall@google.com>
+Date: Thu, 29 Apr 2021 16:22:34 +0000
+Subject: kvm: exit halt polling on need_resched() as well
+
+From: Benjamin Segall <bsegall@google.com>
+
+commit 262de4102c7bb8e59f26a967a8ffe8cce85cc537 upstream.
+
+single_task_running() is usually more general than need_resched()
+but CFS_BANDWIDTH throttling will use resched_task() when there
+is just one task to get the task to block. This was causing
+long-need_resched warnings and was likely allowing VMs to
+overrun their quota when halt polling.
+
+Signed-off-by: Ben Segall <bsegall@google.com>
+Signed-off-by: Venkatesh Srinivas <venkateshs@chromium.org>
+Message-Id: <20210429162233.116849-1-venkateshs@chromium.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ virt/kvm/kvm_main.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -2838,7 +2838,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcp
+                               goto out;
+                       }
+                       poll_end = cur = ktime_get();
+-              } while (single_task_running() && ktime_before(cur, stop));
++              } while (single_task_running() && !need_resched() &&
++                       ktime_before(cur, stop));
+       }
+       prepare_to_rcuwait(&vcpu->wait);
diff --git a/queue-5.12/mm-hugetlb-fix-cow-where-page-writtable-in-child.patch b/queue-5.12/mm-hugetlb-fix-cow-where-page-writtable-in-child.patch
new file mode 100644 (file)
index 0000000..db3c002
--- /dev/null
@@ -0,0 +1,42 @@
+From 84894e1c42e9f25c17f2888e0c0e1505cb727538 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Fri, 14 May 2021 17:27:07 -0700
+Subject: mm/hugetlb: fix cow where page writtable in child
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 84894e1c42e9f25c17f2888e0c0e1505cb727538 upstream.
+
+When reworking early cow of pinned hugetlb pages, we moved huge_ptep_get()
+upwards but overlooked a side effect: huge_ptep_get() used to fetch the
+pte after wr-protection.  After moving it upwards, we need an explicit
+wr-protect of the child pte or we will keep the write bit set in the child
+process, which could cause data corruption where the child can write to the
+original page directly.
+
+This issue can also be exposed by "memfd_test hugetlbfs" kselftest.
+
+Link: https://lkml.kernel.org/r/20210503234356.9097-3-peterx@redhat.com
+Fixes: 4eae4efa2c299 ("hugetlb: do early cow when page pinned on src mm")
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Hugh Dickins <hughd@google.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -3905,6 +3905,7 @@ again:
+                                * See Documentation/vm/mmu_notifier.rst
+                                */
+                               huge_ptep_set_wrprotect(src, addr, src_pte);
++                              entry = huge_pte_wrprotect(entry);
+                       }
+                       page_dup_rmap(ptepage, true);
diff --git a/queue-5.12/mm-hugetlb-fix-f_seal_future_write.patch b/queue-5.12/mm-hugetlb-fix-f_seal_future_write.patch
new file mode 100644 (file)
index 0000000..88ad67d
--- /dev/null
@@ -0,0 +1,150 @@
+From 22247efd822e6d263f3c8bd327f3f769aea9b1d9 Mon Sep 17 00:00:00 2001
+From: Peter Xu <peterx@redhat.com>
+Date: Fri, 14 May 2021 17:27:04 -0700
+Subject: mm/hugetlb: fix F_SEAL_FUTURE_WRITE
+
+From: Peter Xu <peterx@redhat.com>
+
+commit 22247efd822e6d263f3c8bd327f3f769aea9b1d9 upstream.
+
+Patch series "mm/hugetlb: Fix issues on file sealing and fork", v2.
+
+Hugh reported an issue with F_SEAL_FUTURE_WRITE not being applied correctly
+to hugetlbfs, which I can easily verify using the memfd_test program; it
+seems that the program is hardly ever run with hugetlbfs pages (the default
+is shmem).
+
+Meanwhile I found another, probably even more severe, issue: hugetlb
+fork won't wr-protect child cow pages, so the child can potentially write
+to the parent's private pages.  Patch 2 addresses that.
+
+After this series applied, "memfd_test hugetlbfs" should start to pass.
+
+This patch (of 2):
+
+F_SEAL_FUTURE_WRITE is missing for hugetlb starting from the first day.
+There is a test program for that and it fails constantly.
+
+$ ./memfd_test hugetlbfs
+memfd-hugetlb: CREATE
+memfd-hugetlb: BASIC
+memfd-hugetlb: SEAL-WRITE
+memfd-hugetlb: SEAL-FUTURE-WRITE
+mmap() didn't fail as expected
+Aborted (core dumped)
+
+I think it's probably because no one is really running the hugetlbfs test.
+
+Fix it by checking FUTURE_WRITE also in hugetlbfs_file_mmap() as what we
+do in shmem_mmap().  Generalize a helper for that.
+
+Link: https://lkml.kernel.org/r/20210503234356.9097-1-peterx@redhat.com
+Link: https://lkml.kernel.org/r/20210503234356.9097-2-peterx@redhat.com
+Fixes: ab3948f58ff84 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd")
+Signed-off-by: Peter Xu <peterx@redhat.com>
+Reported-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Joel Fernandes (Google) <joel@joelfernandes.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/hugetlbfs/inode.c |    5 +++++
+ include/linux/mm.h   |   32 ++++++++++++++++++++++++++++++++
+ mm/shmem.c           |   22 ++++------------------
+ 3 files changed, 41 insertions(+), 18 deletions(-)
+
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -131,6 +131,7 @@ static void huge_pagevec_release(struct
+ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+ {
+       struct inode *inode = file_inode(file);
++      struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
+       loff_t len, vma_len;
+       int ret;
+       struct hstate *h = hstate_file(file);
+@@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct fi
+       vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
+       vma->vm_ops = &hugetlb_vm_ops;
++      ret = seal_check_future_write(info->seals, vma);
++      if (ret)
++              return ret;
++
+       /*
+        * page based offset in vm_pgoff could be sufficiently large to
+        * overflow a loff_t when converted to byte offset.  This can
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -3170,5 +3170,37 @@ extern int sysctl_nr_trim_pages;
+ void mem_dump_obj(void *object);
++/**
++ * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
++ * @seals: the seals to check
++ * @vma: the vma to operate on
++ *
++ * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
++ * the vma flags.  Return 0 if check pass, or <0 for errors.
++ */
++static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
++{
++      if (seals & F_SEAL_FUTURE_WRITE) {
++              /*
++               * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
++               * "future write" seal active.
++               */
++              if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
++                      return -EPERM;
++
++              /*
++               * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
++               * MAP_SHARED and read-only, take care to not allow mprotect to
++               * revert protections on such mappings. Do this only for shared
++               * mappings. For private mappings, don't need to mask
++               * VM_MAYWRITE as we still want them to be COW-writable.
++               */
++              if (vma->vm_flags & VM_SHARED)
++                      vma->vm_flags &= ~(VM_MAYWRITE);
++      }
++
++      return 0;
++}
++
+ #endif /* __KERNEL__ */
+ #endif /* _LINUX_MM_H */
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2258,25 +2258,11 @@ out_nomem:
+ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
+ {
+       struct shmem_inode_info *info = SHMEM_I(file_inode(file));
++      int ret;
+-      if (info->seals & F_SEAL_FUTURE_WRITE) {
+-              /*
+-               * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+-               * "future write" seal active.
+-               */
+-              if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+-                      return -EPERM;
+-
+-              /*
+-               * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
+-               * MAP_SHARED and read-only, take care to not allow mprotect to
+-               * revert protections on such mappings. Do this only for shared
+-               * mappings. For private mappings, don't need to mask
+-               * VM_MAYWRITE as we still want them to be COW-writable.
+-               */
+-              if (vma->vm_flags & VM_SHARED)
+-                      vma->vm_flags &= ~(VM_MAYWRITE);
+-      }
++      ret = seal_check_future_write(info->seals, vma);
++      if (ret)
++              return ret;
+       /* arm64 - allow memory tagging on RAM-based files */
+       vma->vm_flags |= VM_MTE_ALLOWED;
diff --git a/queue-5.12/powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch b/queue-5.12/powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch
new file mode 100644 (file)
index 0000000..8f9cdcc
--- /dev/null
@@ -0,0 +1,70 @@
+From aec86b052df6541cc97c5fca44e5934cbea4963b Mon Sep 17 00:00:00 2001
+From: Michael Ellerman <mpe@ellerman.id.au>
+Date: Thu, 6 May 2021 14:49:59 +1000
+Subject: powerpc/64s: Fix crashes when toggling entry flush barrier
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+commit aec86b052df6541cc97c5fca44e5934cbea4963b upstream.
+
+The entry flush mitigation can be enabled/disabled at runtime via a
+debugfs file (entry_flush), which causes the kernel to patch itself to
+enable/disable the relevant mitigations.
+
+However depending on which mitigation we're using, it may not be safe to
+do that patching while other CPUs are active. For example the following
+crash:
+
+  sleeper[15639]: segfault (11) at c000000000004c20 nip c000000000004c20 lr c000000000004c20
+
+Shows that we returned to userspace with a corrupted LR that points into
+the kernel, due to executing the partially patched call to the fallback
+entry flush (ie. we missed the LR restore).
+
+Fix it by doing the patching under stop machine. The CPUs that aren't
+doing the patching will be spinning in the core of the stop machine
+logic. That is currently sufficient for our purposes, because none of
+the patching we do is to that code or anywhere in the vicinity.
+
+Fixes: f79643787e0a ("powerpc/64s: flush L1D on kernel entry")
+Cc: stable@vger.kernel.org # v5.10+
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210506044959.1298123-2-mpe@ellerman.id.au
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/lib/feature-fixups.c |   16 +++++++++++++++-
+ 1 file changed, 15 insertions(+), 1 deletion(-)
+
+--- a/arch/powerpc/lib/feature-fixups.c
++++ b/arch/powerpc/lib/feature-fixups.c
+@@ -299,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_fl
+                                               : "unknown");
+ }
+-void do_entry_flush_fixups(enum l1d_flush_type types)
++static int __do_entry_flush_fixups(void *data)
+ {
++      enum l1d_flush_type types = *(enum l1d_flush_type *)data;
+       unsigned int instrs[3], *dest;
+       long *start, *end;
+       int i;
+@@ -369,6 +370,19 @@ void do_entry_flush_fixups(enum l1d_flus
+                                                       : "ori type" :
+               (types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
+                                               : "unknown");
++
++      return 0;
++}
++
++void do_entry_flush_fixups(enum l1d_flush_type types)
++{
++      /*
++       * The call to the fallback flush can not be safely patched in/out while
++       * other CPUs are executing it. So call __do_entry_flush_fixups() on one
++       * CPU while all other CPUs spin in the stop machine core with interrupts
++       * hard disabled.
++       */
++      stop_machine(__do_entry_flush_fixups, &types, NULL);
+ }
+ void do_rfi_flush_fixups(enum l1d_flush_type types)
diff --git a/queue-5.12/powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch b/queue-5.12/powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch
new file mode 100644 (file)
index 0000000..c9ef903
--- /dev/null
@@ -0,0 +1,78 @@
+From 8ec7791bae1327b1c279c5cd6e929c3b12daaf0a Mon Sep 17 00:00:00 2001
+From: Michael Ellerman <mpe@ellerman.id.au>
+Date: Thu, 6 May 2021 14:49:58 +1000
+Subject: powerpc/64s: Fix crashes when toggling stf barrier
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+commit 8ec7791bae1327b1c279c5cd6e929c3b12daaf0a upstream.
+
+The STF (store-to-load forwarding) barrier mitigation can be
+enabled/disabled at runtime via a debugfs file (stf_barrier), which
+causes the kernel to patch itself to enable/disable the relevant
+mitigations.
+
+However depending on which mitigation we're using, it may not be safe to
+do that patching while other CPUs are active. For example the following
+crash:
+
+  User access of kernel address (c00000003fff5af0) - exploit attempt? (uid: 0)
+  segfault (11) at c00000003fff5af0 nip 7fff8ad12198 lr 7fff8ad121f8 code 1
+  code: 40820128 e93c00d0 e9290058 7c292840 40810058 38600000 4bfd9a81 e8410018
+  code: 2c030006 41810154 3860ffb6 e9210098 <e94d8ff0> 7d295279 39400000 40820a3c
+
+Shows that we returned to userspace without restoring the user r13
+value, due to executing the partially patched STF exit code.
+
+Fix it by doing the patching under stop machine. The CPUs that aren't
+doing the patching will be spinning in the core of the stop machine
+logic. That is currently sufficient for our purposes, because none of
+the patching we do is to that code or anywhere in the vicinity.
+
+Fixes: a048a07d7f45 ("powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit")
+Cc: stable@vger.kernel.org # v4.17+
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20210506044959.1298123-1-mpe@ellerman.id.au
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/lib/feature-fixups.c |   19 +++++++++++++++++--
+ 1 file changed, 17 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/lib/feature-fixups.c
++++ b/arch/powerpc/lib/feature-fixups.c
+@@ -14,6 +14,7 @@
+ #include <linux/string.h>
+ #include <linux/init.h>
+ #include <linux/sched/mm.h>
++#include <linux/stop_machine.h>
+ #include <asm/cputable.h>
+ #include <asm/code-patching.h>
+ #include <asm/page.h>
+@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(e
+                                                          : "unknown");
+ }
++static int __do_stf_barrier_fixups(void *data)
++{
++      enum stf_barrier_type *types = data;
++
++      do_stf_entry_barrier_fixups(*types);
++      do_stf_exit_barrier_fixups(*types);
++
++      return 0;
++}
+ void do_stf_barrier_fixups(enum stf_barrier_type types)
+ {
+-      do_stf_entry_barrier_fixups(types);
+-      do_stf_exit_barrier_fixups(types);
++      /*
++       * The call to the fallback entry flush, and the fallback/sync-ori exit
++       * flush can not be safely patched in/out while other CPUs are executing
++       * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
++       * spin in the stop machine core with interrupts hard disabled.
++       */
++      stop_machine(__do_stf_barrier_fixups, &types, NULL);
+ }
+ void do_uaccess_flush_fixups(enum l1d_flush_type types)
index f005fa54b85a442fa750d665212d8339cc0e93f7..d3142cd49ecc66876913a01adbee375a2019d02f 100644 (file)
@@ -245,3 +245,29 @@ i40e-fix-the-restart-auto-negotiation-after-fec-modi.patch
 i40e-fix-phy-type-identifiers-for-2.5g-and-5g-adapte.patch
 i40e-remove-lldp-frame-filters.patch
 mptcp-fix-splat-when-closing-unaccepted-socket.patch
+arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch
+arc-mm-pae-use-40-bit-physical-page-mask.patch
+arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch
+sh-remove-unused-variable.patch
+powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch
+powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch
+hfsplus-prevent-corruption-in-shrinking-truncate.patch
+squashfs-fix-divide-error-in-calculate_skip.patch
+userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch
+kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch
+mm-hugetlb-fix-f_seal_future_write.patch
+mm-hugetlb-fix-cow-where-page-writtable-in-child.patch
+blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch
+x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch
+arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch
+arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch
+btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch
+btrfs-zoned-fix-silent-data-loss-after-failure-splitting-ordered-extent.patch
+btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch
+btrfs-initialize-return-variable-in-cleanup_free_space_cache_v1.patch
+btrfs-zoned-sanity-check-zone-type.patch
+drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch
+drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch
+drm-i915-avoid-div-by-zero-on-gen2.patch
+drm-i915-dp-use-slow-and-wide-link-training-for-everything.patch
+kvm-exit-halt-polling-on-need_resched-as-well.patch
diff --git a/queue-5.12/sh-remove-unused-variable.patch b/queue-5.12/sh-remove-unused-variable.patch
new file mode 100644 (file)
index 0000000..12b0fd8
--- /dev/null
@@ -0,0 +1,38 @@
+From 0d3ae948741ac6d80e39ab27b45297367ee477de Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 14 Apr 2021 10:05:17 -0700
+Subject: sh: Remove unused variable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Eric Dumazet <edumazet@google.com>
+
+commit 0d3ae948741ac6d80e39ab27b45297367ee477de upstream.
+
+Removes this annoying warning:
+
+arch/sh/kernel/traps.c: In function ‘nmi_trap_handler’:
+arch/sh/kernel/traps.c:183:15: warning: unused variable ‘cpu’ [-Wunused-variable]
+  183 |  unsigned int cpu = smp_processor_id();
+
+Fixes: fe3f1d5d7cd3 ("sh: Get rid of nmi_count()")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20210414170517.1205430-1-eric.dumazet@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/sh/kernel/traps.c |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/arch/sh/kernel/traps.c
++++ b/arch/sh/kernel/traps.c
+@@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(
+ BUILD_TRAP_HANDLER(nmi)
+ {
+-      unsigned int cpu = smp_processor_id();
+       TRAP_HANDLER_DECL;
+       arch_ftrace_nmi_enter();
diff --git a/queue-5.12/squashfs-fix-divide-error-in-calculate_skip.patch b/queue-5.12/squashfs-fix-divide-error-in-calculate_skip.patch
new file mode 100644 (file)
index 0000000..1894fad
--- /dev/null
@@ -0,0 +1,53 @@
+From d6e621de1fceb3b098ebf435ef7ea91ec4838a1a Mon Sep 17 00:00:00 2001
+From: Phillip Lougher <phillip@squashfs.org.uk>
+Date: Fri, 14 May 2021 17:27:16 -0700
+Subject: squashfs: fix divide error in calculate_skip()
+
+From: Phillip Lougher <phillip@squashfs.org.uk>
+
+commit d6e621de1fceb3b098ebf435ef7ea91ec4838a1a upstream.
+
+Syzbot has reported a "divide error" which has been identified as being
+caused by a corrupted file_size value within the file inode.  This value
+has been corrupted to a much larger value than expected.
+
+Calculate_skip() is passed i_size_read(inode) >> msblk->block_log.  Due to
+the file_size value corruption this overflows the int argument/variable in
+that function, leading to the divide error.
+
+This patch changes the function to use u64.  This will accommodate any
+unexpectedly large values due to corruption.
+
+The value returned from calculate_skip() is clamped to be never more than
+SQUASHFS_CACHED_BLKS - 1, or 7.  So file_size corruption does not lead to
+an unexpectedly large return result here.
+
+Link: https://lkml.kernel.org/r/20210507152618.9447-1-phillip@squashfs.org.uk
+Signed-off-by: Phillip Lougher <phillip@squashfs.org.uk>
+Reported-by: <syzbot+e8f781243ce16ac2f962@syzkaller.appspotmail.com>
+Reported-by: <syzbot+7b98870d4fec9447b951@syzkaller.appspotmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/squashfs/file.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/squashfs/file.c
++++ b/fs/squashfs/file.c
+@@ -211,11 +211,11 @@ failure:
+  * If the skip factor is limited in this way then the file will use multiple
+  * slots.
+  */
+-static inline int calculate_skip(int blocks)
++static inline int calculate_skip(u64 blocks)
+ {
+-      int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
++      u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
+                * SQUASHFS_META_INDEXES);
+-      return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
++      return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1);
+ }
diff --git a/queue-5.12/userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch b/queue-5.12/userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch
new file mode 100644 (file)
index 0000000..4450372
--- /dev/null
@@ -0,0 +1,64 @@
+From 7ed9d238c7dbb1fdb63ad96a6184985151b0171c Mon Sep 17 00:00:00 2001
+From: Axel Rasmussen <axelrasmussen@google.com>
+Date: Fri, 14 May 2021 17:27:19 -0700
+Subject: userfaultfd: release page in error path to avoid BUG_ON
+
+From: Axel Rasmussen <axelrasmussen@google.com>
+
+commit 7ed9d238c7dbb1fdb63ad96a6184985151b0171c upstream.
+
+Consider the following sequence of events:
+
+1. Userspace issues a UFFD ioctl, which ends up calling into
+   shmem_mfill_atomic_pte(). We successfully account the blocks, we
+   shmem_alloc_page(), but then the copy_from_user() fails. We return
+   -ENOENT. We don't release the page we allocated.
+2. Our caller detects this error code, tries the copy_from_user() after
+   dropping the mmap_lock, and retries, calling back into
+   shmem_mfill_atomic_pte().
+3. Meanwhile, let's say another process filled up the tmpfs being used.
+4. So shmem_mfill_atomic_pte() fails to account blocks this time, and
+   immediately returns - without releasing the page.
+
+This triggers a BUG_ON in our caller, which asserts that the page
+should always be consumed, unless -ENOENT is returned.
+
+To fix this, detect if we have such a "dangling" page when accounting
+fails, and if so, release it before returning.
+
+Link: https://lkml.kernel.org/r/20210428230858.348400-1-axelrasmussen@google.com
+Fixes: cb658a453b93 ("userfaultfd: shmem: avoid leaking blocks and used blocks in UFFDIO_COPY")
+Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
+Reported-by: Hugh Dickins <hughd@google.com>
+Acked-by: Hugh Dickins <hughd@google.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/shmem.c |   12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2375,8 +2375,18 @@ static int shmem_mfill_atomic_pte(struct
+       pgoff_t offset, max_off;
+       ret = -ENOMEM;
+-      if (!shmem_inode_acct_block(inode, 1))
++      if (!shmem_inode_acct_block(inode, 1)) {
++              /*
++               * We may have got a page, returned -ENOENT triggering a retry,
++               * and now we find ourselves with -ENOMEM. Release the page, to
++               * avoid a BUG_ON in our caller.
++               */
++              if (unlikely(*pagep)) {
++                      put_page(*pagep);
++                      *pagep = NULL;
++              }
+               goto out;
++      }
+       if (!*pagep) {
+               page = shmem_alloc_page(gfp, info, pgoff);
diff --git a/queue-5.12/x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch b/queue-5.12/x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch
new file mode 100644 (file)
index 0000000..1caa4b0
--- /dev/null
@@ -0,0 +1,106 @@
+From 3743d55b289c203d8f77b7cd47c24926b9d186ae Mon Sep 17 00:00:00 2001
+From: Huang Rui <ray.huang@amd.com>
+Date: Sun, 25 Apr 2021 15:34:51 +0800
+Subject: x86, sched: Fix the AMD CPPC maximum performance value on certain AMD Ryzen generations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Huang Rui <ray.huang@amd.com>
+
+commit 3743d55b289c203d8f77b7cd47c24926b9d186ae upstream.
+
+Some AMD Ryzen generations have a different calculation method for maximum
+performance. 255 is not valid for all ASICs; some specific generations should
+use 166 as the maximum performance. Otherwise, an incorrect frequency value is
+reported, like below:
+
+  ~ → lscpu | grep MHz
+  CPU MHz:                         3400.000
+  CPU max MHz:                     7228.3198
+  CPU min MHz:                     2200.0000
+
+[ mingo: Tidied up whitespace use. ]
+[ Alexander Monakov <amonakov@ispras.ru>: fix 225 -> 255 typo. ]
+
+Fixes: 41ea667227ba ("x86, sched: Calculate frequency invariance for AMD systems")
+Fixes: 3c55e94c0ade ("cpufreq: ACPI: Extend frequency tables to cover boost frequencies")
+Reported-by: Jason Bagavatsingham <jason.bagavatsingham@gmail.com>
+Fixed-by: Alexander Monakov <amonakov@ispras.ru>
+Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Huang Rui <ray.huang@amd.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Jason Bagavatsingham <jason.bagavatsingham@gmail.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20210425073451.2557394-1-ray.huang@amd.com
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=211791
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/include/asm/processor.h |    2 ++
+ arch/x86/kernel/cpu/amd.c        |   16 ++++++++++++++++
+ arch/x86/kernel/smpboot.c        |    2 +-
+ drivers/cpufreq/acpi-cpufreq.c   |    6 +++++-
+ 4 files changed, 24 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -804,8 +804,10 @@ DECLARE_PER_CPU(u64, msr_misc_features_s
+ #ifdef CONFIG_CPU_SUP_AMD
+ extern u32 amd_get_nodes_per_socket(void);
++extern u32 amd_get_highest_perf(void);
+ #else
+ static inline u32 amd_get_nodes_per_socket(void)      { return 0; }
++static inline u32 amd_get_highest_perf(void)          { return 0; }
+ #endif
+ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -1170,3 +1170,19 @@ void set_dr_addr_mask(unsigned long mask
+               break;
+       }
+ }
++
++u32 amd_get_highest_perf(void)
++{
++      struct cpuinfo_x86 *c = &boot_cpu_data;
++
++      if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
++                             (c->x86_model >= 0x70 && c->x86_model < 0x80)))
++              return 166;
++
++      if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
++                             (c->x86_model >= 0x40 && c->x86_model < 0x70)))
++              return 166;
++
++      return 255;
++}
++EXPORT_SYMBOL_GPL(amd_get_highest_perf);
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -2046,7 +2046,7 @@ static bool amd_set_max_freq_ratio(void)
+               return false;
+       }
+-      highest_perf = perf_caps.highest_perf;
++      highest_perf = amd_get_highest_perf();
+       nominal_perf = perf_caps.nominal_perf;
+       if (!highest_perf || !nominal_perf) {
+--- a/drivers/cpufreq/acpi-cpufreq.c
++++ b/drivers/cpufreq/acpi-cpufreq.c
+@@ -646,7 +646,11 @@ static u64 get_max_boost_ratio(unsigned
+               return 0;
+       }
+-      highest_perf = perf_caps.highest_perf;
++      if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
++              highest_perf = amd_get_highest_perf();
++      else
++              highest_perf = perf_caps.highest_perf;
++
+       nominal_perf = perf_caps.nominal_perf;
+       if (!highest_perf || !nominal_perf) {