From: Greg Kroah-Hartman
Date: Sun, 16 May 2021 09:32:50 +0000 (+0200)
Subject: 5.11-stable patches
X-Git-Tag: v5.4.120~78
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=523e03212e8698e42db15fc5c5f6ca71edba935b;p=thirdparty%2Fkernel%2Fstable-queue.git

5.11-stable patches

added patches:
	arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch
	arc-mm-pae-use-40-bit-physical-page-mask.patch
	arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch
	arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch
	arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch
	blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch
	btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch
	btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch
	drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch
	drm-i915-avoid-div-by-zero-on-gen2.patch
	drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch
	hfsplus-prevent-corruption-in-shrinking-truncate.patch
	kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch
	kvm-exit-halt-polling-on-need_resched-as-well.patch
	mm-hugetlb-fix-f_seal_future_write.patch
	powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch
	powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch
	sh-remove-unused-variable.patch
	squashfs-fix-divide-error-in-calculate_skip.patch
	userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch
	x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch
---

diff --git a/queue-5.11/arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch b/queue-5.11/arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch
new file mode 100644
index 00000000000..1790674df64
--- /dev/null
+++ b/queue-5.11/arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch
@@ -0,0 +1,51 @@
+From 3433adc8bd09fc9f29b8baddf33b4ecd1ecd2cdc Mon Sep 17 00:00:00 2001
+From: Vineet Gupta
+Date: Fri, 23 Apr 2021 12:16:25 -0700
+Subject: ARC: entry: fix off-by-one error in syscall number validation
+
+From: Vineet Gupta
+
+commit 3433adc8bd09fc9f29b8baddf33b4ecd1ecd2cdc upstream.
+
+We have NR_syscalls syscalls from [0 .. NR_syscalls-1].
+However the check for an invalid syscall number is "> NR_syscalls" as
+opposed to >=. This off-by-one error erroneously allows "NR_syscalls"
+to be treated as a valid syscall, causing out-of-bounds access into
+the syscall table and an ensuing crash (holes within the syscall table
+have an invalid-entry handler, but this is beyond the array
+implementing the table).
+
+This problem showed up on the v5.6 kernel when testing glibc 2.33 (v5.10
+kernel capable, including faccessat2 syscall 439). The v5.6 kernel has
+NR_syscalls=439 (0 to 438). Due to the bug, 439 passed by glibc was
+not handled as -ENOSYS but processed, leading to a crash.
+
+Link: https://github.com/foss-for-synopsys-dwc-arc-processors/linux/issues/48
+Reported-by: Shahab Vahedi
+Cc:
+Signed-off-by: Vineet Gupta
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arc/kernel/entry.S | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arc/kernel/entry.S
++++ b/arch/arc/kernel/entry.S
+@@ -177,7 +177,7 @@ tracesys:
+
+ 	; Do the Sys Call as we normally would.
+ ; Validate the Sys Call number +- cmp r8, NR_syscalls ++ cmp r8, NR_syscalls - 1 + mov.hi r0, -ENOSYS + bhi tracesys_exit + +@@ -255,7 +255,7 @@ ENTRY(EV_Trap) + ;============ Normal syscall case + + ; syscall num shd not exceed the total system calls avail +- cmp r8, NR_syscalls ++ cmp r8, NR_syscalls - 1 + mov.hi r0, -ENOSYS + bhi .Lret_from_system_call + diff --git a/queue-5.11/arc-mm-pae-use-40-bit-physical-page-mask.patch b/queue-5.11/arc-mm-pae-use-40-bit-physical-page-mask.patch new file mode 100644 index 00000000000..17e5bfb356b --- /dev/null +++ b/queue-5.11/arc-mm-pae-use-40-bit-physical-page-mask.patch @@ -0,0 +1,133 @@ +From c5f756d8c6265ebb1736a7787231f010a3b782e5 Mon Sep 17 00:00:00 2001 +From: Vladimir Isaev +Date: Tue, 27 Apr 2021 15:12:37 +0300 +Subject: ARC: mm: PAE: use 40-bit physical page mask + +From: Vladimir Isaev + +commit c5f756d8c6265ebb1736a7787231f010a3b782e5 upstream. + +32-bit PAGE_MASK can not be used as a mask for physical addresses +when PAE is enabled. PAGE_MASK_PHYS must be used for physical +addresses instead of PAGE_MASK. + +Without this, init gets SIGSEGV if pte_modify was called: + +| potentially unexpected fatal signal 11. +| Path: /bin/busybox +| CPU: 0 PID: 1 Comm: init Not tainted 5.12.0-rc5-00003-g1e43c377a79f-dirty +| Insn could not be fetched +| @No matching VMA found +| ECR: 0x00040000 EFA: 0x00000000 ERET: 0x00000000 +| STAT: 0x80080082 [IE U ] BTA: 0x00000000 +| SP: 0x5f9ffe44 FP: 0x00000000 BLK: 0xaf3d4 +| LPS: 0x000d093e LPE: 0x000d0950 LPC: 0x00000000 +| r00: 0x00000002 r01: 0x5f9fff14 r02: 0x5f9fff20 +| ... +| Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b + +Signed-off-by: Vladimir Isaev +Reported-by: kernel test robot +Cc: Vineet Gupta +Cc: stable@vger.kernel.org +Signed-off-by: Vineet Gupta +Signed-off-by: Greg Kroah-Hartman +--- + arch/arc/include/asm/page.h | 12 ++++++++++++ + arch/arc/include/asm/pgtable.h | 12 +++--------- + arch/arc/include/uapi/asm/page.h | 1 - + arch/arc/mm/ioremap.c | 5 +++-- + arch/arc/mm/tlb.c | 2 +- + 5 files changed, 19 insertions(+), 13 deletions(-) + +--- a/arch/arc/include/asm/page.h ++++ b/arch/arc/include/asm/page.h +@@ -7,6 +7,18 @@ + + #include + ++#ifdef CONFIG_ARC_HAS_PAE40 ++ ++#define MAX_POSSIBLE_PHYSMEM_BITS 40 ++#define PAGE_MASK_PHYS (0xff00000000ull | PAGE_MASK) ++ ++#else /* CONFIG_ARC_HAS_PAE40 */ ++ ++#define MAX_POSSIBLE_PHYSMEM_BITS 32 ++#define PAGE_MASK_PHYS PAGE_MASK ++ ++#endif /* CONFIG_ARC_HAS_PAE40 */ ++ + #ifndef __ASSEMBLY__ + + #define clear_page(paddr) memset((paddr), 0, PAGE_SIZE) +--- a/arch/arc/include/asm/pgtable.h ++++ b/arch/arc/include/asm/pgtable.h +@@ -107,8 +107,8 @@ + #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE) + + /* Set of bits not changed in pte_modify */ +-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL) +- ++#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \ ++ _PAGE_SPECIAL) + /* More Abbrevaited helpers */ + #define PAGE_U_NONE __pgprot(___DEF) + #define PAGE_U_R __pgprot(___DEF | _PAGE_READ) +@@ -132,13 +132,7 @@ + #define PTE_BITS_IN_PD0 (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ) + #define PTE_BITS_RWX (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ) + +-#ifdef CONFIG_ARC_HAS_PAE40 +-#define PTE_BITS_NON_RWX_IN_PD1 (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE) +-#define MAX_POSSIBLE_PHYSMEM_BITS 40 +-#else +-#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK | _PAGE_CACHEABLE) +-#define MAX_POSSIBLE_PHYSMEM_BITS 32 +-#endif ++#define PTE_BITS_NON_RWX_IN_PD1 (PAGE_MASK_PHYS | 
_PAGE_CACHEABLE) + + /************************************************************************** + * Mapping of vm_flags (Generic VM) to PTE flags (arch specific) +--- a/arch/arc/include/uapi/asm/page.h ++++ b/arch/arc/include/uapi/asm/page.h +@@ -33,5 +33,4 @@ + + #define PAGE_MASK (~(PAGE_SIZE-1)) + +- + #endif /* _UAPI__ASM_ARC_PAGE_H */ +--- a/arch/arc/mm/ioremap.c ++++ b/arch/arc/mm/ioremap.c +@@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap); + void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size, + unsigned long flags) + { ++ unsigned int off; + unsigned long vaddr; + struct vm_struct *area; +- phys_addr_t off, end; ++ phys_addr_t end; + pgprot_t prot = __pgprot(flags); + + /* Don't allow wraparound, zero size */ +@@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t p + + /* Mappings have to be page-aligned */ + off = paddr & ~PAGE_MASK; +- paddr &= PAGE_MASK; ++ paddr &= PAGE_MASK_PHYS; + size = PAGE_ALIGN(end + 1) - paddr; + + /* +--- a/arch/arc/mm/tlb.c ++++ b/arch/arc/mm/tlb.c +@@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_str + pte_t *ptep) + { + unsigned long vaddr = vaddr_unaligned & PAGE_MASK; +- phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK; ++ phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS; + struct page *page = pfn_to_page(pte_pfn(*ptep)); + + create_tlb(vma, vaddr, ptep); diff --git a/queue-5.11/arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch b/queue-5.11/arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch new file mode 100644 index 00000000000..062c31d993e --- /dev/null +++ b/queue-5.11/arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch @@ -0,0 +1,56 @@ +From 1d5e4640e5df15252398c1b621f6bd432f2d7f17 Mon Sep 17 00:00:00 2001 +From: Vladimir Isaev +Date: Tue, 27 Apr 2021 15:13:54 +0300 +Subject: ARC: mm: Use max_high_pfn as a HIGHMEM zone border + +From: Vladimir Isaev + +commit 1d5e4640e5df15252398c1b621f6bd432f2d7f17 upstream. + +Commit 4af22ded0ecf ("arc: fix memory initialization for systems +with two memory banks") fixed highmem, but for the PAE case it causes +bug messages: + +| BUG: Bad page state in process swapper pfn:80000 +| page:(ptrval) refcount:0 mapcount:1 mapping:00000000 index:0x0 pfn:0x80000 flags: 0x0() +| raw: 00000000 00000100 00000122 00000000 00000000 00000000 00000000 00000000 +| raw: 00000000 +| page dumped because: nonzero mapcount +| Modules linked in: +| CPU: 0 PID: 0 Comm: swapper Not tainted 5.12.0-rc5-00003-g1e43c377a79f #1 + +This is because the fix expects highmem to be always less than +lowmem and uses min_low_pfn as an upper zone border for highmem. + +max_high_pfn should be ok for both highmem and highmem+PAE cases. + +Fixes: 4af22ded0ecf ("arc: fix memory initialization for systems with two memory banks") +Signed-off-by: Vladimir Isaev +Cc: Mike Rapoport +Cc: stable@vger.kernel.org #5.8 onwards +Signed-off-by: Vineet Gupta +Signed-off-by: Greg Kroah-Hartman +--- + arch/arc/mm/init.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +--- a/arch/arc/mm/init.c ++++ b/arch/arc/mm/init.c +@@ -157,7 +157,16 @@ void __init setup_arch_memory(void) + min_high_pfn = PFN_DOWN(high_mem_start); + max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz); + +- max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn; ++ /* ++ * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE. ++ * For HIGHMEM without PAE max_high_pfn should be less than ++ * min_low_pfn to guarantee that these two regions don't overlap. ++ * For PAE case highmem is greater than lowmem, so it is natural ++ * to use max_high_pfn. 
++ * ++ * In both cases, holes should be handled by pfn_valid(). ++ */ ++ max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn; + + high_memory = (void *)(min_high_pfn << PAGE_SHIFT); + diff --git a/queue-5.11/arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch b/queue-5.11/arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch new file mode 100644 index 00000000000..5dbb8c685d8 --- /dev/null +++ b/queue-5.11/arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch @@ -0,0 +1,53 @@ +From 588a513d34257fdde95a9f0df0202e31998e85c6 Mon Sep 17 00:00:00 2001 +From: Catalin Marinas +Date: Fri, 14 May 2021 10:50:01 +0100 +Subject: arm64: Fix race condition on PG_dcache_clean in __sync_icache_dcache() + +From: Catalin Marinas + +commit 588a513d34257fdde95a9f0df0202e31998e85c6 upstream. + +To ensure that instructions are observable in a new mapping, the arm64 +set_pte_at() implementation cleans the D-cache and invalidates the +I-cache to the PoU. As an optimisation, this is only done on executable +mappings and the PG_dcache_clean page flag is set to avoid future cache +maintenance on the same page. + +When two different processes map the same page (e.g. private executable +file or shared mapping) there's a potential race on checking and setting +PG_dcache_clean via set_pte_at() -> __sync_icache_dcache(). While on the +fault paths the page is locked (PG_locked), mprotect() does not take the +page lock. The result is that one process may see the PG_dcache_clean +flag set but the I/D cache maintenance not yet performed. + +Avoid test_and_set_bit(PG_dcache_clean) in favour of separate test_bit() +and set_bit(). In the rare event of a race, the cache maintenance is +done twice. + +Signed-off-by: Catalin Marinas +Cc: +Cc: Will Deacon +Cc: Steven Price +Reviewed-by: Steven Price +Acked-by: Will Deacon +Link: https://lore.kernel.org/r/20210514095001.13236-1-catalin.marinas@arm.com +Signed-off-by: Catalin Marinas +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm64/mm/flush.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/arch/arm64/mm/flush.c ++++ b/arch/arm64/mm/flush.c +@@ -55,8 +55,10 @@ void __sync_icache_dcache(pte_t pte) + { + struct page *page = pte_page(pte); + +- if (!test_and_set_bit(PG_dcache_clean, &page->flags)) ++ if (!test_bit(PG_dcache_clean, &page->flags)) { + sync_icache_aliases(page_address(page), page_size(page)); ++ set_bit(PG_dcache_clean, &page->flags); ++ } + } + EXPORT_SYMBOL_GPL(__sync_icache_dcache); + diff --git a/queue-5.11/arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch b/queue-5.11/arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch new file mode 100644 index 00000000000..03a9017588b --- /dev/null +++ b/queue-5.11/arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch @@ -0,0 +1,51 @@ +From 37a8024d265564eba680575df6421f19db21dfce Mon Sep 17 00:00:00 2001 +From: Peter Collingbourne +Date: Fri, 7 May 2021 11:59:05 -0700 +Subject: arm64: mte: initialize RGSR_EL1.SEED in __cpu_setup + +From: Peter Collingbourne + +commit 37a8024d265564eba680575df6421f19db21dfce upstream. + +A valid implementation choice for the ChooseRandomNonExcludedTag() +pseudocode function used by IRG is to behave in the same way as with +GCR_EL1.RRND=0. This would mean that RGSR_EL1.SEED is used as an LFSR +which must have a non-zero value in order for IRG to properly produce +pseudorandom numbers. However, RGSR_EL1 is reset to an UNKNOWN value +on soft reset and thus may reset to 0. 
Therefore we must initialize
+RGSR_EL1.SEED to a non-zero value in order to ensure that IRG behaves
+as expected.
+
+Signed-off-by: Peter Collingbourne
+Fixes: 3b714d24ef17 ("arm64: mte: CPU feature detection and initial sysreg configuration")
+Cc: # 5.10
+Link: https://linux-review.googlesource.com/id/I2b089b6c7d6f17ee37e2f0db7df5ad5bcc04526c
+Acked-by: Mark Rutland
+Link: https://lore.kernel.org/r/20210507185905.1745402-1-pcc@google.com
+Signed-off-by: Catalin Marinas
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/arm64/mm/proc.S | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/arch/arm64/mm/proc.S
++++ b/arch/arm64/mm/proc.S
+@@ -454,6 +454,18 @@ SYM_FUNC_START(__cpu_setup)
+ 	mov	x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
+ 	msr_s	SYS_GCR_EL1, x10
+
++	/*
++	 * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
++	 * RGSR_EL1.SEED must be non-zero for IRG to produce
++	 * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
++	 * must initialize it.
++	 */
++	mrs	x10, CNTVCT_EL0
++	ands	x10, x10, #SYS_RGSR_EL1_SEED_MASK
++	csinc	x10, x10, xzr, ne
++	lsl	x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
++	msr_s	SYS_RGSR_EL1, x10
++
+ 	/* clear any pending tag check faults in TFSR*_EL1 */
+ 	msr_s	SYS_TFSR_EL1, xzr
+ 	msr_s	SYS_TFSRE0_EL1, xzr
diff --git a/queue-5.11/blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch b/queue-5.11/blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch
new file mode 100644
index 00000000000..7aeee23acdc
--- /dev/null
+++ b/queue-5.11/blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch
@@ -0,0 +1,90 @@
+From e9f4eee9a0023ba22db9560d4cc6ee63f933dae8 Mon Sep 17 00:00:00 2001
+From: Tejun Heo
+Date: Tue, 11 May 2021 21:38:36 -0400
+Subject: blk-iocost: fix weight updates of inner active iocgs
+
+From: Tejun Heo
+
+commit e9f4eee9a0023ba22db9560d4cc6ee63f933dae8 upstream.
+
+When the weight of an active iocg is updated, weight_updated() is called
+which in turn calls __propagate_weights() to update the active and inuse
+weights so that the effective hierarchical weights are updated accordingly.
+
+The current implementation is incorrect for inner active nodes. For an
+active leaf iocg, inuse can be any value between 1 and active and the
+difference represents how much the iocg is donating. When weight is updated,
+as long as inuse is clamped between 1 and the new weight, we're alright and
+this is what __propagate_weights() currently implements.
+
+However, that's not how an active inner node's inuse is set. An inner node's
+inuse is solely determined by the ratio between the sums of inuse's and
+active's of its children - ie. they're results of propagating the leaves'
+active and inuse weights upwards. __propagate_weights() incorrectly applies
+the same clamping as for a leaf when an active inner node's weight is
+updated. Consider a hierarchy which looks like the following with saturating
+workloads in AA and BB.
+
+        R
+       / \
+      A   B
+      |   |
+     AA   BB
+
+1. For both A and B, active=100, inuse=100, hwa=0.5, hwi=0.5.
+
+2. echo 200 > A/io.weight
+
+3. __propagate_weights() updates A's active to 200 and leaves inuse at 100
+   as it's already between 1 and the new active, making A:active=200,
+   A:inuse=100. As R's active_sum is updated along with A's active,
+   A:hwa=2/3, B:hwa=1/3. However, because the inuses didn't change, the
+   hwi's remain unchanged at 0.5.
+
+4. The weight of A is now twice that of B but AA and BB still have the same
+   hwi of 0.5 and thus are doing the same amount of IOs.
+
+Fix it by making __propagate_weights() always calculate the inuse of an
+active inner iocg based on the ratio of child_inuse_sum to child_active_sum.
+
+Signed-off-by: Tejun Heo
+Reported-by: Dan Schatzberg
+Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
+Cc: stable@vger.kernel.org # v5.4+
+Link: https://lore.kernel.org/r/YJsxnLZV1MnBcqjj@slm.duckdns.org
+Signed-off-by: Jens Axboe
+Signed-off-by: Greg Kroah-Hartman
+---
+ block/blk-iocost.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -1073,7 +1073,17 @@ static void __propagate_weights(struct i
+
+ 	lockdep_assert_held(&ioc->lock);
+
+-	inuse = clamp_t(u32, inuse, 1, active);
++	/*
++	 * For an active leaf node, its inuse shouldn't be zero or exceed
++	 * @active. An active internal node's inuse is solely determined by the
++	 * inuse to active ratio of its children regardless of @inuse.
++	 */
++	if (list_empty(&iocg->active_list) && iocg->child_active_sum) {
++		inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum,
++					   iocg->child_active_sum);
++	} else {
++		inuse = clamp_t(u32, inuse, 1, active);
++	}
+
+ 	iocg->last_inuse = iocg->inuse;
+ 	if (save)
+@@ -1090,7 +1100,7 @@ static void __propagate_weights(struct i
+ 	/* update the level sums */
+ 	parent->child_active_sum += (s32)(active - child->active);
+ 	parent->child_inuse_sum += (s32)(inuse - child->inuse);
+-	/* apply the udpates */
++	/* apply the updates */
+ 	child->active = active;
+ 	child->inuse = inuse;
+
diff --git a/queue-5.11/btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch b/queue-5.11/btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch
new file mode 100644
index 00000000000..0f2caa60b3f
--- /dev/null
+++ b/queue-5.11/btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch
@@ -0,0 +1,229 @@
+From f9baa501b4fd6962257853d46ddffbc21f27e344 Mon Sep 17 00:00:00 2001
+From: Filipe Manana
+Date: Thu, 22 Apr 2021 12:08:05 +0100
+Subject: btrfs: fix deadlock when cloning inline extents and using qgroups
+
+From: Filipe Manana
+
+commit f9baa501b4fd6962257853d46ddffbc21f27e344 upstream.
+
+There are a few exceptional cases where cloning an inline extent needs to
+copy the inline extent data into a page of the destination inode.
+
+When this happens, we end up starting a transaction while having a dirty
+page for the destination inode and while having the range locked in the
+destination's inode iotree too. Because when reserving metadata space
+for a transaction we may need to flush existing delalloc in case there is
+not enough free space, we have a mechanism in place to prevent a deadlock,
+which was introduced in commit 3d45f221ce627d ("btrfs: fix deadlock when
+cloning inline extent and low on free metadata space").
+
+However when using qgroups, a transaction also reserves metadata qgroup
+space, which can also result in flushing delalloc in case there is not
+enough available space at the moment. When this happens we deadlock, since
+flushing delalloc requires locking the file range in the inode's iotree
+and the range was already locked at the very beginning of the clone
+operation, before attempting to start the transaction.
+ +When this issue happens, stack traces like the following are reported: + + [72747.556262] task:kworker/u81:9 state:D stack: 0 pid: 225 ppid: 2 flags:0x00004000 + [72747.556268] Workqueue: writeback wb_workfn (flush-btrfs-1142) + [72747.556271] Call Trace: + [72747.556273] __schedule+0x296/0x760 + [72747.556277] schedule+0x3c/0xa0 + [72747.556279] io_schedule+0x12/0x40 + [72747.556284] __lock_page+0x13c/0x280 + [72747.556287] ? generic_file_readonly_mmap+0x70/0x70 + [72747.556325] extent_write_cache_pages+0x22a/0x440 [btrfs] + [72747.556331] ? __set_page_dirty_nobuffers+0xe7/0x160 + [72747.556358] ? set_extent_buffer_dirty+0x5e/0x80 [btrfs] + [72747.556362] ? update_group_capacity+0x25/0x210 + [72747.556366] ? cpumask_next_and+0x1a/0x20 + [72747.556391] extent_writepages+0x44/0xa0 [btrfs] + [72747.556394] do_writepages+0x41/0xd0 + [72747.556398] __writeback_single_inode+0x39/0x2a0 + [72747.556403] writeback_sb_inodes+0x1ea/0x440 + [72747.556407] __writeback_inodes_wb+0x5f/0xc0 + [72747.556410] wb_writeback+0x235/0x2b0 + [72747.556414] ? get_nr_inodes+0x35/0x50 + [72747.556417] wb_workfn+0x354/0x490 + [72747.556420] ? newidle_balance+0x2c5/0x3e0 + [72747.556424] process_one_work+0x1aa/0x340 + [72747.556426] worker_thread+0x30/0x390 + [72747.556429] ? create_worker+0x1a0/0x1a0 + [72747.556432] kthread+0x116/0x130 + [72747.556435] ? kthread_park+0x80/0x80 + [72747.556438] ret_from_fork+0x1f/0x30 + + [72747.566958] Workqueue: btrfs-flush_delalloc btrfs_work_helper [btrfs] + [72747.566961] Call Trace: + [72747.566964] __schedule+0x296/0x760 + [72747.566968] ? finish_wait+0x80/0x80 + [72747.566970] schedule+0x3c/0xa0 + [72747.566995] wait_extent_bit.constprop.68+0x13b/0x1c0 [btrfs] + [72747.566999] ? finish_wait+0x80/0x80 + [72747.567024] lock_extent_bits+0x37/0x90 [btrfs] + [72747.567047] btrfs_invalidatepage+0x299/0x2c0 [btrfs] + [72747.567051] ? find_get_pages_range_tag+0x2cd/0x380 + [72747.567076] __extent_writepage+0x203/0x320 [btrfs] + [72747.567102] extent_write_cache_pages+0x2bb/0x440 [btrfs] + [72747.567106] ? update_load_avg+0x7e/0x5f0 + [72747.567109] ? enqueue_entity+0xf4/0x6f0 + [72747.567134] extent_writepages+0x44/0xa0 [btrfs] + [72747.567137] ? enqueue_task_fair+0x93/0x6f0 + [72747.567140] do_writepages+0x41/0xd0 + [72747.567144] __filemap_fdatawrite_range+0xc7/0x100 + [72747.567167] btrfs_run_delalloc_work+0x17/0x40 [btrfs] + [72747.567195] btrfs_work_helper+0xc2/0x300 [btrfs] + [72747.567200] process_one_work+0x1aa/0x340 + [72747.567202] worker_thread+0x30/0x390 + [72747.567205] ? create_worker+0x1a0/0x1a0 + [72747.567208] kthread+0x116/0x130 + [72747.567211] ? kthread_park+0x80/0x80 + [72747.567214] ret_from_fork+0x1f/0x30 + + [72747.569686] task:fsstress state:D stack: 0 pid:841421 ppid:841417 flags:0x00000000 + [72747.569689] Call Trace: + [72747.569691] __schedule+0x296/0x760 + [72747.569694] schedule+0x3c/0xa0 + [72747.569721] try_flush_qgroup+0x95/0x140 [btrfs] + [72747.569725] ? finish_wait+0x80/0x80 + [72747.569753] btrfs_qgroup_reserve_data+0x34/0x50 [btrfs] + [72747.569781] btrfs_check_data_free_space+0x5f/0xa0 [btrfs] + [72747.569804] btrfs_buffered_write+0x1f7/0x7f0 [btrfs] + [72747.569810] ? path_lookupat.isra.48+0x97/0x140 + [72747.569833] btrfs_file_write_iter+0x81/0x410 [btrfs] + [72747.569836] ? __kmalloc+0x16a/0x2c0 + [72747.569839] do_iter_readv_writev+0x160/0x1c0 + [72747.569843] do_iter_write+0x80/0x1b0 + [72747.569847] vfs_writev+0x84/0x140 + [72747.569869] ? 
btrfs_file_llseek+0x38/0x270 [btrfs]
+ [72747.569873] do_writev+0x65/0x100
+ [72747.569876] do_syscall_64+0x33/0x40
+ [72747.569879] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+ [72747.569899] task:fsstress state:D stack: 0 pid:841424 ppid:841417 flags:0x00004000
+ [72747.569903] Call Trace:
+ [72747.569906] __schedule+0x296/0x760
+ [72747.569909] schedule+0x3c/0xa0
+ [72747.569936] try_flush_qgroup+0x95/0x140 [btrfs]
+ [72747.569940] ? finish_wait+0x80/0x80
+ [72747.569967] __btrfs_qgroup_reserve_meta+0x36/0x50 [btrfs]
+ [72747.569989] start_transaction+0x279/0x580 [btrfs]
+ [72747.570014] clone_copy_inline_extent+0x332/0x490 [btrfs]
+ [72747.570041] btrfs_clone+0x5b7/0x7a0 [btrfs]
+ [72747.570068] ? lock_extent_bits+0x64/0x90 [btrfs]
+ [72747.570095] btrfs_clone_files+0xfc/0x150 [btrfs]
+ [72747.570122] btrfs_remap_file_range+0x3d8/0x4a0 [btrfs]
+ [72747.570126] do_clone_file_range+0xed/0x200
+ [72747.570131] vfs_clone_file_range+0x37/0x110
+ [72747.570134] ioctl_file_clone+0x7d/0xb0
+ [72747.570137] do_vfs_ioctl+0x138/0x630
+ [72747.570140] __x64_sys_ioctl+0x62/0xc0
+ [72747.570143] do_syscall_64+0x33/0x40
+ [72747.570146] entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+So fix this by skipping the flush of delalloc for an inode that is
+flagged with BTRFS_INODE_NO_DELALLOC_FLUSH, meaning it is currently under
+such a special case of cloning an inline extent, when flushing delalloc
+during qgroup metadata reservation.
+
+The special cases for cloning inline extents were added in kernel 5.7
+by commit 05a5a7621ce66c ("Btrfs: implement full reflink support for
+inline extents"), while having qgroup metadata space reservation flushing
+delalloc when low on space was added in kernel 5.9 by commit
+c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get
+-EDQUOT"). So use a "Fixes:" tag for the later commit to ease stable
+kernel backports.
+ +Reported-by: Wang Yugui +Link: https://lore.kernel.org/linux-btrfs/20210421083137.31E3.409509F4@e16-tech.com/ +Fixes: c53e9653605dbf ("btrfs: qgroup: try to flush qgroup space when we get -EDQUOT") +CC: stable@vger.kernel.org # 5.9+ +Reviewed-by: Qu Wenruo +Signed-off-by: Filipe Manana +Reviewed-by: David Sterba +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/ctree.h | 2 +- + fs/btrfs/inode.c | 4 ++-- + fs/btrfs/ioctl.c | 2 +- + fs/btrfs/qgroup.c | 2 +- + fs/btrfs/send.c | 4 ++-- + 5 files changed, 7 insertions(+), 7 deletions(-) + +--- a/fs/btrfs/ctree.h ++++ b/fs/btrfs/ctree.h +@@ -3104,7 +3104,7 @@ int btrfs_truncate_inode_items(struct bt + struct btrfs_inode *inode, u64 new_size, + u32 min_type); + +-int btrfs_start_delalloc_snapshot(struct btrfs_root *root); ++int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context); + int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, + bool in_reclaim_context); + int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end, +--- a/fs/btrfs/inode.c ++++ b/fs/btrfs/inode.c +@@ -9475,7 +9475,7 @@ out: + return ret; + } + +-int btrfs_start_delalloc_snapshot(struct btrfs_root *root) ++int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context) + { + struct writeback_control wbc = { + .nr_to_write = LONG_MAX, +@@ -9488,7 +9488,7 @@ int btrfs_start_delalloc_snapshot(struct + if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) + return -EROFS; + +- return start_delalloc_inodes(root, &wbc, true, false); ++ return start_delalloc_inodes(root, &wbc, true, in_reclaim_context); + } + + int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr, +--- a/fs/btrfs/ioctl.c ++++ b/fs/btrfs/ioctl.c +@@ -1042,7 +1042,7 @@ static noinline int btrfs_mksnapshot(con + */ + btrfs_drew_read_lock(&root->snapshot_lock); + +- ret = btrfs_start_delalloc_snapshot(root); ++ ret = btrfs_start_delalloc_snapshot(root, false); + if (ret) + goto out; + +--- a/fs/btrfs/qgroup.c ++++ b/fs/btrfs/qgroup.c +@@ -3579,7 +3579,7 @@ static int try_flush_qgroup(struct btrfs + return 0; + } + +- ret = btrfs_start_delalloc_snapshot(root); ++ ret = btrfs_start_delalloc_snapshot(root, true); + if (ret < 0) + goto out; + btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); +--- a/fs/btrfs/send.c ++++ b/fs/btrfs/send.c +@@ -7159,7 +7159,7 @@ static int flush_delalloc_roots(struct s + int i; + + if (root) { +- ret = btrfs_start_delalloc_snapshot(root); ++ ret = btrfs_start_delalloc_snapshot(root, false); + if (ret) + return ret; + btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); +@@ -7167,7 +7167,7 @@ static int flush_delalloc_roots(struct s + + for (i = 0; i < sctx->clone_roots_cnt; i++) { + root = sctx->clone_roots[i].root; +- ret = btrfs_start_delalloc_snapshot(root); ++ ret = btrfs_start_delalloc_snapshot(root, false); + if (ret) + return ret; + btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX); diff --git a/queue-5.11/btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch b/queue-5.11/btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch new file mode 100644 index 00000000000..f79720522d5 --- /dev/null +++ b/queue-5.11/btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch @@ -0,0 +1,271 @@ +From 626e9f41f7c281ba3e02843702f68471706aa6d9 Mon Sep 17 00:00:00 2001 +From: Filipe Manana +Date: Tue, 27 Apr 2021 11:27:20 +0100 +Subject: btrfs: fix race leading to unpersisted data and metadata on fsync + +From: 
Filipe Manana + +commit 626e9f41f7c281ba3e02843702f68471706aa6d9 upstream. + +When doing a fast fsync on a file, there is a race which can result in the +fsync returning success to user space without logging the inode and without +durably persisting new data. + +The following example shows one possible scenario for this: + + $ mkfs.btrfs -f /dev/sdc + $ mount /dev/sdc /mnt + + $ touch /mnt/bar + $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/baz + + # Now we have: + # file bar == inode 257 + # file baz == inode 258 + + $ mv /mnt/baz /mnt/foo + + # Now we have: + # file bar == inode 257 + # file foo == inode 258 + + $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo + + # fsync bar before foo, it is important to trigger the race. + $ xfs_io -c "fsync" /mnt/bar + $ xfs_io -c "fsync" /mnt/foo + + # After this: + # inode 257, file bar, is empty + # inode 258, file foo, has 1M filled with 0xcd + + + + # Replay the log: + $ mount /dev/sdc /mnt + + # After this point file foo should have 1M filled with 0xcd and not 0xab + +The following steps explain how the race happens: + +1) Before the first fsync of inode 258, when it has the "baz" name, its + ->logged_trans is 0, ->last_sub_trans is 0 and ->last_log_commit is -1. + The inode also has the full sync flag set; + +2) After the first fsync, we set inode 258 ->logged_trans to 6, which is + the generation of the current transaction, and set ->last_log_commit + to 0, which is the current value of ->last_sub_trans (done at + btrfs_log_inode()). + + The full sync flag is cleared from the inode during the fsync. + + The log sub transaction that was committed had an ID of 0 and when we + synced the log, at btrfs_sync_log(), we incremented root->log_transid + from 0 to 1; + +3) During the rename: + + We update inode 258, through btrfs_update_inode(), and that causes its + ->last_sub_trans to be set to 1 (the current log transaction ID), and + ->last_log_commit remains with a value of 0. + + After updating inode 258, because we have previously logged the inode + in the previous fsync, we log again the inode through the call to + btrfs_log_new_name(). This results in updating the inode's + ->last_log_commit from 0 to 1 (the current value of its + ->last_sub_trans). + + The ->last_sub_trans of inode 257 is updated to 1, which is the ID of + the next log transaction; + +4) Then a buffered write against inode 258 is made. This leaves the value + of ->last_sub_trans as 1 (the ID of the current log transaction, stored + at root->log_transid); + +5) Then an fsync against inode 257 (or any other inode other than 258), + happens. This results in committing the log transaction with ID 1, + which results in updating root->last_log_commit to 1 and bumping + root->log_transid from 1 to 2; + +6) Then an fsync against inode 258 starts. We flush delalloc and wait only + for writeback to complete, since the full sync flag is not set in the + inode's runtime flags - we do not wait for ordered extents to complete. + + Then, at btrfs_sync_file(), we call btrfs_inode_in_log() before the + ordered extent completes. The call returns true: + + static inline bool btrfs_inode_in_log(...) 
+ { + bool ret = false; + + spin_lock(&inode->lock); + if (inode->logged_trans == generation && + inode->last_sub_trans <= inode->last_log_commit && + inode->last_sub_trans <= inode->root->last_log_commit) + ret = true; + spin_unlock(&inode->lock); + return ret; + } + + generation has a value of 6 (fs_info->generation), ->logged_trans also + has a value of 6 (set when we logged the inode during the first fsync + and when logging it during the rename), ->last_sub_trans has a value + of 1, set during the rename (step 3), ->last_log_commit also has a + value of 1 (set in step 3) and root->last_log_commit has a value of 1, + which was set in step 5 when fsyncing inode 257. + + As a consequence we don't log the inode, any new extents and do not + sync the log, resulting in a data loss if a power failure happens + after the fsync and before the current transaction commits. + Also, because we do not log the inode, after a power failure the mtime + and ctime of the inode do not match those we had before. + + When the ordered extent completes before we call btrfs_inode_in_log(), + then the call returns false and we log the inode and sync the log, + since at the end of ordered extent completion we update the inode and + set ->last_sub_trans to 2 (the value of root->log_transid) and + ->last_log_commit to 1. + +This problem is found after removing the check for the emptiness of the +inode's list of modified extents in the recent commit 209ecbb8585bf6 +("btrfs: remove stale comment and logic from btrfs_inode_in_log()"), +added in the 5.13 merge window. However checking the emptiness of the +list is not really the way to solve this problem, and was never intended +to, because while that solves the problem for COW writes, the problem +persists for NOCOW writes because in that case the list is always empty. + +In the case of NOCOW writes, even though we wait for the writeback to +complete before returning from btrfs_sync_file(), we end up not logging +the inode, which has a new mtime/ctime, and because we don't sync the log, +we never issue disk barriers (send REQ_PREFLUSH to the device) since that +only happens when we sync the log (when we write super blocks at +btrfs_sync_log()). So effectively, for a NOCOW case, when we return from +btrfs_sync_file() to user space, we are not guaranteeing that the data is +durably persisted on disk. + +Also, while the example above uses a rename exchange to show how the +problem happens, it is not the only way to trigger it. An alternative +could be adding a new hard link to inode 258, since that also results +in calling btrfs_log_new_name() and updating the inode in the log. +An example reproducer using the addition of a hard link instead of a +rename operation: + + $ mkfs.btrfs -f /dev/sdc + $ mount /dev/sdc /mnt + + $ touch /mnt/bar + $ xfs_io -f -c "pwrite -S 0xab 0 1M" -c "fsync" /mnt/foo + + $ ln /mnt/foo /mnt/foo_link + $ xfs_io -c "pwrite -S 0xcd 0 1M" /mnt/foo + + $ xfs_io -c "fsync" /mnt/bar + $ xfs_io -c "fsync" /mnt/foo + + + + # Replay the log: + $ mount /dev/sdc /mnt + + # After this point file foo often has 1M filled with 0xab and not 0xcd + +The reasons leading to the final fsync of file foo, inode 258, not +persisting the new data are the same as for the previous example with +a rename operation. + +So fix by never skipping logging and log syncing when there are still any +ordered extents in flight. 
To avoid making the conditional if statement +that checks if logging an inode is needed harder to read, place all the +logic into an helper function with separate if statements to make it more +manageable and easier to read. + +A test case for fstests will follow soon. + +For NOCOW writes, the problem existed before commit b5e6c3e170b770 +("btrfs: always wait on ordered extents at fsync time"), introduced in +kernel 4.19, then it went away with that commit since we started to always +wait for ordered extent completion before logging. + +The problem came back again once the fast fsync path was changed again to +avoid waiting for ordered extent completion, in commit 487781796d3022 +("btrfs: make fast fsyncs wait only for writeback"), added in kernel 5.10. + +However, for COW writes, the race only happens after the recent +commit 209ecbb8585bf6 ("btrfs: remove stale comment and logic from +btrfs_inode_in_log()"), introduced in the 5.13 merge window. For NOCOW +writes, the bug existed before that commit. So tag 5.10+ as the release +for stable backports. + +CC: stable@vger.kernel.org # 5.10+ +Signed-off-by: Filipe Manana +Signed-off-by: David Sterba +Signed-off-by: Greg Kroah-Hartman +--- + fs/btrfs/file.c | 35 +++++++++++++++++++++++++---------- + fs/btrfs/tree-log.c | 3 ++- + 2 files changed, 27 insertions(+), 11 deletions(-) + +--- a/fs/btrfs/file.c ++++ b/fs/btrfs/file.c +@@ -2082,6 +2082,30 @@ static int start_ordered_ops(struct inod + return ret; + } + ++static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) ++{ ++ struct btrfs_inode *inode = BTRFS_I(ctx->inode); ++ struct btrfs_fs_info *fs_info = inode->root->fs_info; ++ ++ if (btrfs_inode_in_log(inode, fs_info->generation) && ++ list_empty(&ctx->ordered_extents)) ++ return true; ++ ++ /* ++ * If we are doing a fast fsync we can not bail out if the inode's ++ * last_trans is <= then the last committed transaction, because we only ++ * update the last_trans of the inode during ordered extent completion, ++ * and for a fast fsync we don't wait for that, we only wait for the ++ * writeback to complete. ++ */ ++ if (inode->last_trans <= fs_info->last_trans_committed && ++ (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) || ++ list_empty(&ctx->ordered_extents))) ++ return true; ++ ++ return false; ++} ++ + /* + * fsync call for both files and directories. This logs the inode into + * the tree log instead of forcing full commits whenever possible. +@@ -2196,17 +2220,8 @@ int btrfs_sync_file(struct file *file, l + + atomic_inc(&root->log_batch); + +- /* +- * If we are doing a fast fsync we can not bail out if the inode's +- * last_trans is <= then the last committed transaction, because we only +- * update the last_trans of the inode during ordered extent completion, +- * and for a fast fsync we don't wait for that, we only wait for the +- * writeback to complete. +- */ + smp_mb(); +- if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) || +- (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed && +- (full_sync || list_empty(&ctx.ordered_extents)))) { ++ if (skip_inode_logging(&ctx)) { + /* + * We've had everything committed since the last time we were + * modified so clear this flag in case it was set for whatever +--- a/fs/btrfs/tree-log.c ++++ b/fs/btrfs/tree-log.c +@@ -6066,7 +6066,8 @@ static int btrfs_log_inode_parent(struct + * (since logging them is pointless, a link count of 0 means they + * will never be accessible). 
+ */ +- if (btrfs_inode_in_log(inode, trans->transid) || ++ if ((btrfs_inode_in_log(inode, trans->transid) && ++ list_empty(&ctx->ordered_extents)) || + inode->vfs_inode.i_nlink == 0) { + ret = BTRFS_NO_LOG_SYNC; + goto end_no_trans; diff --git a/queue-5.11/drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch b/queue-5.11/drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch new file mode 100644 index 00000000000..3c189971340 --- /dev/null +++ b/queue-5.11/drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch @@ -0,0 +1,38 @@ +From fe1c97d008f86f672f0e9265f180c22451ca3b9f Mon Sep 17 00:00:00 2001 +From: David Ward +Date: Mon, 10 May 2021 05:30:39 -0400 +Subject: drm/amd/display: Initialize attribute for hdcp_srm sysfs file + +From: David Ward + +commit fe1c97d008f86f672f0e9265f180c22451ca3b9f upstream. + +It is stored in dynamically allocated memory, so sysfs_bin_attr_init() must +be called to initialize it. (Note: "initialization" only sets the .attr.key +member in this struct; it does not change the value of any other members.) + +Otherwise, when CONFIG_DEBUG_LOCK_ALLOC=y this message appears during boot: + + BUG: key ffff9248900cd148 has not been registered! + +Fixes: 9037246bb2da ("drm/amd/display: Add sysfs interface for set/get srm") +Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1586 +Reported-by: Mikhail Gavrilov +Signed-off-by: David Ward +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c ++++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c +@@ -643,6 +643,7 @@ struct hdcp_workqueue *hdcp_create_workq + + /* File created at /sys/class/drm/card0/device/hdcp_srm*/ + hdcp_work[0].attr = data_attr; ++ sysfs_bin_attr_init(&hdcp_work[0].attr); + + if (sysfs_create_bin_file(&adev->dev->kobj, &hdcp_work[0].attr)) + DRM_WARN("Failed to create device file hdcp_srm"); diff --git a/queue-5.11/drm-i915-avoid-div-by-zero-on-gen2.patch b/queue-5.11/drm-i915-avoid-div-by-zero-on-gen2.patch new file mode 100644 index 00000000000..f00654346d1 --- /dev/null +++ b/queue-5.11/drm-i915-avoid-div-by-zero-on-gen2.patch @@ -0,0 +1,47 @@ +From 4819d16d91145966ce03818a95169df1fd56b299 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= +Date: Wed, 21 Apr 2021 18:33:58 +0300 +Subject: drm/i915: Avoid div-by-zero on gen2 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Ville Syrjälä + +commit 4819d16d91145966ce03818a95169df1fd56b299 upstream. + +Gen2 tiles are 2KiB in size so i915_gem_object_get_tile_row_size() +can in fact return <4KiB, which leads to div-by-zero here. +Avoid that. + +Not sure i915_gem_object_get_tile_row_size() is entirely +sane anyway since it doesn't account for the different tile +layouts on i8xx/i915... + +I'm not able to hit this before commit 6846895fde05 ("drm/i915: +Replace PIN_NONFAULT with calls to PIN_NOEVICT") and it looks +like I also need to run recent version of Mesa. With those in +place xonotic trips on this quite easily on my 85x. 
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Chris Wilson
+Signed-off-by: Ville Syrjälä
+Link: https://patchwork.freedesktop.org/patch/msgid/20210421153401.13847-2-ville.syrjala@linux.intel.com
+(cherry picked from commit ed52c62d386f764194e0184fdb905d5f24194cae)
+Signed-off-by: Jani Nikula
+Signed-off-by: Greg Kroah-Hartman
+---
+ drivers/gpu/drm/i915/gem/i915_gem_mman.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
++++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+@@ -189,7 +189,7 @@ compute_partial_view(const struct drm_i9
+ 	struct i915_ggtt_view view;
+
+ 	if (i915_gem_object_is_tiled(obj))
+-		chunk = roundup(chunk, tile_row_pages(obj));
++		chunk = roundup(chunk, tile_row_pages(obj) ?: 1);
+
+ 	view.type = I915_GGTT_VIEW_PARTIAL;
+ 	view.partial.offset = rounddown(page_offset, chunk);
diff --git a/queue-5.11/drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch b/queue-5.11/drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch
new file mode 100644
index 00000000000..b6cdb6789d2
--- /dev/null
+++ b/queue-5.11/drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch
@@ -0,0 +1,85 @@
+From 227545b9a08c68778ddd89428f99c351fc9315ac Mon Sep 17 00:00:00 2001
+From: Kai-Heng Feng
+Date: Fri, 30 Apr 2021 12:56:56 +0800
+Subject: drm/radeon/dpm: Disable sclk switching on Oland when two 4K 60Hz monitors are connected
+
+From: Kai-Heng Feng
+
+commit 227545b9a08c68778ddd89428f99c351fc9315ac upstream.
+
+Screen flickers rapidly when two 4K 60Hz monitors are in use. This issue
+doesn't happen when one monitor is 4K 60Hz (pixelclock 594MHz) and
+another one is 4K 30Hz (pixelclock 297MHz).
+
+The issue is gone after setting "power_dpm_force_performance_level" to
+"high". Following that indication, we found that the issue occurs when
+sclk is too low.
+
+So resolve the issue by disabling sclk switching when there are two
+monitors that require a high pixelclock (> 297MHz).
+
+v2:
+ - Only apply the fix to Oland.
+Signed-off-by: Kai-Heng Feng +Signed-off-by: Alex Deucher +Cc: stable@vger.kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/radeon/radeon.h | 1 + + drivers/gpu/drm/radeon/radeon_pm.c | 8 ++++++++ + drivers/gpu/drm/radeon/si_dpm.c | 3 +++ + 3 files changed, 12 insertions(+) + +--- a/drivers/gpu/drm/radeon/radeon.h ++++ b/drivers/gpu/drm/radeon/radeon.h +@@ -1559,6 +1559,7 @@ struct radeon_dpm { + void *priv; + u32 new_active_crtcs; + int new_active_crtc_count; ++ int high_pixelclock_count; + u32 current_active_crtcs; + int current_active_crtc_count; + bool single_display; +--- a/drivers/gpu/drm/radeon/radeon_pm.c ++++ b/drivers/gpu/drm/radeon/radeon_pm.c +@@ -1775,6 +1775,7 @@ static void radeon_pm_compute_clocks_dpm + struct drm_device *ddev = rdev->ddev; + struct drm_crtc *crtc; + struct radeon_crtc *radeon_crtc; ++ struct radeon_connector *radeon_connector; + + if (!rdev->pm.dpm_enabled) + return; +@@ -1784,6 +1785,7 @@ static void radeon_pm_compute_clocks_dpm + /* update active crtc counts */ + rdev->pm.dpm.new_active_crtcs = 0; + rdev->pm.dpm.new_active_crtc_count = 0; ++ rdev->pm.dpm.high_pixelclock_count = 0; + if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) { + list_for_each_entry(crtc, + &ddev->mode_config.crtc_list, head) { +@@ -1791,6 +1793,12 @@ static void radeon_pm_compute_clocks_dpm + if (crtc->enabled) { + rdev->pm.dpm.new_active_crtcs |= (1 << radeon_crtc->crtc_id); + rdev->pm.dpm.new_active_crtc_count++; ++ if (!radeon_crtc->connector) ++ continue; ++ ++ radeon_connector = to_radeon_connector(radeon_crtc->connector); ++ if (radeon_connector->pixelclock_for_modeset > 297000) ++ rdev->pm.dpm.high_pixelclock_count++; + } + } + } +--- a/drivers/gpu/drm/radeon/si_dpm.c ++++ b/drivers/gpu/drm/radeon/si_dpm.c +@@ -2979,6 +2979,9 @@ static void si_apply_state_adjust_rules( + (rdev->pdev->device == 0x6605)) { + max_sclk = 75000; + } ++ ++ if (rdev->pm.dpm.high_pixelclock_count > 1) ++ disable_sclk_switching = true; + } + + if (rps->vce_active) { diff --git a/queue-5.11/hfsplus-prevent-corruption-in-shrinking-truncate.patch b/queue-5.11/hfsplus-prevent-corruption-in-shrinking-truncate.patch new file mode 100644 index 00000000000..706a839af59 --- /dev/null +++ b/queue-5.11/hfsplus-prevent-corruption-in-shrinking-truncate.patch @@ -0,0 +1,89 @@ +From c3187cf32216313fb316084efac4dab3a8459b1d Mon Sep 17 00:00:00 2001 +From: Jouni Roivas +Date: Fri, 14 May 2021 17:27:33 -0700 +Subject: hfsplus: prevent corruption in shrinking truncate + +From: Jouni Roivas + +commit c3187cf32216313fb316084efac4dab3a8459b1d upstream. + +I believe there are some issues introduced by commit 31651c607151 +("hfsplus: avoid deadlock on file truncation") + +HFS+ has extent records which always contains 8 extents. In case the +first extent record in catalog file gets full, new ones are allocated from +extents overflow file. + +In case shrinking truncate happens to middle of an extent record which +locates in extents overflow file, the logic in hfsplus_file_truncate() was +changed so that call to hfs_brec_remove() is not guarded any more. + +Right action would be just freeing the extents that exceed the new size +inside extent record by calling hfsplus_free_extents(), and then check if +the whole extent record should be removed. However since the guard +(blk_cnt > start) is now after the call to hfs_brec_remove(), this has +unfortunate effect that the last matching extent record is removed +unconditionally. 
+
+To reproduce this issue, create a file which has at least 10 extents, and
+then perform a shrinking truncate into the middle of the last extent
+record, so that the number of remaining extents is not under or divisible
+by 8. This causes the last extent record (8 extents) to be removed totally
+instead of truncating into the middle of it. Thus this causes corruption,
+and lost data.
+
+The fix for this is simply checking if the new truncated end is below the
+start of this extent record, making it safe to remove the full extent
+record. However the call to hfs_brec_remove() can't be moved to its
+previous place since we're dropping ->tree_lock and it can cause a race
+condition and the cached info being invalidated, possibly corrupting the
+node data.
+
+Another issue is related to this one. When entering into the block
+(blk_cnt > start) we are not holding the ->tree_lock. We break out from
+the loop not holding the lock, but hfs_find_exit() does unlock it. Not
+sure if it's possible for someone else to take the lock under our feet,
+but it can cause hard to debug errors and premature unlocking. Even if
+there's no real risk of it, the locking should still always be kept in
+balance. Thus taking the lock now just before the check.
+
+Link: https://lkml.kernel.org/r/20210429165139.3082828-1-jouni.roivas@tuxera.com
+Fixes: 31651c607151f ("hfsplus: avoid deadlock on file truncation")
+Signed-off-by: Jouni Roivas
+Reviewed-by: Anton Altaparmakov
+Cc: Anatoly Trosinenko
+Cc: Viacheslav Dubeyko
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ fs/hfsplus/extents.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/fs/hfsplus/extents.c
++++ b/fs/hfsplus/extents.c
+@@ -598,13 +598,15 @@ void hfsplus_file_truncate(struct inode
+ 		res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
+ 		if (res)
+ 			break;
+-		hfs_brec_remove(&fd);
+
+-		mutex_unlock(&fd.tree->tree_lock);
+ 		start = hip->cached_start;
++		if (blk_cnt <= start)
++			hfs_brec_remove(&fd);
++		mutex_unlock(&fd.tree->tree_lock);
+ 		hfsplus_free_extents(sb, hip->cached_extents,
+ 				alloc_cnt - start, alloc_cnt - blk_cnt);
+ 		hfsplus_dump_extent(hip->cached_extents);
++		mutex_lock(&fd.tree->tree_lock);
+ 		if (blk_cnt > start) {
+ 			hip->extent_state |= HFSPLUS_EXT_DIRTY;
+ 			break;
+@@ -612,7 +614,6 @@ void hfsplus_file_truncate(struct inode
+ 		alloc_cnt = start;
+ 		hip->cached_start = hip->cached_blocks = 0;
+ 		hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
+-		mutex_lock(&fd.tree->tree_lock);
+ 	}
+ 	hfs_find_exit(&fd);
+
diff --git a/queue-5.11/kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch b/queue-5.11/kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch
new file mode 100644
index 00000000000..574bcc95091
--- /dev/null
+++ b/queue-5.11/kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch
@@ -0,0 +1,96 @@
+From f649dc0e0d7b509c75570ee403723660f5b72ec7 Mon Sep 17 00:00:00 2001
+From: Peter Collingbourne
+Date: Fri, 14 May 2021 17:27:27 -0700
+Subject: kasan: fix unit tests with CONFIG_UBSAN_LOCAL_BOUNDS enabled
+
+From: Peter Collingbourne
+
+commit f649dc0e0d7b509c75570ee403723660f5b72ec7 upstream.
+
+These tests deliberately access these arrays out of bounds, which will
+cause the dynamic local bounds checks inserted by
+CONFIG_UBSAN_LOCAL_BOUNDS to fail and panic the kernel. To avoid this
+problem, access the arrays via volatile pointers, which will prevent the
+compiler from being able to determine the array bounds.
+ +These accesses use volatile pointers to char (char *volatile) rather than +the more conventional pointers to volatile char (volatile char *) because +we want to prevent the compiler from making inferences about the pointer +itself (i.e. its array bounds), not the data that it refers to. + +Link: https://lkml.kernel.org/r/20210507025915.1464056-1-pcc@google.com +Link: https://linux-review.googlesource.com/id/I90b1713fbfa1bf68ff895aef099ea77b98a7c3b9 +Signed-off-by: Peter Collingbourne +Tested-by: Alexander Potapenko +Reviewed-by: Andrey Konovalov +Cc: Peter Collingbourne +Cc: George Popescu +Cc: Elena Petrova +Cc: Evgenii Stepanov +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + lib/test_kasan.c | 29 +++++++++++++++++++++++------ + 1 file changed, 23 insertions(+), 6 deletions(-) + +--- a/lib/test_kasan.c ++++ b/lib/test_kasan.c +@@ -449,8 +449,20 @@ static char global_array[10]; + + static void kasan_global_oob(struct kunit *test) + { +- volatile int i = 3; +- char *p = &global_array[ARRAY_SIZE(global_array) + i]; ++ /* ++ * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS ++ * from failing here and panicing the kernel, access the array via a ++ * volatile pointer, which will prevent the compiler from being able to ++ * determine the array bounds. ++ * ++ * This access uses a volatile pointer to char (char *volatile) rather ++ * than the more conventional pointer to volatile char (volatile char *) ++ * because we want to prevent the compiler from making inferences about ++ * the pointer itself (i.e. its array bounds), not the data that it ++ * refers to. ++ */ ++ char *volatile array = global_array; ++ char *p = &array[ARRAY_SIZE(global_array) + 3]; + + /* Only generic mode instruments globals. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { +@@ -479,8 +491,9 @@ static void ksize_unpoisons_memory(struc + static void kasan_stack_oob(struct kunit *test) + { + char stack_array[10]; +- volatile int i = OOB_TAG_OFF; +- char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; ++ /* See comment in kasan_global_oob. */ ++ char *volatile array = stack_array; ++ char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF]; + + if (!IS_ENABLED(CONFIG_KASAN_STACK)) { + kunit_info(test, "CONFIG_KASAN_STACK is not enabled"); +@@ -494,7 +507,9 @@ static void kasan_alloca_oob_left(struct + { + volatile int i = 10; + char alloca_array[i]; +- char *p = alloca_array - 1; ++ /* See comment in kasan_global_oob. */ ++ char *volatile array = alloca_array; ++ char *p = array - 1; + + /* Only generic mode instruments dynamic allocas. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { +@@ -514,7 +529,9 @@ static void kasan_alloca_oob_right(struc + { + volatile int i = 10; + char alloca_array[i]; +- char *p = alloca_array + i; ++ /* See comment in kasan_global_oob. */ ++ char *volatile array = alloca_array; ++ char *p = array + i; + + /* Only generic mode instruments dynamic allocas. 
*/ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { diff --git a/queue-5.11/kvm-exit-halt-polling-on-need_resched-as-well.patch b/queue-5.11/kvm-exit-halt-polling-on-need_resched-as-well.patch new file mode 100644 index 00000000000..88ee88b5727 --- /dev/null +++ b/queue-5.11/kvm-exit-halt-polling-on-need_resched-as-well.patch @@ -0,0 +1,38 @@ +From 262de4102c7bb8e59f26a967a8ffe8cce85cc537 Mon Sep 17 00:00:00 2001 +From: Benjamin Segall +Date: Thu, 29 Apr 2021 16:22:34 +0000 +Subject: kvm: exit halt polling on need_resched() as well + +From: Benjamin Segall + +commit 262de4102c7bb8e59f26a967a8ffe8cce85cc537 upstream. + +single_task_running() is usually more general than need_resched() +but CFS_BANDWIDTH throttling will use resched_task() when there +is just one task to get the task to block. This was causing +long-need_resched warnings and was likely allowing VMs to +overrun their quota when halt polling. + +Signed-off-by: Ben Segall +Signed-off-by: Venkatesh Srinivas +Message-Id: <20210429162233.116849-1-venkateshs@chromium.org> +Signed-off-by: Paolo Bonzini +Cc: stable@vger.kernel.org +Reviewed-by: Jim Mattson +Signed-off-by: Greg Kroah-Hartman +--- + virt/kvm/kvm_main.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -2814,7 +2814,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcp + goto out; + } + poll_end = cur = ktime_get(); +- } while (single_task_running() && ktime_before(cur, stop)); ++ } while (single_task_running() && !need_resched() && ++ ktime_before(cur, stop)); + } + + prepare_to_rcuwait(&vcpu->wait); diff --git a/queue-5.11/mm-hugetlb-fix-f_seal_future_write.patch b/queue-5.11/mm-hugetlb-fix-f_seal_future_write.patch new file mode 100644 index 00000000000..03d3c280d4e --- /dev/null +++ b/queue-5.11/mm-hugetlb-fix-f_seal_future_write.patch @@ -0,0 +1,150 @@ +From 22247efd822e6d263f3c8bd327f3f769aea9b1d9 Mon Sep 17 00:00:00 2001 +From: Peter Xu +Date: Fri, 14 May 2021 17:27:04 -0700 +Subject: mm/hugetlb: fix F_SEAL_FUTURE_WRITE + +From: Peter Xu + +commit 22247efd822e6d263f3c8bd327f3f769aea9b1d9 upstream. + +Patch series "mm/hugetlb: Fix issues on file sealing and fork", v2. + +Hugh reported issue with F_SEAL_FUTURE_WRITE not applied correctly to +hugetlbfs, which I can easily verify using the memfd_test program, which +seems that the program is hardly run with hugetlbfs pages (as by default +shmem). + +Meanwhile I found another probably even more severe issue on that hugetlb +fork won't wr-protect child cow pages, so child can potentially write to +parent private pages. Patch 2 addresses that. + +After this series applied, "memfd_test hugetlbfs" should start to pass. + +This patch (of 2): + +F_SEAL_FUTURE_WRITE is missing for hugetlb starting from the first day. +There is a test program for that and it fails constantly. + +$ ./memfd_test hugetlbfs +memfd-hugetlb: CREATE +memfd-hugetlb: BASIC +memfd-hugetlb: SEAL-WRITE +memfd-hugetlb: SEAL-FUTURE-WRITE +mmap() didn't fail as expected +Aborted (core dumped) + +I think it's probably because no one is really running the hugetlbfs test. + +Fix it by checking FUTURE_WRITE also in hugetlbfs_file_mmap() as what we +do in shmem_mmap(). Generalize a helper for that. 
+ +Link: https://lkml.kernel.org/r/20210503234356.9097-1-peterx@redhat.com +Link: https://lkml.kernel.org/r/20210503234356.9097-2-peterx@redhat.com +Fixes: ab3948f58ff84 ("mm/memfd: add an F_SEAL_FUTURE_WRITE seal to memfd") +Signed-off-by: Peter Xu +Reported-by: Hugh Dickins +Reviewed-by: Mike Kravetz +Cc: Joel Fernandes (Google) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + fs/hugetlbfs/inode.c | 5 +++++ + include/linux/mm.h | 32 ++++++++++++++++++++++++++++++++ + mm/shmem.c | 22 ++++------------------ + 3 files changed, 41 insertions(+), 18 deletions(-) + +--- a/fs/hugetlbfs/inode.c ++++ b/fs/hugetlbfs/inode.c +@@ -131,6 +131,7 @@ static void huge_pagevec_release(struct + static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) + { + struct inode *inode = file_inode(file); ++ struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); + loff_t len, vma_len; + int ret; + struct hstate *h = hstate_file(file); +@@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct fi + vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND; + vma->vm_ops = &hugetlb_vm_ops; + ++ ret = seal_check_future_write(info->seals, vma); ++ if (ret) ++ return ret; ++ + /* + * page based offset in vm_pgoff could be sufficiently large to + * overflow a loff_t when converted to byte offset. This can +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -3191,5 +3191,37 @@ unsigned long wp_shared_mapping_range(st + + extern int sysctl_nr_trim_pages; + ++/** ++ * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it ++ * @seals: the seals to check ++ * @vma: the vma to operate on ++ * ++ * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on ++ * the vma flags. Return 0 if check pass, or <0 for errors. ++ */ ++static inline int seal_check_future_write(int seals, struct vm_area_struct *vma) ++{ ++ if (seals & F_SEAL_FUTURE_WRITE) { ++ /* ++ * New PROT_WRITE and MAP_SHARED mmaps are not allowed when ++ * "future write" seal active. ++ */ ++ if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) ++ return -EPERM; ++ ++ /* ++ * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as ++ * MAP_SHARED and read-only, take care to not allow mprotect to ++ * revert protections on such mappings. Do this only for shared ++ * mappings. For private mappings, don't need to mask ++ * VM_MAYWRITE as we still want them to be COW-writable. ++ */ ++ if (vma->vm_flags & VM_SHARED) ++ vma->vm_flags &= ~(VM_MAYWRITE); ++ } ++ ++ return 0; ++} ++ + #endif /* __KERNEL__ */ + #endif /* _LINUX_MM_H */ +--- a/mm/shmem.c ++++ b/mm/shmem.c +@@ -2256,25 +2256,11 @@ out_nomem: + static int shmem_mmap(struct file *file, struct vm_area_struct *vma) + { + struct shmem_inode_info *info = SHMEM_I(file_inode(file)); ++ int ret; + +- if (info->seals & F_SEAL_FUTURE_WRITE) { +- /* +- * New PROT_WRITE and MAP_SHARED mmaps are not allowed when +- * "future write" seal active. +- */ +- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE)) +- return -EPERM; +- +- /* +- * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as +- * MAP_SHARED and read-only, take care to not allow mprotect to +- * revert protections on such mappings. Do this only for shared +- * mappings. For private mappings, don't need to mask +- * VM_MAYWRITE as we still want them to be COW-writable. 
+- */ +- if (vma->vm_flags & VM_SHARED) +- vma->vm_flags &= ~(VM_MAYWRITE); +- } ++ ret = seal_check_future_write(info->seals, vma); ++ if (ret) ++ return ret; + + /* arm64 - allow memory tagging on RAM-based files */ + vma->vm_flags |= VM_MTE_ALLOWED; diff --git a/queue-5.11/powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch b/queue-5.11/powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch new file mode 100644 index 00000000000..8f9cdcc4d6b --- /dev/null +++ b/queue-5.11/powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch @@ -0,0 +1,70 @@ +From aec86b052df6541cc97c5fca44e5934cbea4963b Mon Sep 17 00:00:00 2001 +From: Michael Ellerman +Date: Thu, 6 May 2021 14:49:59 +1000 +Subject: powerpc/64s: Fix crashes when toggling entry flush barrier + +From: Michael Ellerman + +commit aec86b052df6541cc97c5fca44e5934cbea4963b upstream. + +The entry flush mitigation can be enabled/disabled at runtime via a +debugfs file (entry_flush), which causes the kernel to patch itself to +enable/disable the relevant mitigations. + +However depending on which mitigation we're using, it may not be safe to +do that patching while other CPUs are active. For example the following +crash: + + sleeper[15639]: segfault (11) at c000000000004c20 nip c000000000004c20 lr c000000000004c20 + +Shows that we returned to userspace with a corrupted LR that points into +the kernel, due to executing the partially patched call to the fallback +entry flush (ie. we missed the LR restore). + +Fix it by doing the patching under stop machine. The CPUs that aren't +doing the patching will be spinning in the core of the stop machine +logic. That is currently sufficient for our purposes, because none of +the patching we do is to that code or anywhere in the vicinity. + +Fixes: f79643787e0a ("powerpc/64s: flush L1D on kernel entry") +Cc: stable@vger.kernel.org # v5.10+ +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210506044959.1298123-2-mpe@ellerman.id.au +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/lib/feature-fixups.c | 16 +++++++++++++++- + 1 file changed, 15 insertions(+), 1 deletion(-) + +--- a/arch/powerpc/lib/feature-fixups.c ++++ b/arch/powerpc/lib/feature-fixups.c +@@ -299,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_fl + : "unknown"); + } + +-void do_entry_flush_fixups(enum l1d_flush_type types) ++static int __do_entry_flush_fixups(void *data) + { ++ enum l1d_flush_type types = *(enum l1d_flush_type *)data; + unsigned int instrs[3], *dest; + long *start, *end; + int i; +@@ -369,6 +370,19 @@ void do_entry_flush_fixups(enum l1d_flus + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); ++ ++ return 0; ++} ++ ++void do_entry_flush_fixups(enum l1d_flush_type types) ++{ ++ /* ++ * The call to the fallback flush can not be safely patched in/out while ++ * other CPUs are executing it. So call __do_entry_flush_fixups() on one ++ * CPU while all other CPUs spin in the stop machine core with interrupts ++ * hard disabled. 
++ */ ++ stop_machine(__do_entry_flush_fixups, &types, NULL); + } + + void do_rfi_flush_fixups(enum l1d_flush_type types) diff --git a/queue-5.11/powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch b/queue-5.11/powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch new file mode 100644 index 00000000000..c9ef9030a6e --- /dev/null +++ b/queue-5.11/powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch @@ -0,0 +1,78 @@ +From 8ec7791bae1327b1c279c5cd6e929c3b12daaf0a Mon Sep 17 00:00:00 2001 +From: Michael Ellerman +Date: Thu, 6 May 2021 14:49:58 +1000 +Subject: powerpc/64s: Fix crashes when toggling stf barrier + +From: Michael Ellerman + +commit 8ec7791bae1327b1c279c5cd6e929c3b12daaf0a upstream. + +The STF (store-to-load forwarding) barrier mitigation can be +enabled/disabled at runtime via a debugfs file (stf_barrier), which +causes the kernel to patch itself to enable/disable the relevant +mitigations. + +However depending on which mitigation we're using, it may not be safe to +do that patching while other CPUs are active. For example the following +crash: + + User access of kernel address (c00000003fff5af0) - exploit attempt? (uid: 0) + segfault (11) at c00000003fff5af0 nip 7fff8ad12198 lr 7fff8ad121f8 code 1 + code: 40820128 e93c00d0 e9290058 7c292840 40810058 38600000 4bfd9a81 e8410018 + code: 2c030006 41810154 3860ffb6 e9210098 7d295279 39400000 40820a3c + +Shows that we returned to userspace without restoring the user r13 +value, due to executing the partially patched STF exit code. + +Fix it by doing the patching under stop machine. The CPUs that aren't +doing the patching will be spinning in the core of the stop machine +logic. That is currently sufficient for our purposes, because none of +the patching we do is to that code or anywhere in the vicinity. + +Fixes: a048a07d7f45 ("powerpc/64s: Add support for a store forwarding barrier at kernel entry/exit") +Cc: stable@vger.kernel.org # v4.17+ +Signed-off-by: Michael Ellerman +Link: https://lore.kernel.org/r/20210506044959.1298123-1-mpe@ellerman.id.au +Signed-off-by: Greg Kroah-Hartman +--- + arch/powerpc/lib/feature-fixups.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/arch/powerpc/lib/feature-fixups.c ++++ b/arch/powerpc/lib/feature-fixups.c +@@ -14,6 +14,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(e + : "unknown"); + } + ++static int __do_stf_barrier_fixups(void *data) ++{ ++ enum stf_barrier_type *types = data; ++ ++ do_stf_entry_barrier_fixups(*types); ++ do_stf_exit_barrier_fixups(*types); ++ ++ return 0; ++} + + void do_stf_barrier_fixups(enum stf_barrier_type types) + { +- do_stf_entry_barrier_fixups(types); +- do_stf_exit_barrier_fixups(types); ++ /* ++ * The call to the fallback entry flush, and the fallback/sync-ori exit ++ * flush can not be safely patched in/out while other CPUs are executing ++ * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs ++ * spin in the stop machine core with interrupts hard disabled. 
++ */ ++ stop_machine(__do_stf_barrier_fixups, &types, NULL); + } + + void do_uaccess_flush_fixups(enum l1d_flush_type types) diff --git a/queue-5.11/series b/queue-5.11/series index 3e4350137b3..7dcd05bc762 100644 --- a/queue-5.11/series +++ b/queue-5.11/series @@ -226,3 +226,24 @@ i40e-fix-use-after-free-in-i40e_client_subtask.patch i40e-fix-the-restart-auto-negotiation-after-fec-modi.patch i40e-fix-phy-type-identifiers-for-2.5g-and-5g-adapte.patch mptcp-fix-splat-when-closing-unaccepted-socket.patch +arc-entry-fix-off-by-one-error-in-syscall-number-validation.patch +arc-mm-pae-use-40-bit-physical-page-mask.patch +arc-mm-use-max_high_pfn-as-a-highmem-zone-border.patch +sh-remove-unused-variable.patch +powerpc-64s-fix-crashes-when-toggling-stf-barrier.patch +powerpc-64s-fix-crashes-when-toggling-entry-flush-barrier.patch +hfsplus-prevent-corruption-in-shrinking-truncate.patch +squashfs-fix-divide-error-in-calculate_skip.patch +userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch +kasan-fix-unit-tests-with-config_ubsan_local_bounds-enabled.patch +mm-hugetlb-fix-f_seal_future_write.patch +blk-iocost-fix-weight-updates-of-inner-active-iocgs.patch +x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch +arm64-mte-initialize-rgsr_el1.seed-in-__cpu_setup.patch +arm64-fix-race-condition-on-pg_dcache_clean-in-__sync_icache_dcache.patch +btrfs-fix-deadlock-when-cloning-inline-extents-and-using-qgroups.patch +btrfs-fix-race-leading-to-unpersisted-data-and-metadata-on-fsync.patch +drm-radeon-dpm-disable-sclk-switching-on-oland-when-two-4k-60hz-monitors-are-connected.patch +drm-amd-display-initialize-attribute-for-hdcp_srm-sysfs-file.patch +drm-i915-avoid-div-by-zero-on-gen2.patch +kvm-exit-halt-polling-on-need_resched-as-well.patch diff --git a/queue-5.11/sh-remove-unused-variable.patch b/queue-5.11/sh-remove-unused-variable.patch new file mode 100644 index 00000000000..12b0fd8f6bf --- /dev/null +++ b/queue-5.11/sh-remove-unused-variable.patch @@ -0,0 +1,38 @@ +From 0d3ae948741ac6d80e39ab27b45297367ee477de Mon Sep 17 00:00:00 2001 +From: Eric Dumazet +Date: Wed, 14 Apr 2021 10:05:17 -0700 +Subject: sh: Remove unused variable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Eric Dumazet + +commit 0d3ae948741ac6d80e39ab27b45297367ee477de upstream. 
+ +Removes this annoying warning: + +arch/sh/kernel/traps.c: In function ‘nmi_trap_handler’: +arch/sh/kernel/traps.c:183:15: warning: unused variable ‘cpu’ [-Wunused-variable] + 183 | unsigned int cpu = smp_processor_id(); + +Fixes: fe3f1d5d7cd3 ("sh: Get rid of nmi_count()") +Signed-off-by: Eric Dumazet +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20210414170517.1205430-1-eric.dumazet@gmail.com +Signed-off-by: Greg Kroah-Hartman +--- + arch/sh/kernel/traps.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/arch/sh/kernel/traps.c ++++ b/arch/sh/kernel/traps.c +@@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit( + + BUILD_TRAP_HANDLER(nmi) + { +- unsigned int cpu = smp_processor_id(); + TRAP_HANDLER_DECL; + + arch_ftrace_nmi_enter(); diff --git a/queue-5.11/squashfs-fix-divide-error-in-calculate_skip.patch b/queue-5.11/squashfs-fix-divide-error-in-calculate_skip.patch new file mode 100644 index 00000000000..1894fad2bd4 --- /dev/null +++ b/queue-5.11/squashfs-fix-divide-error-in-calculate_skip.patch @@ -0,0 +1,53 @@ +From d6e621de1fceb3b098ebf435ef7ea91ec4838a1a Mon Sep 17 00:00:00 2001 +From: Phillip Lougher +Date: Fri, 14 May 2021 17:27:16 -0700 +Subject: squashfs: fix divide error in calculate_skip() + +From: Phillip Lougher + +commit d6e621de1fceb3b098ebf435ef7ea91ec4838a1a upstream. + +Sysbot has reported a "divide error" which has been identified as being +caused by a corrupted file_size value within the file inode. This value +has been corrupted to a much larger value than expected. + +Calculate_skip() is passed i_size_read(inode) >> msblk->block_log. Due to +the file_size value corruption this overflows the int argument/variable in +that function, leading to the divide error. + +This patch changes the function to use u64. This will accommodate any +unexpectedly large values due to corruption. + +The value returned from calculate_skip() is clamped to be never more than +SQUASHFS_CACHED_BLKS - 1, or 7. So file_size corruption does not lead to +an unexpectedly large return result here. + +Link: https://lkml.kernel.org/r/20210507152618.9447-1-phillip@squashfs.org.uk +Signed-off-by: Phillip Lougher +Reported-by: +Reported-by: +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + fs/squashfs/file.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/fs/squashfs/file.c ++++ b/fs/squashfs/file.c +@@ -211,11 +211,11 @@ failure: + * If the skip factor is limited in this way then the file will use multiple + * slots. + */ +-static inline int calculate_skip(int blocks) ++static inline int calculate_skip(u64 blocks) + { +- int skip = blocks / ((SQUASHFS_META_ENTRIES + 1) ++ u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1) + * SQUASHFS_META_INDEXES); +- return min(SQUASHFS_CACHED_BLKS - 1, skip + 1); ++ return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1); + } + + diff --git a/queue-5.11/userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch b/queue-5.11/userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch new file mode 100644 index 00000000000..a7d97ea6e69 --- /dev/null +++ b/queue-5.11/userfaultfd-release-page-in-error-path-to-avoid-bug_on.patch @@ -0,0 +1,64 @@ +From 7ed9d238c7dbb1fdb63ad96a6184985151b0171c Mon Sep 17 00:00:00 2001 +From: Axel Rasmussen +Date: Fri, 14 May 2021 17:27:19 -0700 +Subject: userfaultfd: release page in error path to avoid BUG_ON + +From: Axel Rasmussen + +commit 7ed9d238c7dbb1fdb63ad96a6184985151b0171c upstream. 
+
+Consider the following sequence of events:
+
+1. Userspace issues a UFFD ioctl, which ends up calling into
+   shmem_mfill_atomic_pte(). We successfully account the blocks, we
+   shmem_alloc_page(), but then the copy_from_user() fails. We return
+   -ENOENT. We don't release the page we allocated.
+2. Our caller detects this error code, tries the copy_from_user() after
+   dropping the mmap_lock, and retries, calling back into
+   shmem_mfill_atomic_pte().
+3. Meanwhile, let's say another process filled up the tmpfs being used.
+4. So shmem_mfill_atomic_pte() fails to account blocks this time, and
+   immediately returns - without releasing the page.
+
+This triggers a BUG_ON in our caller, which asserts that the page
+should always be consumed, unless -ENOENT is returned.
+
+To fix this, detect if we have such a "dangling" page when accounting
+fails, and if so, release it before returning.
+
+Link: https://lkml.kernel.org/r/20210428230858.348400-1-axelrasmussen@google.com
+Fixes: cb658a453b93 ("userfaultfd: shmem: avoid leaking blocks and used blocks in UFFDIO_COPY")
+Signed-off-by: Axel Rasmussen
+Reported-by: Hugh Dickins
+Acked-by: Hugh Dickins
+Reviewed-by: Peter Xu
+Cc:
+Signed-off-by: Andrew Morton
+Signed-off-by: Linus Torvalds
+Signed-off-by: Greg Kroah-Hartman
+---
+ mm/shmem.c | 12 +++++++++++-
+ 1 file changed, 11 insertions(+), 1 deletion(-)
+
+--- a/mm/shmem.c
++++ b/mm/shmem.c
+@@ -2373,8 +2373,18 @@ static int shmem_mfill_atomic_pte(struct
+ pgoff_t offset, max_off;
+
+ ret = -ENOMEM;
+- if (!shmem_inode_acct_block(inode, 1))
++ if (!shmem_inode_acct_block(inode, 1)) {
++ /*
++ * We may have got a page, returned -ENOENT triggering a retry,
++ * and now we find ourselves with -ENOMEM. Release the page, to
++ * avoid a BUG_ON in our caller.
++ */
++ if (unlikely(*pagep)) {
++ put_page(*pagep);
++ *pagep = NULL;
++ }
+ goto out;
++ }
+
+ if (!*pagep) {
+ page = shmem_alloc_page(gfp, info, pgoff);
diff --git a/queue-5.11/x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch b/queue-5.11/x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch
new file mode 100644
index 00000000000..148e5943a5d
--- /dev/null
+++ b/queue-5.11/x86-sched-fix-the-amd-cppc-maximum-performance-value-on-certain-amd-ryzen-generations.patch
@@ -0,0 +1,106 @@
+From 3743d55b289c203d8f77b7cd47c24926b9d186ae Mon Sep 17 00:00:00 2001
+From: Huang Rui
+Date: Sun, 25 Apr 2021 15:34:51 +0800
+Subject: x86, sched: Fix the AMD CPPC maximum performance value on certain AMD Ryzen generations
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Huang Rui
+
+commit 3743d55b289c203d8f77b7cd47c24926b9d186ae upstream.
+
+Some AMD Ryzen generations use a different calculation method for the
+maximum performance value. 255 is not correct for all ASICs; some
+generations should use 166 as the maximum performance value instead.
+Otherwise, an incorrect frequency value is reported, like below:
+
+ ~ → lscpu | grep MHz
+ CPU MHz: 3400.000
+ CPU max MHz: 7228.3198
+ CPU min MHz: 2200.0000
+
+[ mingo: Tidied up whitespace use. ]
+[ Alexander Monakov : fix 225 -> 255 typo. ]
+
+Fixes: 41ea667227ba ("x86, sched: Calculate frequency invariance for AMD systems")
+Fixes: 3c55e94c0ade ("cpufreq: ACPI: Extend frequency tables to cover boost frequencies")
+Reported-by: Jason Bagavatsingham
+Fixed-by: Alexander Monakov
+Reviewed-by: Rafael J. Wysocki
+Signed-off-by: Huang Rui
+Signed-off-by: Ingo Molnar
+Tested-by: Jason Bagavatsingham
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20210425073451.2557394-1-ray.huang@amd.com
+Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=211791
+Signed-off-by: Ingo Molnar
+Signed-off-by: Greg Kroah-Hartman
+---
+ arch/x86/include/asm/processor.h | 2 ++
+ arch/x86/kernel/cpu/amd.c | 16 ++++++++++++++++
+ arch/x86/kernel/smpboot.c | 2 +-
+ drivers/cpufreq/acpi-cpufreq.c | 6 +++++-
+ 4 files changed, 24 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -805,8 +805,10 @@ DECLARE_PER_CPU(u64, msr_misc_features_s
+
+ #ifdef CONFIG_CPU_SUP_AMD
+ extern u32 amd_get_nodes_per_socket(void);
++extern u32 amd_get_highest_perf(void);
+ #else
+ static inline u32 amd_get_nodes_per_socket(void) { return 0; }
++static inline u32 amd_get_highest_perf(void) { return 0; }
+ #endif
+
+ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -1170,3 +1170,19 @@ void set_dr_addr_mask(unsigned long mask
+ break;
+ }
+ }
++
++u32 amd_get_highest_perf(void)
++{
++ struct cpuinfo_x86 *c = &boot_cpu_data;
++
++ if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
++ (c->x86_model >= 0x70 && c->x86_model < 0x80)))
++ return 166;
++
++ if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
++ (c->x86_model >= 0x40 && c->x86_model < 0x70)))
++ return 166;
++
++ return 255;
++}
++EXPORT_SYMBOL_GPL(amd_get_highest_perf);
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -2046,7 +2046,7 @@ static bool amd_set_max_freq_ratio(void)
+ return false;
+ }
+
+- highest_perf = perf_caps.highest_perf;
++ highest_perf = amd_get_highest_perf();
+ nominal_perf = perf_caps.nominal_perf;
+
+ if (!highest_perf || !nominal_perf) {
+--- a/drivers/cpufreq/acpi-cpufreq.c
++++ b/drivers/cpufreq/acpi-cpufreq.c
+@@ -646,7 +646,11 @@ static u64 get_max_boost_ratio(unsigned
+ return 0;
+ }
+
+- highest_perf = perf_caps.highest_perf;
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
++ highest_perf = amd_get_highest_perf();
++ else
++ highest_perf = perf_caps.highest_perf;
++
+ nominal_perf = perf_caps.nominal_perf;
+
+ if (!highest_perf || !nominal_perf) {
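A back-of-the-envelope check of the numbers above (an illustration for this queue, not from the patch): the reported maximum frequency scales linearly with highest_perf, so if the bogus "CPU max MHz: 7228.3198" came from assuming highest_perf == 255 on a part whose correct value is 166, rescaling by 166/255 should land near the real boost clock.

#include <stdio.h>

int main(void)
{
	/* Value from the bug report quoted in the commit message. */
	double bogus_max_mhz = 7228.3198;

	/* Apply the same correction the fixed kernel makes: 166, not 255. */
	printf("rescaled max: %.1f MHz\n", bogus_max_mhz * 166.0 / 255.0);
	return 0;
}

This prints roughly 4705 MHz, a plausible Ryzen boost frequency, consistent with the 255 assumption having inflated the estimate by about 54%.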