--- /dev/null
+From d78050ee35440d7879ed94011c52994b8932e96e Mon Sep 17 00:00:00 2001
+From: Catalin Marinas <catalin.marinas@arm.com>
+Date: Thu, 7 Jan 2021 14:40:08 +0000
+Subject: arm64: Remove arm64_dma32_phys_limit and its uses
+
+From: Catalin Marinas <catalin.marinas@arm.com>
+
+commit d78050ee35440d7879ed94011c52994b8932e96e upstream.
+
+With the introduction of a dynamic ZONE_DMA range based on DT or IORT
+information, there's no need for CMA allocations from the wider
+ZONE_DMA32 since on most platforms ZONE_DMA will cover the 32-bit
+addressable range. Remove the arm64_dma32_phys_limit and set
+arm64_dma_phys_limit to cover the smallest DMA range required on the
+platform. CMA allocation and crashkernel reservation now go in the
+dynamically sized ZONE_DMA, allowing correct functionality on RPi4.
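+
+A minimal sketch of the resulting limit selection (illustrative only, not
+part of the patch; it condenses the hunks below plus ZONE_DMA context that
+the hunks do not show):
+
+#ifdef CONFIG_ZONE_DMA
+	/* smallest range any device needs, e.g. 30 bits (1GB) on RPi4 */
+	arm64_dma_phys_limit = max_zone_phys(min3(32U, dt_zone_dma_bits,
+						  acpi_zone_dma_bits));
+#endif
+#ifdef CONFIG_ZONE_DMA32
+	if (!arm64_dma_phys_limit)
+		arm64_dma_phys_limit = max_zone_phys(32);  /* full 32-bit range */
+#endif
+	if (!arm64_dma_phys_limit)
+		arm64_dma_phys_limit = PHYS_MASK + 1;      /* no DMA zones at all */
+
+	/* CMA and the crashkernel search both stay below this single limit */
+	dma_contiguous_reserve(arm64_dma_phys_limit);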
+
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Chen Zhou <chenzhou10@huawei.com>
+Reviewed-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
+Tested-by: Nicolas Saenz Julienne <nsaenzjulienne@suse.de> # On RPi4B
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/include/asm/processor.h | 3 +--
+ arch/arm64/mm/init.c | 33 ++++++++++++++++++---------------
+ 2 files changed, 19 insertions(+), 17 deletions(-)
+
+--- a/arch/arm64/include/asm/processor.h
++++ b/arch/arm64/include/asm/processor.h
+@@ -96,8 +96,7 @@
+ #endif /* CONFIG_ARM64_FORCE_52BIT */
+
+ extern phys_addr_t arm64_dma_phys_limit;
+-extern phys_addr_t arm64_dma32_phys_limit;
+-#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1)
++#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1)
+
+ struct debug_info {
+ #ifdef CONFIG_HAVE_HW_BREAKPOINT
+--- a/arch/arm64/mm/init.c
++++ b/arch/arm64/mm/init.c
+@@ -53,13 +53,13 @@ s64 memstart_addr __ro_after_init = -1;
+ EXPORT_SYMBOL(memstart_addr);
+
+ /*
+- * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
+- * memory as some devices, namely the Raspberry Pi 4, have peripherals with
+- * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
+- * bit addressable memory area.
++ * If the corresponding config options are enabled, we create both ZONE_DMA
++ * and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory
++ * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
++ * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
++ * otherwise it is empty.
+ */
+ phys_addr_t arm64_dma_phys_limit __ro_after_init;
+-phys_addr_t arm64_dma32_phys_limit __ro_after_init;
+
+ #ifdef CONFIG_KEXEC_CORE
+ /*
+@@ -84,7 +84,7 @@ static void __init reserve_crashkernel(v
+
+ if (crash_base == 0) {
+ /* Current arm64 boot protocol requires 2MB alignment */
+- crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit,
++ crash_base = memblock_find_in_range(0, arm64_dma_phys_limit,
+ crash_size, SZ_2M);
+ if (crash_base == 0) {
+ pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
+@@ -189,6 +189,7 @@ static void __init zone_sizes_init(unsig
+ unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
+ unsigned int __maybe_unused acpi_zone_dma_bits;
+ unsigned int __maybe_unused dt_zone_dma_bits;
++ phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32);
+
+ #ifdef CONFIG_ZONE_DMA
+ acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
+@@ -198,8 +199,12 @@ static void __init zone_sizes_init(unsig
+ max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
+ #endif
+ #ifdef CONFIG_ZONE_DMA32
+- max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
++ max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
++ if (!arm64_dma_phys_limit)
++ arm64_dma_phys_limit = dma32_phys_limit;
+ #endif
++ if (!arm64_dma_phys_limit)
++ arm64_dma_phys_limit = PHYS_MASK + 1;
+ max_zone_pfns[ZONE_NORMAL] = max;
+
+ free_area_init(max_zone_pfns);
+@@ -393,16 +398,9 @@ void __init arm64_memblock_init(void)
+
+ early_init_fdt_scan_reserved_mem();
+
+- if (IS_ENABLED(CONFIG_ZONE_DMA32))
+- arm64_dma32_phys_limit = max_zone_phys(32);
+- else
+- arm64_dma32_phys_limit = PHYS_MASK + 1;
+-
+ reserve_elfcorehdr();
+
+ high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
+-
+- dma_contiguous_reserve(arm64_dma32_phys_limit);
+ }
+
+ void __init bootmem_init(void)
+@@ -438,6 +436,11 @@ void __init bootmem_init(void)
+ zone_sizes_init(min, max);
+
+ /*
++ * Reserve the CMA area after arm64_dma_phys_limit was initialised.
++ */
++ dma_contiguous_reserve(arm64_dma_phys_limit);
++
++ /*
+ * request_standard_resources() depends on crashkernel's memory being
+ * reserved, so do it here.
+ */
+@@ -519,7 +522,7 @@ static void __init free_unused_memmap(vo
+ void __init mem_init(void)
+ {
+ if (swiotlb_force == SWIOTLB_FORCE ||
+- max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
++ max_pfn > PFN_DOWN(arm64_dma_phys_limit))
+ swiotlb_init(1);
+ else
+ swiotlb_force = SWIOTLB_NO_FORCE;
--- /dev/null
+From 8d432592f30fcc34ef5a10aac4887b4897884493 Mon Sep 17 00:00:00 2001
+From: Jonathon Reinhart <jonathon.reinhart@gmail.com>
+Date: Sat, 1 May 2021 04:28:22 -0400
+Subject: net: Only allow init netns to set default tcp cong to a restricted algo
+
+From: Jonathon Reinhart <jonathon.reinhart@gmail.com>
+
+commit 8d432592f30fcc34ef5a10aac4887b4897884493 upstream.
+
+tcp_set_default_congestion_control() is netns-safe in that it writes
+to &net->ipv4.tcp_congestion_control, but it also sets
+ca->flags |= TCP_CONG_NON_RESTRICTED, which is not namespaced.
+This has the unintended side-effect of changing the global
+net.ipv4.tcp_allowed_congestion_control sysctl, despite the fact that it
+is read-only: 97684f0970f6 ("net: Make tcp_allowed_congestion_control
+readonly in non-init netns")
+
+Resolve this netns "leak" by only allowing the init netns to set the
+default algorithm to one that is restricted. This restriction could be
+removed if tcp_allowed_congestion_control were namespace-ified in the
+future.
+
+This bug was uncovered with
+https://github.com/JonathonReinhart/linux-netns-sysctl-verify
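+
+For context, the success path of tcp_set_default_congestion_control() sits
+just below the hunk; a rough sketch (treat the exact lines as illustrative,
+only the flag write is quoted from the description above):
+
+	} else {
+		prev = xchg(&net->ipv4.tcp_congestion_control, ca);
+		if (prev)
+			bpf_module_put(prev, prev->owner);
+
+		/* ca->flags is state of the (global) algorithm module, not
+		 * per-netns; this write is what leaked into the init netns'
+		 * read-only tcp_allowed_congestion_control list.
+		 */
+		ca->flags |= TCP_CONG_NON_RESTRICTED;
+		ret = 0;
+	}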
+
+Fixes: 6670e1524477 ("tcp: Namespace-ify sysctl_tcp_default_congestion_control")
+Signed-off-by: Jonathon Reinhart <jonathon.reinhart@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_cong.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/ipv4/tcp_cong.c
++++ b/net/ipv4/tcp_cong.c
+@@ -230,6 +230,10 @@ int tcp_set_default_congestion_control(s
+ ret = -ENOENT;
+ } else if (!bpf_try_module_get(ca, ca->owner)) {
+ ret = -EBUSY;
++ } else if (!net_eq(net, &init_net) &&
++ !(ca->flags & TCP_CONG_NON_RESTRICTED)) {
++ /* Only init netns can set default to a restricted algorithm */
++ ret = -EPERM;
+ } else {
+ prev = xchg(&net->ipv4.tcp_congestion_control, ca);
+ if (prev)
bpf-fix-alu32-const-subreg-bound-tracking-on-bitwise-operations.patch
bpf-ringbuf-deny-reserve-of-buffers-larger-than-ringbuf.patch
bpf-prevent-writable-memory-mapping-of-read-only-ringbuf-pages.patch
+arm64-remove-arm64_dma32_phys_limit-and-its-uses.patch
+net-only-allow-init-netns-to-set-default-tcp-cong-to-a-restricted-algo.patch
+smp-fix-smp_call_function_single_async-prototype.patch
--- /dev/null
+From foo@baz Wed May 12 02:09:13 PM CEST 2021
+From: Arnd Bergmann <arnd@arndb.de>
+Date: Wed, 5 May 2021 23:12:42 +0200
+Subject: smp: Fix smp_call_function_single_async prototype
+
+From: Arnd Bergmann <arnd@arndb.de>
+
+commit 1139aeb1c521eb4a050920ce6c64c36c4f2a3ab7 upstream.
+
+As of commit 966a967116e6 ("smp: Avoid using two cache lines for struct
+call_single_data"), the smp code prefers 32-byte aligned call_single_data
+objects for performance reasons, but the block layer includes an instance
+of this structure in the main 'struct request' that is more sensitive
+to size than to performance here; see 4ccafe032005 ("block: unalign
+call_single_data in struct request").
+
+The result is a violation of the calling conventions that clang correctly
+points out:
+
+block/blk-mq.c:630:39: warning: passing 8-byte aligned argument to 32-byte aligned parameter 2 of 'smp_call_function_single_async' may result in an unaligned pointer access [-Walign-mismatch]
+ smp_call_function_single_async(cpu, &rq->csd);
+
+It does seem that the usage of the call_single_data without cache line
+alignment should still be allowed by the smp code, so just change the
+function prototype so it accepts both, but leave the default alignment
+unchanged for the other users. This seems better to me than adding
+a local hack to shut up an otherwise correct warning in the caller.
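+
+Background sketch of the types involved (trimmed from the commits cited
+above; illustrative, not part of this patch):
+
+	/* 966a967116e6: the 32-byte alignment lives on the typedef only */
+	typedef struct __call_single_data call_single_data_t
+		__aligned(sizeof(struct __call_single_data));
+
+	/* 4ccafe032005: struct request embeds the bare struct to stay small,
+	 * so &rq->csd is only 8-byte aligned.
+	 */
+	struct request {
+		/* ... */
+		struct __call_single_data csd;
+		/* ... */
+	};
+
+Passing '&rq->csd' to a 'call_single_data_t *' parameter is the mismatch
+clang reports; a 'struct __call_single_data *' parameter accepts both the
+aligned and the unaligned layout, so only the parameter types change below.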
+
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Jens Axboe <axboe@kernel.dk>
+Link: https://lkml.kernel.org/r/20210505211300.3174456-1-arnd@kernel.org
+[nc: Fix conflicts, modify rq_csd_init]
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/smp.h | 2 +-
+ kernel/sched/core.c | 2 +-
+ kernel/smp.c | 20 ++++++++++----------
+ kernel/up.c | 2 +-
+ 4 files changed, 13 insertions(+), 13 deletions(-)
+
+--- a/include/linux/smp.h
++++ b/include/linux/smp.h
+@@ -74,7 +74,7 @@ void on_each_cpu_cond(smp_cond_func_t co
+ void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
+ void *info, bool wait, const struct cpumask *mask);
+
+-int smp_call_function_single_async(int cpu, call_single_data_t *csd);
++int smp_call_function_single_async(int cpu, struct __call_single_data *csd);
+
+ #ifdef CONFIG_SMP
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -321,7 +321,7 @@ void update_rq_clock(struct rq *rq)
+ }
+
+ static inline void
+-rq_csd_init(struct rq *rq, call_single_data_t *csd, smp_call_func_t func)
++rq_csd_init(struct rq *rq, struct __call_single_data *csd, smp_call_func_t func)
+ {
+ csd->flags = 0;
+ csd->func = func;
+--- a/kernel/smp.c
++++ b/kernel/smp.c
+@@ -110,7 +110,7 @@ static DEFINE_PER_CPU(void *, cur_csd_in
+ static atomic_t csd_bug_count = ATOMIC_INIT(0);
+
+ /* Record current CSD work for current CPU, NULL to erase. */
+-static void csd_lock_record(call_single_data_t *csd)
++static void csd_lock_record(struct __call_single_data *csd)
+ {
+ if (!csd) {
+ smp_mb(); /* NULL cur_csd after unlock. */
+@@ -125,7 +125,7 @@ static void csd_lock_record(call_single_
+ /* Or before unlock, as the case may be. */
+ }
+
+-static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
++static __always_inline int csd_lock_wait_getcpu(struct __call_single_data *csd)
+ {
+ unsigned int csd_type;
+
+@@ -140,7 +140,7 @@ static __always_inline int csd_lock_wait
+ * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
+ * so waiting on other types gets much less information.
+ */
+-static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
++static __always_inline bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *ts1, int *bug_id)
+ {
+ int cpu = -1;
+ int cpux;
+@@ -204,7 +204,7 @@ static __always_inline bool csd_lock_wai
+ * previous function call. For multi-cpu calls its even more interesting
+ * as we'll have to ensure no other cpu is observing our csd.
+ */
+-static __always_inline void csd_lock_wait(call_single_data_t *csd)
++static __always_inline void csd_lock_wait(struct __call_single_data *csd)
+ {
+ int bug_id = 0;
+ u64 ts0, ts1;
+@@ -219,17 +219,17 @@ static __always_inline void csd_lock_wai
+ }
+
+ #else
+-static void csd_lock_record(call_single_data_t *csd)
++static void csd_lock_record(struct __call_single_data *csd)
+ {
+ }
+
+-static __always_inline void csd_lock_wait(call_single_data_t *csd)
++static __always_inline void csd_lock_wait(struct __call_single_data *csd)
+ {
+ smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
+ }
+ #endif
+
+-static __always_inline void csd_lock(call_single_data_t *csd)
++static __always_inline void csd_lock(struct __call_single_data *csd)
+ {
+ csd_lock_wait(csd);
+ csd->flags |= CSD_FLAG_LOCK;
+@@ -242,7 +242,7 @@ static __always_inline void csd_lock(cal
+ smp_wmb();
+ }
+
+-static __always_inline void csd_unlock(call_single_data_t *csd)
++static __always_inline void csd_unlock(struct __call_single_data *csd)
+ {
+ WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
+
+@@ -276,7 +276,7 @@ void __smp_call_single_queue(int cpu, st
+ * for execution on the given CPU. data must already have
+ * ->func, ->info, and ->flags set.
+ */
+-static int generic_exec_single(int cpu, call_single_data_t *csd)
++static int generic_exec_single(int cpu, struct __call_single_data *csd)
+ {
+ if (cpu == smp_processor_id()) {
+ smp_call_func_t func = csd->func;
+@@ -542,7 +542,7 @@ EXPORT_SYMBOL(smp_call_function_single);
+ * NOTE: Be careful, there is unfortunately no current debugging facility to
+ * validate the correctness of this serialization.
+ */
+-int smp_call_function_single_async(int cpu, call_single_data_t *csd)
++int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
+ {
+ int err = 0;
+
+--- a/kernel/up.c
++++ b/kernel/up.c
+@@ -25,7 +25,7 @@ int smp_call_function_single(int cpu, vo
+ }
+ EXPORT_SYMBOL(smp_call_function_single);
+
+-int smp_call_function_single_async(int cpu, call_single_data_t *csd)
++int smp_call_function_single_async(int cpu, struct __call_single_data *csd)
+ {
+ unsigned long flags;
+