]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
authorSasha Levin <sashal@kernel.org>
Mon, 13 Mar 2023 12:06:05 +0000 (08:06 -0400)
committerSasha Levin <sashal@kernel.org>
Mon, 13 Mar 2023 12:06:05 +0000 (08:06 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
59 files changed:
queue-5.10/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch [new file with mode: 0644]
queue-5.10/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch [new file with mode: 0644]
queue-5.10/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch [new file with mode: 0644]
queue-5.10/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch [new file with mode: 0644]
queue-5.10/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch [new file with mode: 0644]
queue-5.10/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch [new file with mode: 0644]
queue-5.10/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch [new file with mode: 0644]
queue-5.10/drm-msm-document-and-rename-preempt_lock.patch [new file with mode: 0644]
queue-5.10/drm-msm-fix-potential-invalid-ptr-free.patch [new file with mode: 0644]
queue-5.10/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch [new file with mode: 0644]
queue-5.10/drm-nouveau-kms-nv50-remove-unused-functions.patch [new file with mode: 0644]
queue-5.10/efi-earlycon-replace-open-coded-strnchrnul.patch [new file with mode: 0644]
queue-5.10/ext4-fix-possible-corruption-when-moving-a-directory.patch [new file with mode: 0644]
queue-5.10/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch [new file with mode: 0644]
queue-5.10/iommu-vt-d-fix-lockdep-splat-in-intel_pasid_get_entr.patch [new file with mode: 0644]
queue-5.10/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch [new file with mode: 0644]
queue-5.10/irq-fix-typos-in-comments.patch [new file with mode: 0644]
queue-5.10/irqdomain-change-the-type-of-size-in-__irq_domain_ad.patch [new file with mode: 0644]
queue-5.10/irqdomain-fix-domain-registration-race.patch [new file with mode: 0644]
queue-5.10/irqdomain-fix-mapping-creation-race.patch [new file with mode: 0644]
queue-5.10/irqdomain-look-for-existing-mapping-only-once.patch [new file with mode: 0644]
queue-5.10/irqdomain-refactor-__irq_domain_alloc_irqs.patch [new file with mode: 0644]
queue-5.10/landlock-add-object-management.patch [new file with mode: 0644]
queue-5.10/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch [new file with mode: 0644]
queue-5.10/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch [new file with mode: 0644]
queue-5.10/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch [new file with mode: 0644]
queue-5.10/net-phylib-get-rid-of-unnecessary-locking.patch [new file with mode: 0644]
queue-5.10/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch [new file with mode: 0644]
queue-5.10/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch [new file with mode: 0644]
queue-5.10/net-usb-lan78xx-remove-lots-of-set-but-unused-ret-va.patch [new file with mode: 0644]
queue-5.10/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch [new file with mode: 0644]
queue-5.10/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch [new file with mode: 0644]
queue-5.10/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch [new file with mode: 0644]
queue-5.10/platform-x86-mlx_platform-select-regmap-instead-of-d.patch [new file with mode: 0644]
queue-5.10/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch [new file with mode: 0644]
queue-5.10/risc-v-avoid-dereferening-null-regs-in-die.patch [new file with mode: 0644]
queue-5.10/risc-v-don-t-check-text_mutex-during-stop_machine.patch [new file with mode: 0644]
queue-5.10/riscv-add-header-include-guards-to-insn.h.patch [new file with mode: 0644]
queue-5.10/riscv-avoid-enabling-interrupts-in-die.patch [new file with mode: 0644]
queue-5.10/riscv-ftrace-reduce-the-detour-code-size-to-half.patch [new file with mode: 0644]
queue-5.10/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch [new file with mode: 0644]
queue-5.10/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch [new file with mode: 0644]
queue-5.10/riscv-using-patchable_function_entry-instead-of-mcou.patch [new file with mode: 0644]
queue-5.10/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch [new file with mode: 0644]
queue-5.10/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch [new file with mode: 0644]
queue-5.10/selftests-landlock-add-clang-format-exceptions.patch [new file with mode: 0644]
queue-5.10/selftests-landlock-add-user-space-tests.patch [new file with mode: 0644]
queue-5.10/selftests-landlock-skip-overlayfs-tests-when-not-sup.patch [new file with mode: 0644]
queue-5.10/selftests-landlock-test-ptrace-as-much-as-possible-w.patch [new file with mode: 0644]
queue-5.10/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/software-node-introduce-device_add_software_node.patch [new file with mode: 0644]
queue-5.10/sunrpc-fix-a-server-shutdown-leak.patch [new file with mode: 0644]
queue-5.10/udf-fix-off-by-one-error-when-discarding-preallocati.patch [new file with mode: 0644]
queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-m.patch [new file with mode: 0644]
queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-p.patch [new file with mode: 0644]
queue-5.10/usb-dwc3-pci-add-support-for-the-intel-raptor-lake-s.patch [new file with mode: 0644]
queue-5.10/usb-dwc3-pci-id-for-tiger-lake-cpu.patch [new file with mode: 0644]
queue-5.10/usb-dwc3-pci-register-a-software-node-for-the-dwc3-p.patch [new file with mode: 0644]

diff --git a/queue-5.10/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch b/queue-5.10/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch
new file mode 100644 (file)
index 0000000..23f2fa7
--- /dev/null
@@ -0,0 +1,104 @@
+From e71e11098ba2822015d64028427de64e99a16ff0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 17:10:47 +0100
+Subject: arm64: efi: Make efi_rt_lock a raw_spinlock
+
+From: Pierre Gondois <pierre.gondois@arm.com>
+
+[ Upstream commit 0e68b5517d3767562889f1d83fdb828c26adb24f ]
+
+Running a rt-kernel base on 6.2.0-rc3-rt1 on an Ampere Altra outputs
+the following:
+  BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
+  in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 9, name: kworker/u320:0
+  preempt_count: 2, expected: 0
+  RCU nest depth: 0, expected: 0
+  3 locks held by kworker/u320:0/9:
+  #0: ffff3fff8c27d128 ((wq_completion)efi_rts_wq){+.+.}-{0:0}, at: process_one_work (./include/linux/atomic/atomic-long.h:41)
+  #1: ffff80000861bdd0 ((work_completion)(&efi_rts_work.work)){+.+.}-{0:0}, at: process_one_work (./include/linux/atomic/atomic-long.h:41)
+  #2: ffffdf7e1ed3e460 (efi_rt_lock){+.+.}-{3:3}, at: efi_call_rts (drivers/firmware/efi/runtime-wrappers.c:101)
+  Preemption disabled at:
+  efi_virtmap_load (./arch/arm64/include/asm/mmu_context.h:248)
+  CPU: 0 PID: 9 Comm: kworker/u320:0 Tainted: G        W          6.2.0-rc3-rt1
+  Hardware name: WIWYNN Mt.Jade Server System B81.03001.0005/Mt.Jade Motherboard, BIOS 1.08.20220218 (SCP: 1.08.20220218) 2022/02/18
+  Workqueue: efi_rts_wq efi_call_rts
+  Call trace:
+  dump_backtrace (arch/arm64/kernel/stacktrace.c:158)
+  show_stack (arch/arm64/kernel/stacktrace.c:165)
+  dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4))
+  dump_stack (lib/dump_stack.c:114)
+  __might_resched (kernel/sched/core.c:10134)
+  rt_spin_lock (kernel/locking/rtmutex.c:1769 (discriminator 4))
+  efi_call_rts (drivers/firmware/efi/runtime-wrappers.c:101)
+  [...]
+
+This seems to come from commit ff7a167961d1 ("arm64: efi: Execute
+runtime services from a dedicated stack") which adds a spinlock. This
+spinlock is taken through:
+efi_call_rts()
+\-efi_call_virt()
+  \-efi_call_virt_pointer()
+    \-arch_efi_call_virt_setup()
+
+Make 'efi_rt_lock' a raw_spinlock to avoid being preempted.
+
+[ardb: The EFI runtime services are called with a different set of
+       translation tables, and are permitted to use the SIMD registers.
+       The context switch code preserves/restores neither, and so EFI
+       calls must be made with preemption disabled, rather than only
+       disabling migration.]
+
+Fixes: ff7a167961d1 ("arm64: efi: Execute runtime services from a dedicated stack")
+Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
+Cc: <stable@vger.kernel.org> # v6.1+
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/efi.h | 6 +++---
+ arch/arm64/kernel/efi.c      | 2 +-
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
+index 16892f0d05ad6..538b6a1b198b9 100644
+--- a/arch/arm64/include/asm/efi.h
++++ b/arch/arm64/include/asm/efi.h
+@@ -25,7 +25,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+ ({                                                                    \
+       efi_virtmap_load();                                             \
+       __efi_fpsimd_begin();                                           \
+-      spin_lock(&efi_rt_lock);                                        \
++      raw_spin_lock(&efi_rt_lock);                                    \
+ })
+ #define arch_efi_call_virt(p, f, args...)                             \
+@@ -37,12 +37,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+ #define arch_efi_call_virt_teardown()                                 \
+ ({                                                                    \
+-      spin_unlock(&efi_rt_lock);                                      \
++      raw_spin_unlock(&efi_rt_lock);                                  \
+       __efi_fpsimd_end();                                             \
+       efi_virtmap_unload();                                           \
+ })
+-extern spinlock_t efi_rt_lock;
++extern raw_spinlock_t efi_rt_lock;
+ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
+ #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
+index 72f432d23ec5c..3ee3b3daca47b 100644
+--- a/arch/arm64/kernel/efi.c
++++ b/arch/arm64/kernel/efi.c
+@@ -144,7 +144,7 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
+       return s;
+ }
+-DEFINE_SPINLOCK(efi_rt_lock);
++DEFINE_RAW_SPINLOCK(efi_rt_lock);
+ asmlinkage u64 *efi_rt_stack_top __ro_after_init;
+-- 
+2.39.2
+
diff --git a/queue-5.10/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch b/queue-5.10/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch
new file mode 100644 (file)
index 0000000..ace740f
--- /dev/null
@@ -0,0 +1,92 @@
+From d4d1fc0c2871c7e8f9ee89b3e532ad1d1688edcb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Feb 2023 10:11:56 +0100
+Subject: bgmac: fix *initial* chip reset to support BCM5358
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rafał Miłecki <rafal@milecki.pl>
+
+[ Upstream commit f99e6d7c4ed3be2531bd576425a5bd07fb133bd7 ]
+
+While bringing hardware up we should perform a full reset including the
+switch bit (BGMAC_BCMA_IOCTL_SW_RESET aka SICF_SWRST). It's what
+specification says and what reference driver does.
+
+This seems to be critical for the BCM5358. Without this hardware doesn't
+get initialized properly and doesn't seem to transmit or receive any
+packets.
+
+Originally bgmac was calling bgmac_chip_reset() before setting
+"has_robosw" property which resulted in expected behaviour. That has
+changed as a side effect of adding platform device support which
+regressed BCM5358 support.
+
+Fixes: f6a95a24957a ("net: ethernet: bgmac: Add platform device support")
+Cc: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Link: https://lore.kernel.org/r/20230227091156.19509-1-zajec5@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bgmac.c | 8 ++++++--
+ drivers/net/ethernet/broadcom/bgmac.h | 2 ++
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
+index 9960127f612ea..bb999e67d7736 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -890,13 +890,13 @@ static void bgmac_chip_reset_idm_config(struct bgmac *bgmac)
+               if (iost & BGMAC_BCMA_IOST_ATTACHED) {
+                       flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
+-                      if (!bgmac->has_robosw)
++                      if (bgmac->in_init || !bgmac->has_robosw)
+                               flags |= BGMAC_BCMA_IOCTL_SW_RESET;
+               }
+               bgmac_clk_enable(bgmac, flags);
+       }
+-      if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
++      if (iost & BGMAC_BCMA_IOST_ATTACHED && (bgmac->in_init || !bgmac->has_robosw))
+               bgmac_idm_write(bgmac, BCMA_IOCTL,
+                               bgmac_idm_read(bgmac, BCMA_IOCTL) &
+                               ~BGMAC_BCMA_IOCTL_SW_RESET);
+@@ -1490,6 +1490,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
+       struct net_device *net_dev = bgmac->net_dev;
+       int err;
++      bgmac->in_init = true;
++
+       bgmac_chip_intrs_off(bgmac);
+       net_dev->irq = bgmac->irq;
+@@ -1542,6 +1544,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
+       /* Omit FCS from max MTU size */
+       net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN;
++      bgmac->in_init = false;
++
+       err = register_netdev(bgmac->net_dev);
+       if (err) {
+               dev_err(bgmac->dev, "Cannot register net device\n");
+diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
+index 351c598a3ec6d..d1200b27af1ed 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -512,6 +512,8 @@ struct bgmac {
+       int irq;
+       u32 int_mask;
++      bool in_init;
++
+       /* Current MAC state */
+       int mac_speed;
+       int mac_duplex;
+-- 
+2.39.2
+
diff --git a/queue-5.10/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch b/queue-5.10/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch
new file mode 100644 (file)
index 0000000..fcfd493
--- /dev/null
@@ -0,0 +1,114 @@
+From a11bbbffc1c785fdd6e539212e262c6a47fe0eb6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 18:43:57 -0800
+Subject: bnxt_en: Avoid order-5 memory allocation for TPA data
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit accd7e23693aaaa9aa0d3e9eca0ae77d1be80ab3 ]
+
+The driver needs to keep track of all the possible concurrent TPA (GRO/LRO)
+completions on the aggregation ring.  On P5 chips, the maximum number
+of concurrent TPA is 256 and the amount of memory we allocate is order-5
+on systems using 4K pages.  Memory allocation failure has been reported:
+
+NetworkManager: page allocation failure: order:5, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0-1
+CPU: 15 PID: 2995 Comm: NetworkManager Kdump: loaded Not tainted 5.10.156 #1
+Hardware name: Dell Inc. PowerEdge R660/0M1CC5, BIOS 0.2.25 08/12/2022
+Call Trace:
+ dump_stack+0x57/0x6e
+ warn_alloc.cold.120+0x7b/0xdd
+ ? _cond_resched+0x15/0x30
+ ? __alloc_pages_direct_compact+0x15f/0x170
+ __alloc_pages_slowpath.constprop.108+0xc58/0xc70
+ __alloc_pages_nodemask+0x2d0/0x300
+ kmalloc_order+0x24/0xe0
+ kmalloc_order_trace+0x19/0x80
+ bnxt_alloc_mem+0x1150/0x15c0 [bnxt_en]
+ ? bnxt_get_func_stat_ctxs+0x13/0x60 [bnxt_en]
+ __bnxt_open_nic+0x12e/0x780 [bnxt_en]
+ bnxt_open+0x10b/0x240 [bnxt_en]
+ __dev_open+0xe9/0x180
+ __dev_change_flags+0x1af/0x220
+ dev_change_flags+0x21/0x60
+ do_setlink+0x35c/0x1100
+
+Instead of allocating this big chunk of memory and dividing it up for the
+concurrent TPA instances, allocate each small chunk separately for each
+TPA instance.  This will reduce it to order-0 allocations.
+
+Fixes: 79632e9ba386 ("bnxt_en: Expand bnxt_tpa_info struct to support 57500 chips.")
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Reviewed-by: Damodharam Ammepalli <damodharam.ammepalli@broadcom.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index c4a768ce8c99d..6928c0b578abb 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -2854,7 +2854,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
+ static void bnxt_free_tpa_info(struct bnxt *bp)
+ {
+-      int i;
++      int i, j;
+       for (i = 0; i < bp->rx_nr_rings; i++) {
+               struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+@@ -2862,8 +2862,10 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
+               kfree(rxr->rx_tpa_idx_map);
+               rxr->rx_tpa_idx_map = NULL;
+               if (rxr->rx_tpa) {
+-                      kfree(rxr->rx_tpa[0].agg_arr);
+-                      rxr->rx_tpa[0].agg_arr = NULL;
++                      for (j = 0; j < bp->max_tpa; j++) {
++                              kfree(rxr->rx_tpa[j].agg_arr);
++                              rxr->rx_tpa[j].agg_arr = NULL;
++                      }
+               }
+               kfree(rxr->rx_tpa);
+               rxr->rx_tpa = NULL;
+@@ -2872,14 +2874,13 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
+ static int bnxt_alloc_tpa_info(struct bnxt *bp)
+ {
+-      int i, j, total_aggs = 0;
++      int i, j;
+       bp->max_tpa = MAX_TPA;
+       if (bp->flags & BNXT_FLAG_CHIP_P5) {
+               if (!bp->max_tpa_v2)
+                       return 0;
+               bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
+-              total_aggs = bp->max_tpa * MAX_SKB_FRAGS;
+       }
+       for (i = 0; i < bp->rx_nr_rings; i++) {
+@@ -2893,12 +2894,12 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp)
+               if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+                       continue;
+-              agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL);
+-              rxr->rx_tpa[0].agg_arr = agg;
+-              if (!agg)
+-                      return -ENOMEM;
+-              for (j = 1; j < bp->max_tpa; j++)
+-                      rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS;
++              for (j = 0; j < bp->max_tpa; j++) {
++                      agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL);
++                      if (!agg)
++                              return -ENOMEM;
++                      rxr->rx_tpa[j].agg_arr = agg;
++              }
+               rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
+                                             GFP_KERNEL);
+               if (!rxr->rx_tpa_idx_map)
+-- 
+2.39.2
+
diff --git a/queue-5.10/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch b/queue-5.10/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch
new file mode 100644 (file)
index 0000000..35a8169
--- /dev/null
@@ -0,0 +1,98 @@
+From 01a893a6cbfda04f112334facb1d06d1465492ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Mar 2023 11:21:37 +0000
+Subject: btf: fix resolving BTF_KIND_VAR after ARRAY, STRUCT, UNION, PTR
+
+From: Lorenz Bauer <lorenz.bauer@isovalent.com>
+
+[ Upstream commit 9b459804ff9973e173fabafba2a1319f771e85fa ]
+
+btf_datasec_resolve contains a bug that causes the following BTF
+to fail loading:
+
+    [1] DATASEC a size=2 vlen=2
+        type_id=4 offset=0 size=1
+        type_id=7 offset=1 size=1
+    [2] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none)
+    [3] PTR (anon) type_id=2
+    [4] VAR a type_id=3 linkage=0
+    [5] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none)
+    [6] TYPEDEF td type_id=5
+    [7] VAR b type_id=6 linkage=0
+
+This error message is printed during btf_check_all_types:
+
+    [1] DATASEC a size=2 vlen=2
+        type_id=7 offset=1 size=1 Invalid type
+
+By tracing btf_*_resolve we can pinpoint the problem:
+
+    btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_TBD) = 0
+        btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_TBD) = 0
+            btf_ptr_resolve(depth: 3, type_id: 3, mode: RESOLVE_PTR) = 0
+        btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_PTR) = 0
+    btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_PTR) = -22
+
+The last invocation of btf_datasec_resolve should invoke btf_var_resolve
+by means of env_stack_push, instead it returns EINVAL. The reason is that
+env_stack_push is never executed for the second VAR.
+
+    if (!env_type_is_resolve_sink(env, var_type) &&
+        !env_type_is_resolved(env, var_type_id)) {
+        env_stack_set_next_member(env, i + 1);
+        return env_stack_push(env, var_type, var_type_id);
+    }
+
+env_type_is_resolve_sink() changes its behaviour based on resolve_mode.
+For RESOLVE_PTR, we can simplify the if condition to the following:
+
+    (btf_type_is_modifier() || btf_type_is_ptr) && !env_type_is_resolved()
+
+Since we're dealing with a VAR the clause evaluates to false. This is
+not sufficient to trigger the bug however. The log output and EINVAL
+are only generated if btf_type_id_size() fails.
+
+    if (!btf_type_id_size(btf, &type_id, &type_size)) {
+        btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
+        return -EINVAL;
+    }
+
+Most types are sized, so for example a VAR referring to an INT is not a
+problem. The bug is only triggered if a VAR points at a modifier. Since
+we skipped btf_var_resolve that modifier was also never resolved, which
+means that btf_resolved_type_id returns 0 aka VOID for the modifier.
+This in turn causes btf_type_id_size to return NULL, triggering EINVAL.
+
+To summarise, the following conditions are necessary:
+
+- VAR pointing at PTR, STRUCT, UNION or ARRAY
+- Followed by a VAR pointing at TYPEDEF, VOLATILE, CONST, RESTRICT or
+  TYPE_TAG
+
+The fix is to reset resolve_mode to RESOLVE_TBD before attempting to
+resolve a VAR from a DATASEC.
+
+Fixes: 1dc92851849c ("bpf: kernel side support for BTF Var and DataSec")
+Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
+Link: https://lore.kernel.org/r/20230306112138.155352-2-lmb@isovalent.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/btf.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
+index 11b612e94e4e1..cb80d18a49b56 100644
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -3541,6 +3541,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env,
+       struct btf *btf = env->btf;
+       u16 i;
++      env->resolve_mode = RESOLVE_TBD;
+       for_each_vsi_from(i, v->next_member, v->t, vsi) {
+               u32 var_type_id = vsi->type, type_id, type_size = 0;
+               const struct btf_type *var_type = btf_type_by_id(env->btf,
+-- 
+2.39.2
+
diff --git a/queue-5.10/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch b/queue-5.10/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch
new file mode 100644 (file)
index 0000000..0b6306b
--- /dev/null
@@ -0,0 +1,49 @@
+From ba49194fa0fa953883ceba33e42bab3310521e19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2023 05:09:56 +0300
+Subject: drm/msm/a5xx: fix context faults during ring switch
+
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+[ Upstream commit 32e7083429d46f29080626fe387ff90c086b1fbe ]
+
+The rptr_addr is set in the preempt_init_ring(), which is called from
+a5xx_gpu_init(). It uses shadowptr() to set the address, however the
+shadow_iova is not yet initialized at that time. Move the rptr_addr
+setting to the a5xx_preempt_hw_init() which is called after setting the
+shadow_iova, getting the correct value for the address.
+
+Fixes: 8907afb476ac ("drm/msm: Allow a5xx to mark the RPTR shadow as privileged")
+Suggested-by: Rob Clark <robdclark@gmail.com>
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Patchwork: https://patchwork.freedesktop.org/patch/522640/
+Link: https://lore.kernel.org/r/20230214020956.164473-5-dmitry.baryshkov@linaro.org
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+index 9da0aff0072d7..b8e71ad6f8d8a 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+@@ -210,6 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
+               a5xx_gpu->preempt[i]->wptr = 0;
+               a5xx_gpu->preempt[i]->rptr = 0;
+               a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova;
++              a5xx_gpu->preempt[i]->rptr_addr = shadowptr(a5xx_gpu, gpu->rb[i]);
+       }
+       /* Write a 0 to signal that we aren't switching pagetables */
+@@ -261,7 +262,6 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu,
+       ptr->data = 0;
+       ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE;
+-      ptr->rptr_addr = shadowptr(a5xx_gpu, ring);
+       ptr->counter = counters_iova;
+       return 0;
+-- 
+2.39.2
+
diff --git a/queue-5.10/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch b/queue-5.10/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch
new file mode 100644 (file)
index 0000000..0e3eea0
--- /dev/null
@@ -0,0 +1,41 @@
+From 6bd9ad57e2a4d722982c28bea33c15721d1111fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2023 05:09:53 +0300
+Subject: drm/msm/a5xx: fix setting of the CP_PREEMPT_ENABLE_LOCAL register
+
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+[ Upstream commit a7a4c19c36de1e4b99b06e4060ccc8ab837725bc ]
+
+Rather than writing CP_PREEMPT_ENABLE_GLOBAL twice, follow the vendor
+kernel and set CP_PREEMPT_ENABLE_LOCAL register instead. a5xx_submit()
+will override it during submission, but let's get the sequence correct.
+
+Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets")
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Patchwork: https://patchwork.freedesktop.org/patch/522638/
+Link: https://lore.kernel.org/r/20230214020956.164473-2-dmitry.baryshkov@linaro.org
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+index 0ca7e53db112a..64da65ae6d67e 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+@@ -144,8 +144,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
+       OUT_RING(ring, 1);
+       /* Enable local preemption for finegrain preemption */
+-      OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+-      OUT_RING(ring, 0x02);
++      OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
++      OUT_RING(ring, 0x1);
+       /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
+       OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+-- 
+2.39.2
+
diff --git a/queue-5.10/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch b/queue-5.10/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch
new file mode 100644 (file)
index 0000000..71e32dd
--- /dev/null
@@ -0,0 +1,42 @@
+From 0eeb7ba42b85bfcad5a307f6b26dce6f8b22f535 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2023 05:09:55 +0300
+Subject: drm/msm/a5xx: fix the emptyness check in the preempt code
+
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+[ Upstream commit b4fb748f0b734ce1d2e7834998cc599fcbd25d67 ]
+
+Quoting Yassine: ring->memptrs->rptr is never updated and stays 0, so
+the comparison always evaluates to false and get_next_ring always
+returns ring 0 thinking it isn't empty.
+
+Fix this by calling get_rptr() instead of reading rptr directly.
+
+Reported-by: Yassine Oudjana <y.oudjana@protonmail.com>
+Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets")
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Patchwork: https://patchwork.freedesktop.org/patch/522642/
+Link: https://lore.kernel.org/r/20230214020956.164473-4-dmitry.baryshkov@linaro.org
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+index 183de1139eeb6..9da0aff0072d7 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+@@ -63,7 +63,7 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
+               struct msm_ringbuffer *ring = gpu->rb[i];
+               spin_lock_irqsave(&ring->preempt_lock, flags);
+-              empty = (get_wptr(ring) == ring->memptrs->rptr);
++              empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring));
+               spin_unlock_irqrestore(&ring->preempt_lock, flags);
+               if (!empty)
+-- 
+2.39.2
+
diff --git a/queue-5.10/drm-msm-document-and-rename-preempt_lock.patch b/queue-5.10/drm-msm-document-and-rename-preempt_lock.patch
new file mode 100644 (file)
index 0000000..fae94c9
--- /dev/null
@@ -0,0 +1,143 @@
+From 3c8d9d7d6bd4c73398e181bb2c3084cf13d15cc1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Oct 2020 09:51:15 -0700
+Subject: drm/msm: Document and rename preempt_lock
+
+From: Rob Clark <robdclark@chromium.org>
+
+[ Upstream commit 77c406038e830a4b6219b14a116cd2a6ac9f4908 ]
+
+Before adding another lock, give ring->lock a more descriptive name.
+
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Reviewed-by: Jordan Crouse <jcrouse@codeaurora.org>
+Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Stable-dep-of: b4fb748f0b73 ("drm/msm/a5xx: fix the emptyness check in the preempt code")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/adreno/a5xx_gpu.c     |  4 ++--
+ drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 12 ++++++------
+ drivers/gpu/drm/msm/adreno/a6xx_gpu.c     |  4 ++--
+ drivers/gpu/drm/msm/msm_ringbuffer.c      |  2 +-
+ drivers/gpu/drm/msm/msm_ringbuffer.h      |  7 ++++++-
+ 5 files changed, 17 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+index 64da65ae6d67e..6f84db97e20e8 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+@@ -36,7 +36,7 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+               OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
+       }
+-      spin_lock_irqsave(&ring->lock, flags);
++      spin_lock_irqsave(&ring->preempt_lock, flags);
+       /* Copy the shadow to the actual register */
+       ring->cur = ring->next;
+@@ -44,7 +44,7 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+       /* Make sure to wrap wptr if we need to */
+       wptr = get_wptr(ring);
+-      spin_unlock_irqrestore(&ring->lock, flags);
++      spin_unlock_irqrestore(&ring->preempt_lock, flags);
+       /* Make sure everything is posted before making a decision */
+       mb();
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+index 7e04509c4e1f0..183de1139eeb6 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+@@ -45,9 +45,9 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+       if (!ring)
+               return;
+-      spin_lock_irqsave(&ring->lock, flags);
++      spin_lock_irqsave(&ring->preempt_lock, flags);
+       wptr = get_wptr(ring);
+-      spin_unlock_irqrestore(&ring->lock, flags);
++      spin_unlock_irqrestore(&ring->preempt_lock, flags);
+       gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
+ }
+@@ -62,9 +62,9 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
+               bool empty;
+               struct msm_ringbuffer *ring = gpu->rb[i];
+-              spin_lock_irqsave(&ring->lock, flags);
++              spin_lock_irqsave(&ring->preempt_lock, flags);
+               empty = (get_wptr(ring) == ring->memptrs->rptr);
+-              spin_unlock_irqrestore(&ring->lock, flags);
++              spin_unlock_irqrestore(&ring->preempt_lock, flags);
+               if (!empty)
+                       return ring;
+@@ -132,9 +132,9 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
+       }
+       /* Make sure the wptr doesn't update while we're in motion */
+-      spin_lock_irqsave(&ring->lock, flags);
++      spin_lock_irqsave(&ring->preempt_lock, flags);
+       a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring);
+-      spin_unlock_irqrestore(&ring->lock, flags);
++      spin_unlock_irqrestore(&ring->preempt_lock, flags);
+       /* Set the address of the incoming preemption record */
+       gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
+diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+index dffc133b8b1cc..29b40acedb389 100644
+--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+@@ -65,7 +65,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+               OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
+       }
+-      spin_lock_irqsave(&ring->lock, flags);
++      spin_lock_irqsave(&ring->preempt_lock, flags);
+       /* Copy the shadow to the actual register */
+       ring->cur = ring->next;
+@@ -73,7 +73,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+       /* Make sure to wrap wptr if we need to */
+       wptr = get_wptr(ring);
+-      spin_unlock_irqrestore(&ring->lock, flags);
++      spin_unlock_irqrestore(&ring->preempt_lock, flags);
+       /* Make sure everything is posted before making a decision */
+       mb();
+diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
+index 935bf9b1d9418..1b6958e908dca 100644
+--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
++++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
+@@ -46,7 +46,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
+       ring->memptrs_iova = memptrs_iova;
+       INIT_LIST_HEAD(&ring->submits);
+-      spin_lock_init(&ring->lock);
++      spin_lock_init(&ring->preempt_lock);
+       snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);
+diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
+index 0987d6bf848cf..4956d1bc5d0e1 100644
+--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
++++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
+@@ -46,7 +46,12 @@ struct msm_ringbuffer {
+       struct msm_rbmemptrs *memptrs;
+       uint64_t memptrs_iova;
+       struct msm_fence_context *fctx;
+-      spinlock_t lock;
++
++      /*
++       * preempt_lock protects preemption and serializes wptr updates against
++       * preemption.  Can be aquired from irq context.
++       */
++      spinlock_t preempt_lock;
+ };
+ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
+-- 
+2.39.2
+
diff --git a/queue-5.10/drm-msm-fix-potential-invalid-ptr-free.patch b/queue-5.10/drm-msm-fix-potential-invalid-ptr-free.patch
new file mode 100644 (file)
index 0000000..8434a98
--- /dev/null
@@ -0,0 +1,49 @@
+From 75adf877270713e38c7156498a872511c6bffb9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 15:50:48 -0800
+Subject: drm/msm: Fix potential invalid ptr free
+
+From: Rob Clark <robdclark@chromium.org>
+
+[ Upstream commit 8a86f213f4426f19511a16d886871805b35c3acf ]
+
+The error path cleanup expects that chain and syncobj are either NULL or
+valid pointers.  But post_deps was not allocated with __GFP_ZERO.
+
+Fixes: ab723b7a992a ("drm/msm: Add syncobj support.")
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
+Patchwork: https://patchwork.freedesktop.org/patch/523051/
+Link: https://lore.kernel.org/r/20230215235048.1166484-1-robdclark@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/msm_gem_submit.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
+index aa5c60a7132d8..c4e5037512b9d 100644
+--- a/drivers/gpu/drm/msm/msm_gem_submit.c
++++ b/drivers/gpu/drm/msm/msm_gem_submit.c
+@@ -494,8 +494,8 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
+       int ret = 0;
+       uint32_t i, j;
+-      post_deps = kmalloc_array(nr_syncobjs, sizeof(*post_deps),
+-                                GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
++      post_deps = kcalloc(nr_syncobjs, sizeof(*post_deps),
++                          GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+       if (!post_deps)
+               return ERR_PTR(-ENOMEM);
+@@ -510,7 +510,6 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
+               }
+               post_deps[i].point = syncobj_desc.point;
+-              post_deps[i].chain = NULL;
+               if (syncobj_desc.flags) {
+                       ret = -EINVAL;
+-- 
+2.39.2
+
diff --git a/queue-5.10/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch b/queue-5.10/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch
new file mode 100644 (file)
index 0000000..e161a9c
--- /dev/null
@@ -0,0 +1,64 @@
+From ebecc8c1d6e71abb356922a23a6f45847343f593 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Oct 2022 12:42:29 +0100
+Subject: drm/nouveau/kms/nv50: fix nv50_wndw_new_ prototype
+
+From: Jiri Slaby (SUSE) <jirislaby@kernel.org>
+
+[ Upstream commit 3638a820c5c3b52f327cebb174fd4274bee08aa7 ]
+
+gcc-13 warns about mismatching types for enums. That revealed switched
+arguments of nv50_wndw_new_():
+  drivers/gpu/drm/nouveau/dispnv50/wndw.c:696:1: error: conflicting types for 'nv50_wndw_new_' due to enum/integer mismatch; have 'int(const struct nv50_wndw_func *, struct drm_device *, enum drm_plane_type,  const char *, int,  const u32 *, u32,  enum nv50_disp_interlock_type,  u32,  struct nv50_wndw **)'
+  drivers/gpu/drm/nouveau/dispnv50/wndw.h:36:5: note: previous declaration of 'nv50_wndw_new_' with type 'int(const struct nv50_wndw_func *, struct drm_device *, enum drm_plane_type,  const char *, int,  const u32 *, enum nv50_disp_interlock_type,  u32,  u32,  struct nv50_wndw **)'
+
+It can be barely visible, but the declaration says about the parameters
+in the middle:
+  enum nv50_disp_interlock_type,
+  u32 interlock_data,
+  u32 heads,
+
+While the definition states differently:
+  u32 heads,
+  enum nv50_disp_interlock_type interlock_type,
+  u32 interlock_data,
+
+Unify/fix the declaration to match the definition.
+
+Fixes: 53e0a3e70de6 ("drm/nouveau/kms/nv50-: simplify tracking of channel interlocks")
+Cc: Martin Liska <mliska@suse.cz>
+Cc: Ben Skeggs <bskeggs@redhat.com>
+Cc: Karol Herbst <kherbst@redhat.com>
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: David Airlie <airlied@gmail.com>
+Cc: Daniel Vetter <daniel@ffwll.ch>
+Cc: dri-devel@lists.freedesktop.org
+Cc: nouveau@lists.freedesktop.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221031114229.10289-1-jirislaby@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/dispnv50/wndw.h | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+index 8bed195ae098a..77bf124319fbd 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h
++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+@@ -38,8 +38,9 @@ struct nv50_wndw {
+ int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *,
+                  enum drm_plane_type, const char *name, int index,
+-                 const u32 *format, enum nv50_disp_interlock_type,
+-                 u32 interlock_data, u32 heads, struct nv50_wndw **);
++                 const u32 *format, u32 heads,
++                 enum nv50_disp_interlock_type, u32 interlock_data,
++                 struct nv50_wndw **);
+ void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock,
+                        struct nv50_wndw_atom *);
+ void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush,
+-- 
+2.39.2
+
diff --git a/queue-5.10/drm-nouveau-kms-nv50-remove-unused-functions.patch b/queue-5.10/drm-nouveau-kms-nv50-remove-unused-functions.patch
new file mode 100644 (file)
index 0000000..8e7f2bb
--- /dev/null
@@ -0,0 +1,100 @@
+From 08dfe9bad732d3e0ae2dc55714376d34aa89b0bd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Jun 2022 20:46:06 +1000
+Subject: drm/nouveau/kms/nv50-: remove unused functions
+
+From: Ben Skeggs <bskeggs@redhat.com>
+
+[ Upstream commit 89ed996b888faaf11c69bb4cbc19f21475c9050e ]
+
+Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
+Reviewed-by: Dave Airlie <airlied@redhat.com>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+Stable-dep-of: 3638a820c5c3 ("drm/nouveau/kms/nv50: fix nv50_wndw_new_ prototype")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/dispnv50/disp.c | 16 ----------------
+ drivers/gpu/drm/nouveau/dispnv50/wndw.c | 12 ------------
+ drivers/gpu/drm/nouveau/dispnv50/wndw.h |  2 --
+ 3 files changed, 30 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
+index c2d34c91e840c..804ea035fa46b 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
++++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
+@@ -2555,14 +2555,6 @@ nv50_display_fini(struct drm_device *dev, bool runtime, bool suspend)
+ {
+       struct nouveau_drm *drm = nouveau_drm(dev);
+       struct drm_encoder *encoder;
+-      struct drm_plane *plane;
+-
+-      drm_for_each_plane(plane, dev) {
+-              struct nv50_wndw *wndw = nv50_wndw(plane);
+-              if (plane->funcs != &nv50_wndw)
+-                      continue;
+-              nv50_wndw_fini(wndw);
+-      }
+       list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+               if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST)
+@@ -2578,7 +2570,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime)
+ {
+       struct nv50_core *core = nv50_disp(dev)->core;
+       struct drm_encoder *encoder;
+-      struct drm_plane *plane;
+       if (resume || runtime)
+               core->func->init(core);
+@@ -2591,13 +2582,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime)
+               }
+       }
+-      drm_for_each_plane(plane, dev) {
+-              struct nv50_wndw *wndw = nv50_wndw(plane);
+-              if (plane->funcs != &nv50_wndw)
+-                      continue;
+-              nv50_wndw_init(wndw);
+-      }
+-
+       return 0;
+ }
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+index f07916ffe42cb..831125b4453df 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+@@ -690,18 +690,6 @@ nv50_wndw_notify(struct nvif_notify *notify)
+       return NVIF_NOTIFY_KEEP;
+ }
+-void
+-nv50_wndw_fini(struct nv50_wndw *wndw)
+-{
+-      nvif_notify_put(&wndw->notify);
+-}
+-
+-void
+-nv50_wndw_init(struct nv50_wndw *wndw)
+-{
+-      nvif_notify_get(&wndw->notify);
+-}
+-
+ static const u64 nv50_cursor_format_modifiers[] = {
+       DRM_FORMAT_MOD_LINEAR,
+       DRM_FORMAT_MOD_INVALID,
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+index 3278e28800343..8bed195ae098a 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h
++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+@@ -40,8 +40,6 @@ int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *,
+                  enum drm_plane_type, const char *name, int index,
+                  const u32 *format, enum nv50_disp_interlock_type,
+                  u32 interlock_data, u32 heads, struct nv50_wndw **);
+-void nv50_wndw_init(struct nv50_wndw *);
+-void nv50_wndw_fini(struct nv50_wndw *);
+ void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock,
+                        struct nv50_wndw_atom *);
+ void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush,
+-- 
+2.39.2
+
diff --git a/queue-5.10/efi-earlycon-replace-open-coded-strnchrnul.patch b/queue-5.10/efi-earlycon-replace-open-coded-strnchrnul.patch
new file mode 100644 (file)
index 0000000..33a8c61
--- /dev/null
@@ -0,0 +1,55 @@
+From 6e92a7ec121c7e23afadefc601975ace938df6db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Dec 2022 00:12:16 +0200
+Subject: efi/earlycon: Replace open coded strnchrnul()
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit b7a1cd243839cc1459fbc83a7a62e3b57f29f497 ]
+
+strnchrnul() can be called in the early stages. Replace
+open coded variant in the EFI early console driver.
+
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Stable-dep-of: 0e68b5517d37 ("arm64: efi: Make efi_rt_lock a raw_spinlock")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/earlycon.c | 13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c
+index a52236e11e5f7..fc233b6f27cb2 100644
+--- a/drivers/firmware/efi/earlycon.c
++++ b/drivers/firmware/efi/earlycon.c
+@@ -10,6 +10,7 @@
+ #include <linux/kernel.h>
+ #include <linux/serial_core.h>
+ #include <linux/screen_info.h>
++#include <linux/string.h>
+ #include <asm/early_ioremap.h>
+@@ -143,16 +144,10 @@ efi_earlycon_write(struct console *con, const char *str, unsigned int num)
+       len = si->lfb_linelength;
+       while (num) {
+-              unsigned int linemax;
+-              unsigned int h, count = 0;
++              unsigned int linemax = (si->lfb_width - efi_x) / font->width;
++              unsigned int h, count;
+-              for (s = str; *s && *s != '\n'; s++) {
+-                      if (count == num)
+-                              break;
+-                      count++;
+-              }
+-
+-              linemax = (si->lfb_width - efi_x) / font->width;
++              count = strnchrnul(str, num, '\n') - str;
+               if (count > linemax)
+                       count = linemax;
+-- 
+2.39.2
+
diff --git a/queue-5.10/ext4-fix-possible-corruption-when-moving-a-directory.patch b/queue-5.10/ext4-fix-possible-corruption-when-moving-a-directory.patch
new file mode 100644 (file)
index 0000000..ac09fa3
--- /dev/null
@@ -0,0 +1,59 @@
+From f3fd8dc76223f6f1f523e9da85d149c534d08103 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 Jan 2023 12:22:21 +0100
+Subject: ext4: Fix possible corruption when moving a directory
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 0813299c586b175d7edb25f56412c54b812d0379 ]
+
+When we are renaming a directory to a different directory, we need to
+update '..' entry in the moved directory. However nothing prevents moved
+directory from being modified and even converted from the inline format
+to the normal format. When such race happens the rename code gets
+confused and we crash. Fix the problem by locking the moved directory.
+
+CC: stable@vger.kernel.org
+Fixes: 32f7f22c0b52 ("ext4: let ext4_rename handle inline dir")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230126112221.11866-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/namei.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 6f335d58183ee..17590bb769147 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -3923,9 +3923,16 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
+                       if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
+                               goto end_rename;
+               }
++              /*
++               * We need to protect against old.inode directory getting
++               * converted from inline directory format into a normal one.
++               */
++              inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
+               retval = ext4_rename_dir_prepare(handle, &old);
+-              if (retval)
++              if (retval) {
++                      inode_unlock(old.inode);
+                       goto end_rename;
++              }
+       }
+       /*
+        * If we're renaming a file within an inline_data dir and adding or
+@@ -4050,6 +4057,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
+       } else {
+               ext4_journal_stop(handle);
+       }
++      if (old.dir_bh)
++              inode_unlock(old.inode);
+ release_bh:
+       brelse(old.dir_bh);
+       brelse(old.bh);
+-- 
+2.39.2
+
diff --git a/queue-5.10/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch b/queue-5.10/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch
new file mode 100644 (file)
index 0000000..f59bda9
--- /dev/null
@@ -0,0 +1,113 @@
+From 1339c021f60dc9b4a3e13f93010a5ec6da10eddb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Feb 2023 15:30:24 +0000
+Subject: ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 693aa2c0d9b6d5b1f2745d31b6e70d09dbbaf06e ]
+
+ila_xlat_nl_cmd_get_mapping() generates an empty skb,
+triggerring a recent sanity check [1].
+
+Instead, return an error code, so that user space
+can get it.
+
+[1]
+skb_assert_len
+WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 skb_assert_len include/linux/skbuff.h:2527 [inline]
+WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
+Modules linked in:
+CPU: 0 PID: 5923 Comm: syz-executor269 Not tainted 6.2.0-syzkaller-18300-g2ebd1fbb946d #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023
+pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+pc : skb_assert_len include/linux/skbuff.h:2527 [inline]
+pc : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
+lr : skb_assert_len include/linux/skbuff.h:2527 [inline]
+lr : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
+sp : ffff80001e0d6c40
+x29: ffff80001e0d6e60 x28: dfff800000000000 x27: ffff0000c86328c0
+x26: dfff800000000000 x25: ffff0000c8632990 x24: ffff0000c8632a00
+x23: 0000000000000000 x22: 1fffe000190c6542 x21: ffff0000c8632a10
+x20: ffff0000c8632a00 x19: ffff80001856e000 x18: ffff80001e0d5fc0
+x17: 0000000000000000 x16: ffff80001235d16c x15: 0000000000000000
+x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001
+x11: ff80800008353a30 x10: 0000000000000000 x9 : 21567eaf25bfb600
+x8 : 21567eaf25bfb600 x7 : 0000000000000001 x6 : 0000000000000001
+x5 : ffff80001e0d6558 x4 : ffff800015c74760 x3 : ffff800008596744
+x2 : 0000000000000001 x1 : 0000000100000000 x0 : 000000000000000e
+Call trace:
+skb_assert_len include/linux/skbuff.h:2527 [inline]
+__dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
+dev_queue_xmit include/linux/netdevice.h:3033 [inline]
+__netlink_deliver_tap_skb net/netlink/af_netlink.c:307 [inline]
+__netlink_deliver_tap+0x45c/0x6f8 net/netlink/af_netlink.c:325
+netlink_deliver_tap+0xf4/0x174 net/netlink/af_netlink.c:338
+__netlink_sendskb net/netlink/af_netlink.c:1283 [inline]
+netlink_sendskb+0x6c/0x154 net/netlink/af_netlink.c:1292
+netlink_unicast+0x334/0x8d4 net/netlink/af_netlink.c:1380
+nlmsg_unicast include/net/netlink.h:1099 [inline]
+genlmsg_unicast include/net/genetlink.h:433 [inline]
+genlmsg_reply include/net/genetlink.h:443 [inline]
+ila_xlat_nl_cmd_get_mapping+0x620/0x7d0 net/ipv6/ila/ila_xlat.c:493
+genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline]
+genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline]
+genl_rcv_msg+0x938/0xc1c net/netlink/genetlink.c:1065
+netlink_rcv_skb+0x214/0x3c4 net/netlink/af_netlink.c:2574
+genl_rcv+0x38/0x50 net/netlink/genetlink.c:1076
+netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
+netlink_unicast+0x660/0x8d4 net/netlink/af_netlink.c:1365
+netlink_sendmsg+0x800/0xae0 net/netlink/af_netlink.c:1942
+sock_sendmsg_nosec net/socket.c:714 [inline]
+sock_sendmsg net/socket.c:734 [inline]
+____sys_sendmsg+0x558/0x844 net/socket.c:2479
+___sys_sendmsg net/socket.c:2533 [inline]
+__sys_sendmsg+0x26c/0x33c net/socket.c:2562
+__do_sys_sendmsg net/socket.c:2571 [inline]
+__se_sys_sendmsg net/socket.c:2569 [inline]
+__arm64_sys_sendmsg+0x80/0x94 net/socket.c:2569
+__invoke_syscall arch/arm64/kernel/syscall.c:38 [inline]
+invoke_syscall+0x98/0x2c0 arch/arm64/kernel/syscall.c:52
+el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142
+do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:193
+el0_svc+0x58/0x168 arch/arm64/kernel/entry-common.c:637
+el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655
+el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591
+irq event stamp: 136484
+hardirqs last enabled at (136483): [<ffff800008350244>] __up_console_sem+0x60/0xb4 kernel/printk/printk.c:345
+hardirqs last disabled at (136484): [<ffff800012358d60>] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:405
+softirqs last enabled at (136418): [<ffff800008020ea8>] softirq_handle_end kernel/softirq.c:414 [inline]
+softirqs last enabled at (136418): [<ffff800008020ea8>] __do_softirq+0xd4c/0xfa4 kernel/softirq.c:600
+softirqs last disabled at (136371): [<ffff80000802b4a4>] ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80
+---[ end trace 0000000000000000 ]---
+skb len=0 headroom=0 headlen=0 tailroom=192
+mac=(0,0) net=(0,-1) trans=-1
+shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0))
+csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0)
+hash(0x0 sw=0 l4=0) proto=0x0010 pkttype=6 iif=0
+dev name=nlmon0 feat=0x0000000000005861
+
+Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ila/ila_xlat.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
+index a1ac0e3d8c60c..163668531a57f 100644
+--- a/net/ipv6/ila/ila_xlat.c
++++ b/net/ipv6/ila/ila_xlat.c
+@@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
+       rcu_read_lock();
++      ret = -ESRCH;
+       ila = ila_lookup_by_params(&xp, ilan);
+       if (ila) {
+               ret = ila_dump_info(ila,
+-- 
+2.39.2
+
diff --git a/queue-5.10/iommu-vt-d-fix-lockdep-splat-in-intel_pasid_get_entr.patch b/queue-5.10/iommu-vt-d-fix-lockdep-splat-in-intel_pasid_get_entr.patch
new file mode 100644 (file)
index 0000000..9e4bc70
--- /dev/null
@@ -0,0 +1,101 @@
+From 6f729f8ef0d0d13b35f56334c130830099a64bbe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 20 Mar 2021 10:09:16 +0800
+Subject: iommu/vt-d: Fix lockdep splat in intel_pasid_get_entry()
+
+From: Lu Baolu <baolu.lu@linux.intel.com>
+
+[ Upstream commit 803766cbf85fb8edbf896729bbefc2d38dcf1e0a ]
+
+The pasid_lock is used to synchronize different threads from modifying a
+same pasid directory entry at the same time. It causes below lockdep splat.
+
+[   83.296538] ========================================================
+[   83.296538] WARNING: possible irq lock inversion dependency detected
+[   83.296539] 5.12.0-rc3+ #25 Tainted: G        W
+[   83.296539] --------------------------------------------------------
+[   83.296540] bash/780 just changed the state of lock:
+[   83.296540] ffffffff82b29c98 (device_domain_lock){..-.}-{2:2}, at:
+           iommu_flush_dev_iotlb.part.0+0x32/0x110
+[   83.296547] but this lock took another, SOFTIRQ-unsafe lock in the past:
+[   83.296547]  (pasid_lock){+.+.}-{2:2}
+[   83.296548]
+
+           and interrupts could create inverse lock ordering between them.
+
+[   83.296549] other info that might help us debug this:
+[   83.296549] Chain exists of:
+                 device_domain_lock --> &iommu->lock --> pasid_lock
+[   83.296551]  Possible interrupt unsafe locking scenario:
+
+[   83.296551]        CPU0                    CPU1
+[   83.296552]        ----                    ----
+[   83.296552]   lock(pasid_lock);
+[   83.296553]                                local_irq_disable();
+[   83.296553]                                lock(device_domain_lock);
+[   83.296554]                                lock(&iommu->lock);
+[   83.296554]   <Interrupt>
+[   83.296554]     lock(device_domain_lock);
+[   83.296555]
+                *** DEADLOCK ***
+
+Fix it by replacing the pasid_lock with an atomic exchange operation.
+
+Reported-and-tested-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Link: https://lore.kernel.org/r/20210320020916.640115-1-baolu.lu@linux.intel.com
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Stable-dep-of: 194b3348bdbb ("iommu/vt-d: Fix PASID directory pointer coherency")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/pasid.c | 21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
+index 86fd49ae7f612..f821153390e53 100644
+--- a/drivers/iommu/intel/pasid.c
++++ b/drivers/iommu/intel/pasid.c
+@@ -24,7 +24,6 @@
+ /*
+  * Intel IOMMU system wide PASID name space:
+  */
+-static DEFINE_SPINLOCK(pasid_lock);
+ u32 intel_pasid_max_id = PASID_MAX;
+ int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
+@@ -259,19 +258,25 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
+       dir_index = pasid >> PASID_PDE_SHIFT;
+       index = pasid & PASID_PTE_MASK;
+-      spin_lock(&pasid_lock);
++retry:
+       entries = get_pasid_table_from_pde(&dir[dir_index]);
+       if (!entries) {
+               entries = alloc_pgtable_page(info->iommu->node);
+-              if (!entries) {
+-                      spin_unlock(&pasid_lock);
++              if (!entries)
+                       return NULL;
+-              }
+-              WRITE_ONCE(dir[dir_index].val,
+-                         (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
++              /*
++               * The pasid directory table entry won't be freed after
++               * allocation. No worry about the race with free and
++               * clear. However, this entry might be populated by others
++               * while we are preparing it. Use theirs with a retry.
++               */
++              if (cmpxchg64(&dir[dir_index].val, 0ULL,
++                            (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
++                      free_pgtable_page(entries);
++                      goto retry;
++              }
+       }
+-      spin_unlock(&pasid_lock);
+       return &entries[index];
+ }
+-- 
+2.39.2
+
diff --git a/queue-5.10/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch b/queue-5.10/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch
new file mode 100644 (file)
index 0000000..8f482b6
--- /dev/null
@@ -0,0 +1,82 @@
+From c5ea5be059771ebbdbacad4dbe4a9958b7cd028b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Feb 2023 21:08:15 +0800
+Subject: iommu/vt-d: Fix PASID directory pointer coherency
+
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+[ Upstream commit 194b3348bdbb7db65375c72f3f774aee4cc6614e ]
+
+On platforms that do not support IOMMU Extended capability bit 0
+Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing
+any translation structures. IOMMU access goes only directly to
+memory. Intel IOMMU code was missing a flush for the PASID table
+directory that resulted in the unrecoverable fault as shown below.
+
+This patch adds clflush calls whenever allocating and updating
+a PASID table directory to ensure cache coherency.
+
+On the reverse direction, there's no need to clflush the PASID directory
+pointer when we deactivate a context entry in that IOMMU hardware will
+not see the old PASID directory pointer after we clear the context entry.
+PASID directory entries are also never freed once allocated.
+
+ DMAR: DRHD: handling fault status reg 3
+ DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000
+       [fault reason 0x51] SM: Present bit in Directory Entry is clear
+ DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
+ DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
+ DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
+ DMAR: pasid dir entry: 0x0000000101b4e001
+ DMAR: pasid table entry[0]: 0x0000000000000109
+ DMAR: pasid table entry[1]: 0x0000000000000001
+ DMAR: pasid table entry[2]: 0x0000000000000000
+ DMAR: pasid table entry[3]: 0x0000000000000000
+ DMAR: pasid table entry[4]: 0x0000000000000000
+ DMAR: pasid table entry[5]: 0x0000000000000000
+ DMAR: pasid table entry[6]: 0x0000000000000000
+ DMAR: pasid table entry[7]: 0x0000000000000000
+ DMAR: PTE not present at level 4
+
+Cc: <stable@vger.kernel.org>
+Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables")
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reported-by: Sukumar Ghorai <sukumar.ghorai@intel.com>
+Signed-off-by: Ashok Raj <ashok.raj@intel.com>
+Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.intel.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/pasid.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
+index f821153390e53..80d6412e2c546 100644
+--- a/drivers/iommu/intel/pasid.c
++++ b/drivers/iommu/intel/pasid.c
+@@ -186,6 +186,9 @@ int intel_pasid_alloc_table(struct device *dev)
+ attach_out:
+       device_attach_pasid_table(info, pasid_table);
++      if (!ecap_coherent(info->iommu->ecap))
++              clflush_cache_range(pasid_table->table, size);
++
+       return 0;
+ }
+@@ -276,6 +279,10 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
+                       free_pgtable_page(entries);
+                       goto retry;
+               }
++              if (!ecap_coherent(info->iommu->ecap)) {
++                      clflush_cache_range(entries, VTD_PAGE_SIZE);
++                      clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
++              }
+       }
+       return &entries[index];
+-- 
+2.39.2
+
diff --git a/queue-5.10/irq-fix-typos-in-comments.patch b/queue-5.10/irq-fix-typos-in-comments.patch
new file mode 100644 (file)
index 0000000..e85661b
--- /dev/null
@@ -0,0 +1,465 @@
+From aeb29bf469c12a96239daf3b5055dbb7cc916184 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Mar 2021 04:21:30 +0100
+Subject: irq: Fix typos in comments
+
+From: Ingo Molnar <mingo@kernel.org>
+
+[ Upstream commit a359f757965aafd0f58570de95dc6bc06cf12a9c ]
+
+Fix ~36 single-word typos in the IRQ, irqchip and irqdomain code comments.
+
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Stable-dep-of: 6e6f75c9c98d ("irqdomain: Look for existing mapping only once")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-aspeed-vic.c       |  4 ++--
+ drivers/irqchip/irq-bcm7120-l2.c       |  2 +-
+ drivers/irqchip/irq-csky-apb-intc.c    |  2 +-
+ drivers/irqchip/irq-gic-v2m.c          |  2 +-
+ drivers/irqchip/irq-gic-v3-its.c       | 10 +++++-----
+ drivers/irqchip/irq-gic-v3.c           |  2 +-
+ drivers/irqchip/irq-loongson-pch-pic.c |  2 +-
+ drivers/irqchip/irq-meson-gpio.c       |  2 +-
+ drivers/irqchip/irq-mtk-cirq.c         |  2 +-
+ drivers/irqchip/irq-mxs.c              |  4 ++--
+ drivers/irqchip/irq-sun4i.c            |  2 +-
+ drivers/irqchip/irq-ti-sci-inta.c      |  2 +-
+ drivers/irqchip/irq-vic.c              |  4 ++--
+ drivers/irqchip/irq-xilinx-intc.c      |  2 +-
+ include/linux/irq.h                    |  4 ++--
+ include/linux/irqdesc.h                |  2 +-
+ kernel/irq/chip.c                      |  2 +-
+ kernel/irq/dummychip.c                 |  2 +-
+ kernel/irq/irqdesc.c                   |  2 +-
+ kernel/irq/irqdomain.c                 |  8 ++++----
+ kernel/irq/manage.c                    |  6 +++---
+ kernel/irq/msi.c                       |  2 +-
+ kernel/irq/timings.c                   |  2 +-
+ 23 files changed, 36 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/irqchip/irq-aspeed-vic.c b/drivers/irqchip/irq-aspeed-vic.c
+index 6567ed782f82c..58717cd44f99f 100644
+--- a/drivers/irqchip/irq-aspeed-vic.c
++++ b/drivers/irqchip/irq-aspeed-vic.c
+@@ -71,7 +71,7 @@ static void vic_init_hw(struct aspeed_vic *vic)
+       writel(0, vic->base + AVIC_INT_SELECT);
+       writel(0, vic->base + AVIC_INT_SELECT + 4);
+-      /* Some interrupts have a programable high/low level trigger
++      /* Some interrupts have a programmable high/low level trigger
+        * (4 GPIO direct inputs), for now we assume this was configured
+        * by firmware. We read which ones are edge now.
+        */
+@@ -203,7 +203,7 @@ static int __init avic_of_init(struct device_node *node,
+       }
+       vic->base = regs;
+-      /* Initialize soures, all masked */
++      /* Initialize sources, all masked */
+       vic_init_hw(vic);
+       /* Ready to receive interrupts */
+diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
+index 7d776c905b7d2..1c2c5bd5a9fc1 100644
+--- a/drivers/irqchip/irq-bcm7120-l2.c
++++ b/drivers/irqchip/irq-bcm7120-l2.c
+@@ -310,7 +310,7 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn,
+               if (data->can_wake) {
+                       /* This IRQ chip can wake the system, set all
+-                       * relevant child interupts in wake_enabled mask
++                       * relevant child interrupts in wake_enabled mask
+                        */
+                       gc->wake_enabled = 0xffffffff;
+                       gc->wake_enabled &= ~gc->unused;
+diff --git a/drivers/irqchip/irq-csky-apb-intc.c b/drivers/irqchip/irq-csky-apb-intc.c
+index 5a2ec43b7ddd4..ab91afa867557 100644
+--- a/drivers/irqchip/irq-csky-apb-intc.c
++++ b/drivers/irqchip/irq-csky-apb-intc.c
+@@ -176,7 +176,7 @@ gx_intc_init(struct device_node *node, struct device_node *parent)
+       writel(0x0, reg_base + GX_INTC_NEN63_32);
+       /*
+-       * Initial mask reg with all unmasked, because we only use enalbe reg
++       * Initial mask reg with all unmasked, because we only use enable reg
+        */
+       writel(0x0, reg_base + GX_INTC_NMASK31_00);
+       writel(0x0, reg_base + GX_INTC_NMASK63_32);
+diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
+index fbec07d634ad2..4116b48e60aff 100644
+--- a/drivers/irqchip/irq-gic-v2m.c
++++ b/drivers/irqchip/irq-gic-v2m.c
+@@ -371,7 +371,7 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
+        * the MSI data is the absolute value within the range from
+        * spi_start to (spi_start + num_spis).
+        *
+-       * Broadom NS2 GICv2m implementation has an erratum where the MSI data
++       * Broadcom NS2 GICv2m implementation has an erratum where the MSI data
+        * is 'spi_number - 32'
+        *
+        * Reading that register fails on the Graviton implementation
+diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
+index d8cb5bcd6b10e..5ec091c64d47f 100644
+--- a/drivers/irqchip/irq-gic-v3-its.c
++++ b/drivers/irqchip/irq-gic-v3-its.c
+@@ -1492,7 +1492,7 @@ static void its_vlpi_set_doorbell(struct irq_data *d, bool enable)
+        *
+        * Ideally, we'd issue a VMAPTI to set the doorbell to its LPI
+        * value or to 1023, depending on the enable bit. But that
+-       * would be issueing a mapping for an /existing/ DevID+EventID
++       * would be issuing a mapping for an /existing/ DevID+EventID
+        * pair, which is UNPREDICTABLE. Instead, let's issue a VMOVI
+        * to the /same/ vPE, using this opportunity to adjust the
+        * doorbell. Mouahahahaha. We loves it, Precious.
+@@ -3122,7 +3122,7 @@ static void its_cpu_init_lpis(void)
+               /*
+                * It's possible for CPU to receive VLPIs before it is
+-               * sheduled as a vPE, especially for the first CPU, and the
++               * scheduled as a vPE, especially for the first CPU, and the
+                * VLPI with INTID larger than 2^(IDbits+1) will be considered
+                * as out of range and dropped by GIC.
+                * So we initialize IDbits to known value to avoid VLPI drop.
+@@ -3613,7 +3613,7 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+       /*
+        * If all interrupts have been freed, start mopping the
+-       * floor. This is conditionned on the device not being shared.
++       * floor. This is conditioned on the device not being shared.
+        */
+       if (!its_dev->shared &&
+           bitmap_empty(its_dev->event_map.lpi_map,
+@@ -4187,7 +4187,7 @@ static int its_sgi_set_affinity(struct irq_data *d,
+ {
+       /*
+        * There is no notion of affinity for virtual SGIs, at least
+-       * not on the host (since they can only be targetting a vPE).
++       * not on the host (since they can only be targeting a vPE).
+        * Tell the kernel we've done whatever it asked for.
+        */
+       irq_data_update_effective_affinity(d, mask_val);
+@@ -4232,7 +4232,7 @@ static int its_sgi_get_irqchip_state(struct irq_data *d,
+       /*
+        * Locking galore! We can race against two different events:
+        *
+-       * - Concurent vPE affinity change: we must make sure it cannot
++       * - Concurrent vPE affinity change: we must make sure it cannot
+        *   happen, or we'll talk to the wrong redistributor. This is
+        *   identical to what happens with vLPIs.
+        *
+diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
+index 4c8f18f0cecf8..2805969e4f15a 100644
+--- a/drivers/irqchip/irq-gic-v3.c
++++ b/drivers/irqchip/irq-gic-v3.c
+@@ -1456,7 +1456,7 @@ static int gic_irq_domain_translate(struct irq_domain *d,
+               /*
+                * Make it clear that broken DTs are... broken.
+-               * Partitionned PPIs are an unfortunate exception.
++               * Partitioned PPIs are an unfortunate exception.
+                */
+               WARN_ON(*type == IRQ_TYPE_NONE &&
+                       fwspec->param[0] != GIC_IRQ_TYPE_PARTITION);
+diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
+index 90e1ad6e36120..a4eb8a2181c7f 100644
+--- a/drivers/irqchip/irq-loongson-pch-pic.c
++++ b/drivers/irqchip/irq-loongson-pch-pic.c
+@@ -180,7 +180,7 @@ static void pch_pic_reset(struct pch_pic *priv)
+       int i;
+       for (i = 0; i < PIC_COUNT; i++) {
+-              /* Write vectore ID */
++              /* Write vectored ID */
+               writeb(priv->ht_vec_base + i, priv->base + PCH_INT_HTVEC(i));
+               /* Hardcode route to HT0 Lo */
+               writeb(1, priv->base + PCH_INT_ROUTE(i));
+diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
+index bc7aebcc96e9c..e50676ce2ec84 100644
+--- a/drivers/irqchip/irq-meson-gpio.c
++++ b/drivers/irqchip/irq-meson-gpio.c
+@@ -227,7 +227,7 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl,
+       /*
+        * Get the hwirq number assigned to this channel through
+-       * a pointer the channel_irq table. The added benifit of this
++       * a pointer the channel_irq table. The added benefit of this
+        * method is that we can also retrieve the channel index with
+        * it, using the table base.
+        */
+diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c
+index 69ba8ce3c1785..9bca0918078e8 100644
+--- a/drivers/irqchip/irq-mtk-cirq.c
++++ b/drivers/irqchip/irq-mtk-cirq.c
+@@ -217,7 +217,7 @@ static void mtk_cirq_resume(void)
+ {
+       u32 value;
+-      /* flush recored interrupts, will send signals to parent controller */
++      /* flush recorded interrupts, will send signals to parent controller */
+       value = readl_relaxed(cirq_data->base + CIRQ_CONTROL);
+       writel_relaxed(value | CIRQ_FLUSH, cirq_data->base + CIRQ_CONTROL);
+diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
+index a671938fd97f6..d1f5740cd5755 100644
+--- a/drivers/irqchip/irq-mxs.c
++++ b/drivers/irqchip/irq-mxs.c
+@@ -58,7 +58,7 @@ struct icoll_priv {
+ static struct icoll_priv icoll_priv;
+ static struct irq_domain *icoll_domain;
+-/* calculate bit offset depending on number of intterupt per register */
++/* calculate bit offset depending on number of interrupt per register */
+ static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit)
+ {
+       /*
+@@ -68,7 +68,7 @@ static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit)
+       return bit << ((d->hwirq & 3) << 3);
+ }
+-/* calculate mem offset depending on number of intterupt per register */
++/* calculate mem offset depending on number of interrupt per register */
+ static void __iomem *icoll_intr_reg(struct irq_data *d)
+ {
+       /* offset = hwirq / intr_per_reg * 0x10 */
+diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c
+index fb78d6623556c..9ea94456b178c 100644
+--- a/drivers/irqchip/irq-sun4i.c
++++ b/drivers/irqchip/irq-sun4i.c
+@@ -189,7 +189,7 @@ static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs)
+        * 3) spurious irq
+        * So if we immediately get a reading of 0, check the irq-pending reg
+        * to differentiate between 2 and 3. We only do this once to avoid
+-       * the extra check in the common case of 1 hapening after having
++       * the extra check in the common case of 1 happening after having
+        * read the vector-reg once.
+        */
+       hwirq = readl(irq_ic_data->irq_base + SUN4I_IRQ_VECTOR_REG) >> 2;
+diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
+index 532d0ae172d9f..ca1f593f4d13a 100644
+--- a/drivers/irqchip/irq-ti-sci-inta.c
++++ b/drivers/irqchip/irq-ti-sci-inta.c
+@@ -78,7 +78,7 @@ struct ti_sci_inta_vint_desc {
+  * struct ti_sci_inta_irq_domain - Structure representing a TISCI based
+  *                               Interrupt Aggregator IRQ domain.
+  * @sci:              Pointer to TISCI handle
+- * @vint:             TISCI resource pointer representing IA inerrupts.
++ * @vint:             TISCI resource pointer representing IA interrupts.
+  * @global_event:     TISCI resource pointer representing global events.
+  * @vint_list:                List of the vints active in the system
+  * @vint_mutex:               Mutex to protect vint_list
+diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c
+index e460363742272..62f3d29f90420 100644
+--- a/drivers/irqchip/irq-vic.c
++++ b/drivers/irqchip/irq-vic.c
+@@ -163,7 +163,7 @@ static struct syscore_ops vic_syscore_ops = {
+ };
+ /**
+- * vic_pm_init - initicall to register VIC pm
++ * vic_pm_init - initcall to register VIC pm
+  *
+  * This is called via late_initcall() to register
+  * the resources for the VICs due to the early
+@@ -397,7 +397,7 @@ static void __init vic_clear_interrupts(void __iomem *base)
+ /*
+  * The PL190 cell from ARM has been modified by ST to handle 64 interrupts.
+  * The original cell has 32 interrupts, while the modified one has 64,
+- * replocating two blocks 0x00..0x1f in 0x20..0x3f. In that case
++ * replicating two blocks 0x00..0x1f in 0x20..0x3f. In that case
+  * the probe function is called twice, with base set to offset 000
+  *  and 020 within the page. We call this "second block".
+  */
+diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c
+index 1d3d273309bd3..8cd1bfc730572 100644
+--- a/drivers/irqchip/irq-xilinx-intc.c
++++ b/drivers/irqchip/irq-xilinx-intc.c
+@@ -210,7 +210,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc,
+       /*
+        * Disable all external interrupts until they are
+-       * explicity requested.
++       * explicitly requested.
+        */
+       xintc_write(irqc, IER, 0);
+diff --git a/include/linux/irq.h b/include/linux/irq.h
+index 607bee9271bd7..b89a8ac83d1bc 100644
+--- a/include/linux/irq.h
++++ b/include/linux/irq.h
+@@ -116,7 +116,7 @@ enum {
+  * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity
+  * IRQ_SET_MASK_OK_DONE       - Same as IRQ_SET_MASK_OK for core. Special code to
+  *                      support stacked irqchips, which indicates skipping
+- *                      all descendent irqchips.
++ *                      all descendant irqchips.
+  */
+ enum {
+       IRQ_SET_MASK_OK = 0,
+@@ -302,7 +302,7 @@ static inline bool irqd_is_level_type(struct irq_data *d)
+ /*
+  * Must only be called of irqchip.irq_set_affinity() or low level
+- * hieararchy domain allocation functions.
++ * hierarchy domain allocation functions.
+  */
+ static inline void irqd_set_single_target(struct irq_data *d)
+ {
+diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
+index 5745491303e03..fdb22e0f9a91e 100644
+--- a/include/linux/irqdesc.h
++++ b/include/linux/irqdesc.h
+@@ -32,7 +32,7 @@ struct pt_regs;
+  * @last_unhandled:   aging timer for unhandled count
+  * @irqs_unhandled:   stats field for spurious unhandled interrupts
+  * @threads_handled:  stats field for deferred spurious detection of threaded handlers
+- * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers
++ * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
+  * @lock:             locking for SMP
+  * @affinity_hint:    hint to user space for preferred irq affinity
+  * @affinity_notify:  context for notification of affinity changes
+diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
+index 621d8dd157bc1..e7d284261d450 100644
+--- a/kernel/irq/chip.c
++++ b/kernel/irq/chip.c
+@@ -811,7 +811,7 @@ void handle_edge_irq(struct irq_desc *desc)
+               /*
+                * When another irq arrived while we were handling
+                * one, we could have masked the irq.
+-               * Renable it, if it was not disabled in meantime.
++               * Reenable it, if it was not disabled in meantime.
+                */
+               if (unlikely(desc->istate & IRQS_PENDING)) {
+                       if (!irqd_irq_disabled(&desc->irq_data) &&
+diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
+index 0b0cdf206dc44..7fe6cffe7d0df 100644
+--- a/kernel/irq/dummychip.c
++++ b/kernel/irq/dummychip.c
+@@ -13,7 +13,7 @@
+ /*
+  * What should we do if we get a hw irq event on an illegal vector?
+- * Each architecture has to answer this themself.
++ * Each architecture has to answer this themselves.
+  */
+ static void ack_bad(struct irq_data *data)
+ {
+diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
+index 9b0914a063f90..6c009a033c73f 100644
+--- a/kernel/irq/irqdesc.c
++++ b/kernel/irq/irqdesc.c
+@@ -31,7 +31,7 @@ static int __init irq_affinity_setup(char *str)
+       cpulist_parse(str, irq_default_affinity);
+       /*
+        * Set at least the boot cpu. We don't want to end up with
+-       * bugreports caused by random comandline masks
++       * bugreports caused by random commandline masks
+        */
+       cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
+       return 1;
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index 1720998933f8d..fe07888a7d96a 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);
+  * @name:     Optional user provided domain name
+  * @pa:               Optional user-provided physical address
+  *
+- * Allocate a struct irqchip_fwid, and return a poiner to the embedded
++ * Allocate a struct irqchip_fwid, and return a pointer to the embedded
+  * fwnode_handle (or NULL on failure).
+  *
+  * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are
+@@ -657,7 +657,7 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+       pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
+-      /* Look for default domain if nececssary */
++      /* Look for default domain if necessary */
+       if (domain == NULL)
+               domain = irq_default_domain;
+       if (domain == NULL) {
+@@ -893,7 +893,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
+ {
+       struct irq_data *data;
+-      /* Look for default domain if nececssary */
++      /* Look for default domain if necessary */
+       if (domain == NULL)
+               domain = irq_default_domain;
+       if (domain == NULL)
+@@ -1423,7 +1423,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
+  * The whole process to setup an IRQ has been split into two steps.
+  * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+  * descriptor and required hardware resources. The second step,
+- * irq_domain_activate_irq(), is to program hardwares with preallocated
++ * irq_domain_activate_irq(), is to program the hardware with preallocated
+  * resources. In this way, it's easier to rollback when failing to
+  * allocate resources.
+  */
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index 437b073dc487e..0159925054faa 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -341,7 +341,7 @@ static bool irq_set_affinity_deactivated(struct irq_data *data,
+        * If the interrupt is not yet activated, just store the affinity
+        * mask and do not call the chip driver at all. On activation the
+        * driver has to make sure anyway that the interrupt is in a
+-       * useable state so startup works.
++       * usable state so startup works.
+        */
+       if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) ||
+           irqd_is_activated(data) || !irqd_affinity_on_activate(data))
+@@ -999,7 +999,7 @@ static void irq_finalize_oneshot(struct irq_desc *desc,
+        * to IRQS_INPROGRESS and the irq line is masked forever.
+        *
+        * This also serializes the state of shared oneshot handlers
+-       * versus "desc->threads_onehsot |= action->thread_mask;" in
++       * versus "desc->threads_oneshot |= action->thread_mask;" in
+        * irq_wake_thread(). See the comment there which explains the
+        * serialization.
+        */
+@@ -1877,7 +1877,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
+       /* Last action releases resources */
+       if (!desc->action) {
+               /*
+-               * Reaquire bus lock as irq_release_resources() might
++               * Reacquire bus lock as irq_release_resources() might
+                * require it to deallocate resources over the slow bus.
+                */
+               chip_bus_lock(desc);
+diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
+index b47d95b68ac1a..4457f3e966d0e 100644
+--- a/kernel/irq/msi.c
++++ b/kernel/irq/msi.c
+@@ -5,7 +5,7 @@
+  *
+  * This file is licensed under GPLv2.
+  *
+- * This file contains common code to support Message Signalled Interrupt for
++ * This file contains common code to support Message Signaled Interrupts for
+  * PCI compatible and non PCI compatible devices.
+  */
+ #include <linux/types.h>
+diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
+index 1f981162648a3..00d45b6bd8f89 100644
+--- a/kernel/irq/timings.c
++++ b/kernel/irq/timings.c
+@@ -490,7 +490,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
+       /*
+        * The interrupt triggered more than one second apart, that
+-       * ends the sequence as predictible for our purpose. In this
++       * ends the sequence as predictable for our purpose. In this
+        * case, assume we have the beginning of a sequence and the
+        * timestamp is the first value. As it is impossible to
+        * predict anything at this point, return.
+-- 
+2.39.2
+
diff --git a/queue-5.10/irqdomain-change-the-type-of-size-in-__irq_domain_ad.patch b/queue-5.10/irqdomain-change-the-type-of-size-in-__irq_domain_ad.patch
new file mode 100644 (file)
index 0000000..aaebef9
--- /dev/null
@@ -0,0 +1,53 @@
+From bfa01f225d22fffdf704f267bcb38e4f9735562b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Sep 2021 10:52:03 +0800
+Subject: irqdomain: Change the type of 'size' in __irq_domain_add() to be
+ consistent
+
+From: Bixuan Cui <cuibixuan@huawei.com>
+
+[ Upstream commit 20c36ce2164f1774b487d443ece99b754bc6ad43 ]
+
+The 'size' is used in struct_size(domain, revmap, size) and its input
+parameter type is 'size_t'(unsigned int).
+Changing the size to 'unsigned int' to make the type consistent.
+
+Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20210916025203.44841-1-cuibixuan@huawei.com
+Stable-dep-of: 8932c32c3053 ("irqdomain: Fix domain registration race")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/irqdomain.h | 2 +-
+ kernel/irq/irqdomain.c    | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
+index ea5a337e0f8b8..9b9743f7538c4 100644
+--- a/include/linux/irqdomain.h
++++ b/include/linux/irqdomain.h
+@@ -256,7 +256,7 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
+ }
+ void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
+-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+                                   irq_hw_number_t hwirq_max, int direct_max,
+                                   const struct irq_domain_ops *ops,
+                                   void *host_data);
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index 245e317c72908..426242c8903d4 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -130,7 +130,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
+  * Allocates and initializes an irq_domain structure.
+  * Returns pointer to IRQ domain, or NULL on failure.
+  */
+-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+                                   irq_hw_number_t hwirq_max, int direct_max,
+                                   const struct irq_domain_ops *ops,
+                                   void *host_data)
+-- 
+2.39.2
+
diff --git a/queue-5.10/irqdomain-fix-domain-registration-race.patch b/queue-5.10/irqdomain-fix-domain-registration-race.patch
new file mode 100644 (file)
index 0000000..a2085b7
--- /dev/null
@@ -0,0 +1,134 @@
+From 4ec2537dd6d7bd9da61dca871a019630766b8f9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:42:49 +0100
+Subject: irqdomain: Fix domain registration race
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 8932c32c3053accd50702b36e944ac2016cd103c ]
+
+Hierarchical domains created using irq_domain_create_hierarchy() are
+currently added to the domain list before having been fully initialised.
+
+This specifically means that a racing allocation request might fail to
+allocate irq data for the inner domains of a hierarchy in case the
+parent domain pointer has not yet been set up.
+
+Note that this is not really any issue for irqchip drivers that are
+registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
+but could potentially cause trouble with drivers that are registered
+later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
+gpiochip drivers, etc.).
+
+Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
+Cc: stable@vger.kernel.org      # 3.19
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+[ johan: add commit message ]
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/irqdomain.c | 62 +++++++++++++++++++++++++++++-------------
+ 1 file changed, 43 insertions(+), 19 deletions(-)
+
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index 426242c8903d4..fd3f7c16c299a 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -117,23 +117,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
+ }
+ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
+-/**
+- * __irq_domain_add() - Allocate a new irq_domain data structure
+- * @fwnode: firmware node for the interrupt controller
+- * @size: Size of linear map; 0 for radix mapping only
+- * @hwirq_max: Maximum number of interrupts supported by controller
+- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+- *              direct mapping
+- * @ops: domain callbacks
+- * @host_data: Controller private data pointer
+- *
+- * Allocates and initializes an irq_domain structure.
+- * Returns pointer to IRQ domain, or NULL on failure.
+- */
+-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+-                                  irq_hw_number_t hwirq_max, int direct_max,
+-                                  const struct irq_domain_ops *ops,
+-                                  void *host_data)
++static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
++                                            unsigned int size,
++                                            irq_hw_number_t hwirq_max,
++                                            int direct_max,
++                                            const struct irq_domain_ops *ops,
++                                            void *host_data)
+ {
+       struct irqchip_fwid *fwid;
+       struct irq_domain *domain;
+@@ -210,12 +199,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
+       domain->revmap_direct_max_irq = direct_max;
+       irq_domain_check_hierarchy(domain);
++      return domain;
++}
++
++static void __irq_domain_publish(struct irq_domain *domain)
++{
+       mutex_lock(&irq_domain_mutex);
+       debugfs_add_domain_dir(domain);
+       list_add(&domain->link, &irq_domain_list);
+       mutex_unlock(&irq_domain_mutex);
+       pr_debug("Added domain %s\n", domain->name);
++}
++
++/**
++ * __irq_domain_add() - Allocate a new irq_domain data structure
++ * @fwnode: firmware node for the interrupt controller
++ * @size: Size of linear map; 0 for radix mapping only
++ * @hwirq_max: Maximum number of interrupts supported by controller
++ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
++ *              direct mapping
++ * @ops: domain callbacks
++ * @host_data: Controller private data pointer
++ *
++ * Allocates and initializes an irq_domain structure.
++ * Returns pointer to IRQ domain, or NULL on failure.
++ */
++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
++                                  irq_hw_number_t hwirq_max, int direct_max,
++                                  const struct irq_domain_ops *ops,
++                                  void *host_data)
++{
++      struct irq_domain *domain;
++
++      domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
++                                   ops, host_data);
++      if (domain)
++              __irq_domain_publish(domain);
++
+       return domain;
+ }
+ EXPORT_SYMBOL_GPL(__irq_domain_add);
+@@ -1110,12 +1131,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
+       struct irq_domain *domain;
+       if (size)
+-              domain = irq_domain_create_linear(fwnode, size, ops, host_data);
++              domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
+       else
+-              domain = irq_domain_create_tree(fwnode, ops, host_data);
++              domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
++
+       if (domain) {
+               domain->parent = parent;
+               domain->flags |= flags;
++
++              __irq_domain_publish(domain);
+       }
+       return domain;
+-- 
+2.39.2
+
diff --git a/queue-5.10/irqdomain-fix-mapping-creation-race.patch b/queue-5.10/irqdomain-fix-mapping-creation-race.patch
new file mode 100644 (file)
index 0000000..81b5381
--- /dev/null
@@ -0,0 +1,184 @@
+From c1c69cf22e1bc56cc9387569f82fa6bd417f7172 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:42:48 +0100
+Subject: irqdomain: Fix mapping-creation race
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 601363cc08da25747feb87c55573dd54de91d66a ]
+
+Parallel probing of devices that share interrupts (e.g. when a driver
+uses asynchronous probing) can currently result in two mappings for the
+same hardware interrupt to be created due to missing serialisation.
+
+Make sure to hold the irq_domain_mutex when creating mappings so that
+looking for an existing mapping before creating a new one is done
+atomically.
+
+Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
+Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
+Link: https://lore.kernel.org/r/YuJXMHoT4ijUxnRb@hovoldconsulting.com
+Cc: stable@vger.kernel.org      # 4.8
+Cc: Dmitry Torokhov <dtor@chromium.org>
+Cc: Jon Hunter <jonathanh@nvidia.com>
+Tested-by: Hsin-Yi Wang <hsinyi@chromium.org>
+Tested-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230213104302.17307-7-johan+linaro@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/irqdomain.c | 64 ++++++++++++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 18 deletions(-)
+
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index a1e1433a07754..245e317c72908 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -25,6 +25,9 @@ static DEFINE_MUTEX(irq_domain_mutex);
+ static struct irq_domain *irq_default_domain;
++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
++                                      unsigned int nr_irqs, int node, void *arg,
++                                      bool realloc, const struct irq_affinity_desc *affinity);
+ static void irq_domain_check_hierarchy(struct irq_domain *domain);
+ struct irqchip_fwid {
+@@ -637,9 +640,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
+ }
+ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+-static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
+-                                                irq_hw_number_t hwirq,
+-                                                const struct irq_affinity_desc *affinity)
++static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain,
++                                                     irq_hw_number_t hwirq,
++                                                     const struct irq_affinity_desc *affinity)
+ {
+       struct device_node *of_node = irq_domain_get_of_node(domain);
+       int virq;
+@@ -654,7 +657,7 @@ static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
+               return 0;
+       }
+-      if (irq_domain_associate(domain, virq, hwirq)) {
++      if (irq_domain_associate_locked(domain, virq, hwirq)) {
+               irq_free_desc(virq);
+               return 0;
+       }
+@@ -690,14 +693,20 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+               return 0;
+       }
++      mutex_lock(&irq_domain_mutex);
++
+       /* Check if mapping already exists */
+       virq = irq_find_mapping(domain, hwirq);
+       if (virq) {
+               pr_debug("existing mapping on virq %d\n", virq);
+-              return virq;
++              goto out;
+       }
+-      return __irq_create_mapping_affinity(domain, hwirq, affinity);
++      virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity);
++out:
++      mutex_unlock(&irq_domain_mutex);
++
++      return virq;
+ }
+ EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
+@@ -799,6 +808,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+       if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
+               type &= IRQ_TYPE_SENSE_MASK;
++      mutex_lock(&irq_domain_mutex);
++
+       /*
+        * If we've already configured this interrupt,
+        * don't do it again, or hell will break loose.
+@@ -811,7 +822,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+                * interrupt number.
+                */
+               if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
+-                      return virq;
++                      goto out;
+               /*
+                * If the trigger type has not been set yet, then set
+@@ -819,35 +830,45 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+                */
+               if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
+                       irq_data = irq_get_irq_data(virq);
+-                      if (!irq_data)
+-                              return 0;
++                      if (!irq_data) {
++                              virq = 0;
++                              goto out;
++                      }
+                       irqd_set_trigger_type(irq_data, type);
+-                      return virq;
++                      goto out;
+               }
+               pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
+                       hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
+-              return 0;
++              virq = 0;
++              goto out;
+       }
+       if (irq_domain_is_hierarchy(domain)) {
+-              virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
+-              if (virq <= 0)
+-                      return 0;
++              virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE,
++                                                  fwspec, false, NULL);
++              if (virq <= 0) {
++                      virq = 0;
++                      goto out;
++              }
+       } else {
+               /* Create mapping */
+-              virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
++              virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL);
+               if (!virq)
+-                      return virq;
++                      goto out;
+       }
+       irq_data = irq_get_irq_data(virq);
+-      if (WARN_ON(!irq_data))
+-              return 0;
++      if (WARN_ON(!irq_data)) {
++              virq = 0;
++              goto out;
++      }
+       /* Store trigger type */
+       irqd_set_trigger_type(irq_data, type);
++out:
++      mutex_unlock(&irq_domain_mutex);
+       return virq;
+ }
+@@ -1856,6 +1877,13 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
+       irq_set_handler_data(virq, handler_data);
+ }
++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
++                                      unsigned int nr_irqs, int node, void *arg,
++                                      bool realloc, const struct irq_affinity_desc *affinity)
++{
++      return -EINVAL;
++}
++
+ static void irq_domain_check_hierarchy(struct irq_domain *domain)
+ {
+ }
+-- 
+2.39.2
+
diff --git a/queue-5.10/irqdomain-look-for-existing-mapping-only-once.patch b/queue-5.10/irqdomain-look-for-existing-mapping-only-once.patch
new file mode 100644 (file)
index 0000000..2d8e47e
--- /dev/null
@@ -0,0 +1,133 @@
+From 85884bb78e6116baa316676ba22c6ffc9cba7a43 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:42:46 +0100
+Subject: irqdomain: Look for existing mapping only once
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 6e6f75c9c98d2d246d90411ff2b6f0cd271f4cba ]
+
+Avoid looking for an existing mapping twice when creating a new mapping
+using irq_create_fwspec_mapping() by factoring out the actual allocation
+which is shared with irq_create_mapping_affinity().
+
+The new helper function will also be used to fix a shared-interrupt
+mapping race, hence the Fixes tag.
+
+Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
+Cc: stable@vger.kernel.org      # 4.8
+Tested-by: Hsin-Yi Wang <hsinyi@chromium.org>
+Tested-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230213104302.17307-5-johan+linaro@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/irqdomain.c | 60 +++++++++++++++++++++++-------------------
+ 1 file changed, 33 insertions(+), 27 deletions(-)
+
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index fe07888a7d96a..d18c25a41673f 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -637,6 +637,34 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
+ }
+ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
++static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
++                                                irq_hw_number_t hwirq,
++                                                const struct irq_affinity_desc *affinity)
++{
++      struct device_node *of_node = irq_domain_get_of_node(domain);
++      int virq;
++
++      pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
++
++      /* Allocate a virtual interrupt number */
++      virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
++                                    affinity);
++      if (virq <= 0) {
++              pr_debug("-> virq allocation failed\n");
++              return 0;
++      }
++
++      if (irq_domain_associate(domain, virq, hwirq)) {
++              irq_free_desc(virq);
++              return 0;
++      }
++
++      pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
++              hwirq, of_node_full_name(of_node), virq);
++
++      return virq;
++}
++
+ /**
+  * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
+  * @domain: domain owning this hardware interrupt or NULL for default domain
+@@ -649,14 +677,11 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+  * on the number returned from that call.
+  */
+ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+-                                     irq_hw_number_t hwirq,
+-                                     const struct irq_affinity_desc *affinity)
++                                       irq_hw_number_t hwirq,
++                                       const struct irq_affinity_desc *affinity)
+ {
+-      struct device_node *of_node;
+       int virq;
+-      pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
+-
+       /* Look for default domain if necessary */
+       if (domain == NULL)
+               domain = irq_default_domain;
+@@ -664,34 +689,15 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+               WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq);
+               return 0;
+       }
+-      pr_debug("-> using domain @%p\n", domain);
+-
+-      of_node = irq_domain_get_of_node(domain);
+       /* Check if mapping already exists */
+       virq = irq_find_mapping(domain, hwirq);
+       if (virq) {
+-              pr_debug("-> existing mapping on virq %d\n", virq);
++              pr_debug("existing mapping on virq %d\n", virq);
+               return virq;
+       }
+-      /* Allocate a virtual interrupt number */
+-      virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
+-                                    affinity);
+-      if (virq <= 0) {
+-              pr_debug("-> virq allocation failed\n");
+-              return 0;
+-      }
+-
+-      if (irq_domain_associate(domain, virq, hwirq)) {
+-              irq_free_desc(virq);
+-              return 0;
+-      }
+-
+-      pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
+-              hwirq, of_node_full_name(of_node), virq);
+-
+-      return virq;
++      return __irq_create_mapping_affinity(domain, hwirq, affinity);
+ }
+ EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
+@@ -831,7 +837,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+                       return 0;
+       } else {
+               /* Create mapping */
+-              virq = irq_create_mapping(domain, hwirq);
++              virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
+               if (!virq)
+                       return virq;
+       }
+-- 
+2.39.2
+
diff --git a/queue-5.10/irqdomain-refactor-__irq_domain_alloc_irqs.patch b/queue-5.10/irqdomain-refactor-__irq_domain_alloc_irqs.patch
new file mode 100644 (file)
index 0000000..4045dd7
--- /dev/null
@@ -0,0 +1,155 @@
+From af649d9740bab8166d36ad7c818c2e53c22945f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:42:47 +0100
+Subject: irqdomain: Refactor __irq_domain_alloc_irqs()
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit d55f7f4c58c07beb5050a834bf57ae2ede599c7e ]
+
+Refactor __irq_domain_alloc_irqs() so that it can be called internally
+while holding the irq_domain_mutex.
+
+This will be used to fix a shared-interrupt mapping race, hence the
+Fixes tag.
+
+Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
+Cc: stable@vger.kernel.org      # 4.8
+Tested-by: Hsin-Yi Wang <hsinyi@chromium.org>
+Tested-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/irqdomain.c | 88 +++++++++++++++++++++++-------------------
+ 1 file changed, 48 insertions(+), 40 deletions(-)
+
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index d18c25a41673f..a1e1433a07754 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -1411,40 +1411,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
+       return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
+ }
+-/**
+- * __irq_domain_alloc_irqs - Allocate IRQs from domain
+- * @domain:   domain to allocate from
+- * @irq_base: allocate specified IRQ number if irq_base >= 0
+- * @nr_irqs:  number of IRQs to allocate
+- * @node:     NUMA node id for memory allocation
+- * @arg:      domain specific argument
+- * @realloc:  IRQ descriptors have already been allocated if true
+- * @affinity: Optional irq affinity mask for multiqueue devices
+- *
+- * Allocate IRQ numbers and initialized all data structures to support
+- * hierarchy IRQ domains.
+- * Parameter @realloc is mainly to support legacy IRQs.
+- * Returns error code or allocated IRQ number
+- *
+- * The whole process to setup an IRQ has been split into two steps.
+- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+- * descriptor and required hardware resources. The second step,
+- * irq_domain_activate_irq(), is to program the hardware with preallocated
+- * resources. In this way, it's easier to rollback when failing to
+- * allocate resources.
+- */
+-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+-                          unsigned int nr_irqs, int node, void *arg,
+-                          bool realloc, const struct irq_affinity_desc *affinity)
++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
++                                      unsigned int nr_irqs, int node, void *arg,
++                                      bool realloc, const struct irq_affinity_desc *affinity)
+ {
+       int i, ret, virq;
+-      if (domain == NULL) {
+-              domain = irq_default_domain;
+-              if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+-                      return -EINVAL;
+-      }
+-
+       if (realloc && irq_base >= 0) {
+               virq = irq_base;
+       } else {
+@@ -1463,24 +1435,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+               goto out_free_desc;
+       }
+-      mutex_lock(&irq_domain_mutex);
+       ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
+-      if (ret < 0) {
+-              mutex_unlock(&irq_domain_mutex);
++      if (ret < 0)
+               goto out_free_irq_data;
+-      }
+       for (i = 0; i < nr_irqs; i++) {
+               ret = irq_domain_trim_hierarchy(virq + i);
+-              if (ret) {
+-                      mutex_unlock(&irq_domain_mutex);
++              if (ret)
+                       goto out_free_irq_data;
+-              }
+       }
+-      
++
+       for (i = 0; i < nr_irqs; i++)
+               irq_domain_insert_irq(virq + i);
+-      mutex_unlock(&irq_domain_mutex);
+       return virq;
+@@ -1491,6 +1457,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+       return ret;
+ }
++/**
++ * __irq_domain_alloc_irqs - Allocate IRQs from domain
++ * @domain:   domain to allocate from
++ * @irq_base: allocate specified IRQ number if irq_base >= 0
++ * @nr_irqs:  number of IRQs to allocate
++ * @node:     NUMA node id for memory allocation
++ * @arg:      domain specific argument
++ * @realloc:  IRQ descriptors have already been allocated if true
++ * @affinity: Optional irq affinity mask for multiqueue devices
++ *
++ * Allocate IRQ numbers and initialized all data structures to support
++ * hierarchy IRQ domains.
++ * Parameter @realloc is mainly to support legacy IRQs.
++ * Returns error code or allocated IRQ number
++ *
++ * The whole process to setup an IRQ has been split into two steps.
++ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
++ * descriptor and required hardware resources. The second step,
++ * irq_domain_activate_irq(), is to program the hardware with preallocated
++ * resources. In this way, it's easier to rollback when failing to
++ * allocate resources.
++ */
++int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
++                          unsigned int nr_irqs, int node, void *arg,
++                          bool realloc, const struct irq_affinity_desc *affinity)
++{
++      int ret;
++
++      if (domain == NULL) {
++              domain = irq_default_domain;
++              if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
++                      return -EINVAL;
++      }
++
++      mutex_lock(&irq_domain_mutex);
++      ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
++                                         realloc, affinity);
++      mutex_unlock(&irq_domain_mutex);
++
++      return ret;
++}
++
+ /* The irq_data was moved, fix the revmap to refer to the new location */
+ static void irq_domain_fix_revmap(struct irq_data *d)
+ {
+-- 
+2.39.2
+
diff --git a/queue-5.10/landlock-add-object-management.patch b/queue-5.10/landlock-add-object-management.patch
new file mode 100644 (file)
index 0000000..3fa753b
--- /dev/null
@@ -0,0 +1,318 @@
+From ab07529d96375f4117929020ea88db6ed07d8abf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Apr 2021 17:41:11 +0200
+Subject: landlock: Add object management
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mickaël Salaün <mic@linux.microsoft.com>
+
+[ Upstream commit 90945448e9830aa1b39d7acaa4e0724a001e2ff8 ]
+
+A Landlock object enables to identify a kernel object (e.g. an inode).
+A Landlock rule is a set of access rights allowed on an object.  Rules
+are grouped in rulesets that may be tied to a set of processes (i.e.
+subjects) to enforce a scoped access-control (i.e. a domain).
+
+Because Landlock's goal is to empower any process (especially
+unprivileged ones) to sandbox themselves, we cannot rely on a
+system-wide object identification such as file extended attributes.
+Indeed, we need innocuous, composable and modular access-controls.
+
+The main challenge with these constraints is to identify kernel objects
+while this identification is useful (i.e. when a security policy makes
+use of this object).  But this identification data should be freed once
+no policy is using it.  This ephemeral tagging should not and may not be
+written in the filesystem.  We then need to manage the lifetime of a
+rule according to the lifetime of its objects.  To avoid a global lock,
+this implementation make use of RCU and counters to safely reference
+objects.
+
+A following commit uses this generic object management for inodes.
+
+Cc: James Morris <jmorris@namei.org>
+Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
+Reviewed-by: Jann Horn <jannh@google.com>
+Acked-by: Serge Hallyn <serge@hallyn.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20210422154123.13086-2-mic@digikod.net
+Signed-off-by: James Morris <jamorris@linux.microsoft.com>
+Stable-dep-of: 366617a69e60 ("selftests/landlock: Skip overlayfs tests when not supported")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ MAINTAINERS                | 10 +++++
+ security/Kconfig           |  1 +
+ security/Makefile          |  2 +
+ security/landlock/Kconfig  | 21 +++++++++
+ security/landlock/Makefile |  3 ++
+ security/landlock/object.c | 67 ++++++++++++++++++++++++++++
+ security/landlock/object.h | 91 ++++++++++++++++++++++++++++++++++++++
+ 7 files changed, 195 insertions(+)
+ create mode 100644 security/landlock/Kconfig
+ create mode 100644 security/landlock/Makefile
+ create mode 100644 security/landlock/object.c
+ create mode 100644 security/landlock/object.h
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 6c5efc4013ab5..72815c1a325eb 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9836,6 +9836,16 @@ F:      net/core/sock_map.c
+ F:    net/ipv4/tcp_bpf.c
+ F:    net/ipv4/udp_bpf.c
++LANDLOCK SECURITY MODULE
++M:    Mickaël Salaün <mic@digikod.net>
++L:    linux-security-module@vger.kernel.org
++S:    Supported
++W:    https://landlock.io
++T:    git https://github.com/landlock-lsm/linux.git
++F:    security/landlock/
++K:    landlock
++K:    LANDLOCK
++
+ LANTIQ / INTEL Ethernet drivers
+ M:    Hauke Mehrtens <hauke@hauke-m.de>
+ L:    netdev@vger.kernel.org
+diff --git a/security/Kconfig b/security/Kconfig
+index 9893c316da897..7cb5476306676 100644
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -230,6 +230,7 @@ source "security/loadpin/Kconfig"
+ source "security/yama/Kconfig"
+ source "security/safesetid/Kconfig"
+ source "security/lockdown/Kconfig"
++source "security/landlock/Kconfig"
+ source "security/integrity/Kconfig"
+diff --git a/security/Makefile b/security/Makefile
+index 3baf435de5411..47e432900e242 100644
+--- a/security/Makefile
++++ b/security/Makefile
+@@ -13,6 +13,7 @@ subdir-$(CONFIG_SECURITY_LOADPIN)    += loadpin
+ subdir-$(CONFIG_SECURITY_SAFESETID)    += safesetid
+ subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM)        += lockdown
+ subdir-$(CONFIG_BPF_LSM)              += bpf
++subdir-$(CONFIG_SECURITY_LANDLOCK)    += landlock
+ # always enable default capabilities
+ obj-y                                 += commoncap.o
+@@ -32,6 +33,7 @@ obj-$(CONFIG_SECURITY_SAFESETID)       += safesetid/
+ obj-$(CONFIG_SECURITY_LOCKDOWN_LSM)   += lockdown/
+ obj-$(CONFIG_CGROUPS)                 += device_cgroup.o
+ obj-$(CONFIG_BPF_LSM)                 += bpf/
++obj-$(CONFIG_SECURITY_LANDLOCK)               += landlock/
+ # Object integrity file lists
+ subdir-$(CONFIG_INTEGRITY)            += integrity
+diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig
+new file mode 100644
+index 0000000000000..c1e862a384107
+--- /dev/null
++++ b/security/landlock/Kconfig
+@@ -0,0 +1,21 @@
++# SPDX-License-Identifier: GPL-2.0-only
++
++config SECURITY_LANDLOCK
++      bool "Landlock support"
++      depends on SECURITY
++      select SECURITY_PATH
++      help
++        Landlock is a sandboxing mechanism that enables processes to restrict
++        themselves (and their future children) by gradually enforcing
++        tailored access control policies.  A Landlock security policy is a
++        set of access rights (e.g. open a file in read-only, make a
++        directory, etc.) tied to a file hierarchy.  Such policy can be
++        configured and enforced by any processes for themselves using the
++        dedicated system calls: landlock_create_ruleset(),
++        landlock_add_rule(), and landlock_restrict_self().
++
++        See Documentation/userspace-api/landlock.rst for further information.
++
++        If you are unsure how to answer this question, answer N.  Otherwise,
++        you should also prepend "landlock," to the content of CONFIG_LSM to
++        enable Landlock at boot time.
+diff --git a/security/landlock/Makefile b/security/landlock/Makefile
+new file mode 100644
+index 0000000000000..cb6deefbf4c09
+--- /dev/null
++++ b/security/landlock/Makefile
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
++
++landlock-y := object.o
+diff --git a/security/landlock/object.c b/security/landlock/object.c
+new file mode 100644
+index 0000000000000..d674fdf9ff04f
+--- /dev/null
++++ b/security/landlock/object.c
+@@ -0,0 +1,67 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Landlock LSM - Object management
++ *
++ * Copyright Â© 2016-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright Â© 2018-2020 ANSSI
++ */
++
++#include <linux/bug.h>
++#include <linux/compiler_types.h>
++#include <linux/err.h>
++#include <linux/kernel.h>
++#include <linux/rcupdate.h>
++#include <linux/refcount.h>
++#include <linux/slab.h>
++#include <linux/spinlock.h>
++
++#include "object.h"
++
++struct landlock_object *landlock_create_object(
++              const struct landlock_object_underops *const underops,
++              void *const underobj)
++{
++      struct landlock_object *new_object;
++
++      if (WARN_ON_ONCE(!underops || !underobj))
++              return ERR_PTR(-ENOENT);
++      new_object = kzalloc(sizeof(*new_object), GFP_KERNEL_ACCOUNT);
++      if (!new_object)
++              return ERR_PTR(-ENOMEM);
++      refcount_set(&new_object->usage, 1);
++      spin_lock_init(&new_object->lock);
++      new_object->underops = underops;
++      new_object->underobj = underobj;
++      return new_object;
++}
++
++/*
++ * The caller must own the object (i.e. thanks to object->usage) to safely put
++ * it.
++ */
++void landlock_put_object(struct landlock_object *const object)
++{
++      /*
++       * The call to @object->underops->release(object) might sleep, e.g.
++       * because of iput().
++       */
++      might_sleep();
++      if (!object)
++              return;
++
++      /*
++       * If the @object's refcount cannot drop to zero, we can just decrement
++       * the refcount without holding a lock. Otherwise, the decrement must
++       * happen under @object->lock for synchronization with things like
++       * get_inode_object().
++       */
++      if (refcount_dec_and_lock(&object->usage, &object->lock)) {
++              __acquire(&object->lock);
++              /*
++               * With @object->lock initially held, remove the reference from
++               * @object->underobj to @object (if it still exists).
++               */
++              object->underops->release(object);
++              kfree_rcu(object, rcu_free);
++      }
++}
+diff --git a/security/landlock/object.h b/security/landlock/object.h
+new file mode 100644
+index 0000000000000..3f80674c6c8d3
+--- /dev/null
++++ b/security/landlock/object.h
+@@ -0,0 +1,91 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Landlock LSM - Object management
++ *
++ * Copyright Â© 2016-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright Â© 2018-2020 ANSSI
++ */
++
++#ifndef _SECURITY_LANDLOCK_OBJECT_H
++#define _SECURITY_LANDLOCK_OBJECT_H
++
++#include <linux/compiler_types.h>
++#include <linux/refcount.h>
++#include <linux/spinlock.h>
++
++struct landlock_object;
++
++/**
++ * struct landlock_object_underops - Operations on an underlying object
++ */
++struct landlock_object_underops {
++      /**
++       * @release: Releases the underlying object (e.g. iput() for an inode).
++       */
++      void (*release)(struct landlock_object *const object)
++              __releases(object->lock);
++};
++
++/**
++ * struct landlock_object - Security blob tied to a kernel object
++ *
++ * The goal of this structure is to enable to tie a set of ephemeral access
++ * rights (pertaining to different domains) to a kernel object (e.g an inode)
++ * in a safe way.  This implies to handle concurrent use and modification.
++ *
++ * The lifetime of a &struct landlock_object depends on the rules referring to
++ * it.
++ */
++struct landlock_object {
++      /**
++       * @usage: This counter is used to tie an object to the rules matching
++       * it or to keep it alive while adding a new rule.  If this counter
++       * reaches zero, this struct must not be modified, but this counter can
++       * still be read from within an RCU read-side critical section.  When
++       * adding a new rule to an object with a usage counter of zero, we must
++       * wait until the pointer to this object is set to NULL (or recycled).
++       */
++      refcount_t usage;
++      /**
++       * @lock: Protects against concurrent modifications.  This lock must be
++       * held from the time @usage drops to zero until any weak references
++       * from @underobj to this object have been cleaned up.
++       *
++       * Lock ordering: inode->i_lock nests inside this.
++       */
++      spinlock_t lock;
++      /**
++       * @underobj: Used when cleaning up an object and to mark an object as
++       * tied to its underlying kernel structure.  This pointer is protected
++       * by @lock.  Cf. landlock_release_inodes() and release_inode().
++       */
++      void *underobj;
++      union {
++              /**
++               * @rcu_free: Enables lockless use of @usage, @lock and
++               * @underobj from within an RCU read-side critical section.
++               * @rcu_free and @underops are only used by
++               * landlock_put_object().
++               */
++              struct rcu_head rcu_free;
++              /**
++               * @underops: Enables landlock_put_object() to release the
++               * underlying object (e.g. inode).
++               */
++              const struct landlock_object_underops *underops;
++      };
++};
++
++struct landlock_object *landlock_create_object(
++              const struct landlock_object_underops *const underops,
++              void *const underobj);
++
++void landlock_put_object(struct landlock_object *const object);
++
++static inline void landlock_get_object(struct landlock_object *const object)
++{
++      if (object)
++              refcount_inc(&object->usage);
++}
++
++#endif /* _SECURITY_LANDLOCK_OBJECT_H */
+-- 
+2.39.2
+
diff --git a/queue-5.10/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch b/queue-5.10/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch
new file mode 100644 (file)
index 0000000..c75eb30
--- /dev/null
@@ -0,0 +1,86 @@
+From 9a4a568b921a12aa558adab24b2201df4e5b2419 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Mar 2023 01:39:13 +0900
+Subject: net: caif: Fix use-after-free in cfusbl_device_notify()
+
+From: Shigeru Yoshida <syoshida@redhat.com>
+
+[ Upstream commit 9781e98a97110f5e76999058368b4be76a788484 ]
+
+syzbot reported use-after-free in cfusbl_device_notify() [1].  This
+causes a stack trace like below:
+
+BUG: KASAN: use-after-free in cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138
+Read of size 8 at addr ffff88807ac4e6f0 by task kworker/u4:6/1214
+
+CPU: 0 PID: 1214 Comm: kworker/u4:6 Not tainted 5.19.0-rc3-syzkaller-00146-g92f20ff72066 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: netns cleanup_net
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+ print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313
+ print_report mm/kasan/report.c:429 [inline]
+ kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491
+ cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138
+ notifier_call_chain+0xb5/0x200 kernel/notifier.c:87
+ call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945
+ call_netdevice_notifiers_extack net/core/dev.c:1983 [inline]
+ call_netdevice_notifiers net/core/dev.c:1997 [inline]
+ netdev_wait_allrefs_any net/core/dev.c:10227 [inline]
+ netdev_run_todo+0xbc0/0x10f0 net/core/dev.c:10341
+ default_device_exit_batch+0x44e/0x590 net/core/dev.c:11334
+ ops_exit_list+0x125/0x170 net/core/net_namespace.c:167
+ cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594
+ process_one_work+0x996/0x1610 kernel/workqueue.c:2289
+ worker_thread+0x665/0x1080 kernel/workqueue.c:2436
+ kthread+0x2e9/0x3a0 kernel/kthread.c:376
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302
+ </TASK>
+
+When unregistering a net device, unregister_netdevice_many_notify()
+sets the device's reg_state to NETREG_UNREGISTERING, calls notifiers
+with NETDEV_UNREGISTER, and adds the device to the todo list.
+
+Later on, devices in the todo list are processed by netdev_run_todo().
+netdev_run_todo() waits devices' reference count become 1 while
+rebdoadcasting NETDEV_UNREGISTER notification.
+
+When cfusbl_device_notify() is called with NETDEV_UNREGISTER multiple
+times, the parent device might be freed.  This could cause UAF.
+Processing NETDEV_UNREGISTER multiple times also causes inbalance of
+reference count for the module.
+
+This patch fixes the issue by accepting only first NETDEV_UNREGISTER
+notification.
+
+Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface")
+CC: sjur.brandeland@stericsson.com <sjur.brandeland@stericsson.com>
+Reported-by: syzbot+b563d33852b893653a9e@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?id=c3bfd8e2450adab3bffe4d80821fbbced600407f [1]
+Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
+Link: https://lore.kernel.org/r/20230301163913.391304-1-syoshida@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/caif/caif_usb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
+index b02e1292f7f19..24488a4e2d26e 100644
+--- a/net/caif/caif_usb.c
++++ b/net/caif/caif_usb.c
+@@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
+       struct usb_device *usbdev;
+       int res;
++      if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED)
++              return 0;
++
+       /* Check whether we have a NCM device, and find its VID/PID. */
+       if (!(dev->dev.parent && dev->dev.parent->driver &&
+             strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0))
+-- 
+2.39.2
+
diff --git a/queue-5.10/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch b/queue-5.10/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch
new file mode 100644 (file)
index 0000000..a4dbce3
--- /dev/null
@@ -0,0 +1,71 @@
+From 087a540b3deaabe61acf91b3ef7446bd9a2519ee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Mar 2023 13:43:20 +0000
+Subject: net: ethernet: mtk_eth_soc: fix RX data corruption issue
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Daniel Golle <daniel@makrotopia.org>
+
+[ Upstream commit 193250ace270fecd586dd2d0dfbd9cbd2ade977f ]
+
+Fix data corruption issue with SerDes connected PHYs operating at 1.25
+Gbps speed where we could previously observe about 30% packet loss while
+the bad packet counter was increasing.
+
+As almost all boards with MediaTek MT7622 or MT7986 use either the MT7531
+switch IC operating at 3.125Gbps SerDes rate or single-port PHYs using
+rate-adaptation to 2500Base-X mode, this issue only got exposed now when
+we started trying to use SFP modules operating with 1.25 Gbps with the
+BananaPi R3 board.
+
+The fix is to set bit 12 which disables the RX FIFO clear function when
+setting up MAC MCR, MediaTek SDK did the same change stating:
+"If without this patch, kernel might receive invalid packets that are
+corrupted by GMAC."[1]
+
+[1]: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/d8a2975939a12686c4a95c40db21efdc3f821f63
+
+Fixes: 42c03844e93d ("net-next: mediatek: add support for MediaTek MT7622 SoC")
+Tested-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: Daniel Golle <daniel@makrotopia.org>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Link: https://lore.kernel.org/r/138da2735f92c8b6f8578ec2e5a794ee515b665f.1677937317.git.daniel@makrotopia.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 +
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 217dc67c48fa2..a8319295f1ab2 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -354,7 +354,8 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
+       mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
+       mcr_new = mcr_cur;
+       mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
+-                 MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK;
++                 MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK |
++                 MAC_MCR_RX_FIFO_CLR_DIS;
+       /* Only update control register when needed! */
+       if (mcr_new != mcr_cur)
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index 54a7cd93cc0fe..0ca3223ad5457 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -339,6 +339,7 @@
+ #define MAC_MCR_FORCE_MODE    BIT(15)
+ #define MAC_MCR_TX_EN         BIT(14)
+ #define MAC_MCR_RX_EN         BIT(13)
++#define MAC_MCR_RX_FIFO_CLR_DIS       BIT(12)
+ #define MAC_MCR_BACKOFF_EN    BIT(9)
+ #define MAC_MCR_BACKPR_EN     BIT(8)
+ #define MAC_MCR_FORCE_RX_FC   BIT(5)
+-- 
+2.39.2
+
diff --git a/queue-5.10/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch b/queue-5.10/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch
new file mode 100644 (file)
index 0000000..14265f7
--- /dev/null
@@ -0,0 +1,126 @@
+From 3ca18a6f22982efdd0b4321d6431dacd3483658b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Mar 2023 08:43:07 -0700
+Subject: net: lan78xx: fix accessing the LAN7800's internal phy specific
+ registers from the MAC driver
+
+From: Yuiko Oshino <yuiko.oshino@microchip.com>
+
+[ Upstream commit e57cf3639c323eeed05d3725fd82f91b349adca8 ]
+
+Move the LAN7800 internal phy (phy ID  0x0007c132) specific register
+accesses to the phy driver (microchip.c).
+
+Fix the error reported by Enguerrand de Ribaucourt in December 2022,
+"Some operations during the cable switch workaround modify the register
+LAN88XX_INT_MASK of the PHY. However, this register is specific to the
+LAN8835 PHY. For instance, if a DP8322I PHY is connected to the LAN7801,
+that register (0x19), corresponds to the LED and MAC address
+configuration, resulting in unapropriate behavior."
+
+I did not test with the DP8322I PHY, but I tested with an EVB-LAN7800
+with the internal PHY.
+
+Fixes: 14437e3fa284 ("lan78xx: workaround of forced 100 Full/Half duplex mode error")
+Signed-off-by: Yuiko Oshino <yuiko.oshino@microchip.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20230301154307.30438-1-yuiko.oshino@microchip.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/microchip.c | 32 ++++++++++++++++++++++++++++++++
+ drivers/net/usb/lan78xx.c   | 27 +--------------------------
+ 2 files changed, 33 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
+index a644e8e5071c3..375bbd60b38af 100644
+--- a/drivers/net/phy/microchip.c
++++ b/drivers/net/phy/microchip.c
+@@ -326,6 +326,37 @@ static int lan88xx_config_aneg(struct phy_device *phydev)
+       return genphy_config_aneg(phydev);
+ }
++static void lan88xx_link_change_notify(struct phy_device *phydev)
++{
++      int temp;
++
++      /* At forced 100 F/H mode, chip may fail to set mode correctly
++       * when cable is switched between long(~50+m) and short one.
++       * As workaround, set to 10 before setting to 100
++       * at forced 100 F/H mode.
++       */
++      if (!phydev->autoneg && phydev->speed == 100) {
++              /* disable phy interrupt */
++              temp = phy_read(phydev, LAN88XX_INT_MASK);
++              temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
++              phy_write(phydev, LAN88XX_INT_MASK, temp);
++
++              temp = phy_read(phydev, MII_BMCR);
++              temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
++              phy_write(phydev, MII_BMCR, temp); /* set to 10 first */
++              temp |= BMCR_SPEED100;
++              phy_write(phydev, MII_BMCR, temp); /* set to 100 later */
++
++              /* clear pending interrupt generated while workaround */
++              temp = phy_read(phydev, LAN88XX_INT_STS);
++
++              /* enable phy interrupt back */
++              temp = phy_read(phydev, LAN88XX_INT_MASK);
++              temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
++              phy_write(phydev, LAN88XX_INT_MASK, temp);
++      }
++}
++
+ static struct phy_driver microchip_phy_driver[] = {
+ {
+       .phy_id         = 0x0007c130,
+@@ -339,6 +370,7 @@ static struct phy_driver microchip_phy_driver[] = {
+       .config_init    = lan88xx_config_init,
+       .config_aneg    = lan88xx_config_aneg,
++      .link_change_notify = lan88xx_link_change_notify,
+       .ack_interrupt  = lan88xx_phy_ack_interrupt,
+       .config_intr    = lan88xx_phy_config_intr,
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index 0b5b4f9c7c5b9..667984efeb3be 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1843,33 +1843,8 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev)
+ static void lan78xx_link_status_change(struct net_device *net)
+ {
+       struct phy_device *phydev = net->phydev;
+-      int temp;
+-
+-      /* At forced 100 F/H mode, chip may fail to set mode correctly
+-       * when cable is switched between long(~50+m) and short one.
+-       * As workaround, set to 10 before setting to 100
+-       * at forced 100 F/H mode.
+-       */
+-      if (!phydev->autoneg && (phydev->speed == 100)) {
+-              /* disable phy interrupt */
+-              temp = phy_read(phydev, LAN88XX_INT_MASK);
+-              temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
+-              phy_write(phydev, LAN88XX_INT_MASK, temp);
+-              temp = phy_read(phydev, MII_BMCR);
+-              temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
+-              phy_write(phydev, MII_BMCR, temp); /* set to 10 first */
+-              temp |= BMCR_SPEED100;
+-              phy_write(phydev, MII_BMCR, temp); /* set to 100 later */
+-
+-              /* clear pending interrupt generated while workaround */
+-              temp = phy_read(phydev, LAN88XX_INT_STS);
+-
+-              /* enable phy interrupt back */
+-              temp = phy_read(phydev, LAN88XX_INT_MASK);
+-              temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
+-              phy_write(phydev, LAN88XX_INT_MASK, temp);
+-      }
++      phy_print_status(phydev);
+ }
+ static int irq_map(struct irq_domain *d, unsigned int irq,
+-- 
+2.39.2
+
diff --git a/queue-5.10/net-phylib-get-rid-of-unnecessary-locking.patch b/queue-5.10/net-phylib-get-rid-of-unnecessary-locking.patch
new file mode 100644 (file)
index 0000000..26d1bd4
--- /dev/null
@@ -0,0 +1,174 @@
+From d6acee209596f0b12703c102f93bb74f3a892cdd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 16:37:54 +0000
+Subject: net: phylib: get rid of unnecessary locking
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit f4b47a2e9463950df3e7c8b70e017877c1d4eb11 ]
+
+The locking in phy_probe() and phy_remove() does very little to prevent
+any races with e.g. phy_attach_direct(), but instead causes lockdep ABBA
+warnings. Remove it.
+
+======================================================
+WARNING: possible circular locking dependency detected
+6.2.0-dirty #1108 Tainted: G        W   E
+------------------------------------------------------
+ip/415 is trying to acquire lock:
+ffff5c268f81ef50 (&dev->lock){+.+.}-{3:3}, at: phy_attach_direct+0x17c/0x3a0 [libphy]
+
+but task is already holding lock:
+ffffaef6496cb518 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x154/0x560
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #1 (rtnl_mutex){+.+.}-{3:3}:
+       __lock_acquire+0x35c/0x6c0
+       lock_acquire.part.0+0xcc/0x220
+       lock_acquire+0x68/0x84
+       __mutex_lock+0x8c/0x414
+       mutex_lock_nested+0x34/0x40
+       rtnl_lock+0x24/0x30
+       sfp_bus_add_upstream+0x34/0x150
+       phy_sfp_probe+0x4c/0x94 [libphy]
+       mv3310_probe+0x148/0x184 [marvell10g]
+       phy_probe+0x8c/0x200 [libphy]
+       call_driver_probe+0xbc/0x15c
+       really_probe+0xc0/0x320
+       __driver_probe_device+0x84/0x120
+       driver_probe_device+0x44/0x120
+       __device_attach_driver+0xc4/0x160
+       bus_for_each_drv+0x80/0xe0
+       __device_attach+0xb0/0x1f0
+       device_initial_probe+0x1c/0x2c
+       bus_probe_device+0xa4/0xb0
+       device_add+0x360/0x53c
+       phy_device_register+0x60/0xa4 [libphy]
+       fwnode_mdiobus_phy_device_register+0xc0/0x190 [fwnode_mdio]
+       fwnode_mdiobus_register_phy+0x160/0xd80 [fwnode_mdio]
+       of_mdiobus_register+0x140/0x340 [of_mdio]
+       orion_mdio_probe+0x298/0x3c0 [mvmdio]
+       platform_probe+0x70/0xe0
+       call_driver_probe+0x34/0x15c
+       really_probe+0xc0/0x320
+       __driver_probe_device+0x84/0x120
+       driver_probe_device+0x44/0x120
+       __driver_attach+0x104/0x210
+       bus_for_each_dev+0x78/0xdc
+       driver_attach+0x2c/0x3c
+       bus_add_driver+0x184/0x240
+       driver_register+0x80/0x13c
+       __platform_driver_register+0x30/0x3c
+       xt_compat_calc_jump+0x28/0xa4 [x_tables]
+       do_one_initcall+0x50/0x1b0
+       do_init_module+0x50/0x1fc
+       load_module+0x684/0x744
+       __do_sys_finit_module+0xc4/0x140
+       __arm64_sys_finit_module+0x28/0x34
+       invoke_syscall+0x50/0x120
+       el0_svc_common.constprop.0+0x6c/0x1b0
+       do_el0_svc+0x34/0x44
+       el0_svc+0x48/0xf0
+       el0t_64_sync_handler+0xb8/0xc0
+       el0t_64_sync+0x1a0/0x1a4
+
+-> #0 (&dev->lock){+.+.}-{3:3}:
+       check_prev_add+0xb4/0xc80
+       validate_chain+0x414/0x47c
+       __lock_acquire+0x35c/0x6c0
+       lock_acquire.part.0+0xcc/0x220
+       lock_acquire+0x68/0x84
+       __mutex_lock+0x8c/0x414
+       mutex_lock_nested+0x34/0x40
+       phy_attach_direct+0x17c/0x3a0 [libphy]
+       phylink_fwnode_phy_connect.part.0+0x70/0xe4 [phylink]
+       phylink_fwnode_phy_connect+0x48/0x60 [phylink]
+       mvpp2_open+0xec/0x2e0 [mvpp2]
+       __dev_open+0x104/0x214
+       __dev_change_flags+0x1d4/0x254
+       dev_change_flags+0x2c/0x7c
+       do_setlink+0x254/0xa50
+       __rtnl_newlink+0x430/0x514
+       rtnl_newlink+0x58/0x8c
+       rtnetlink_rcv_msg+0x17c/0x560
+       netlink_rcv_skb+0x64/0x150
+       rtnetlink_rcv+0x20/0x30
+       netlink_unicast+0x1d4/0x2b4
+       netlink_sendmsg+0x1a4/0x400
+       ____sys_sendmsg+0x228/0x290
+       ___sys_sendmsg+0x88/0xec
+       __sys_sendmsg+0x70/0xd0
+       __arm64_sys_sendmsg+0x2c/0x40
+       invoke_syscall+0x50/0x120
+       el0_svc_common.constprop.0+0x6c/0x1b0
+       do_el0_svc+0x34/0x44
+       el0_svc+0x48/0xf0
+       el0t_64_sync_handler+0xb8/0xc0
+       el0t_64_sync+0x1a0/0x1a4
+
+other info that might help us debug this:
+
+ Possible unsafe locking scenario:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(rtnl_mutex);
+                               lock(&dev->lock);
+                               lock(rtnl_mutex);
+  lock(&dev->lock);
+
+ *** DEADLOCK ***
+
+Fixes: 298e54fa810e ("net: phy: add core phylib sfp support")
+Reported-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_device.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 3ef5aa6b72a7e..e771e0e8a9bc6 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -2833,8 +2833,6 @@ static int phy_probe(struct device *dev)
+       if (phydrv->flags & PHY_IS_INTERNAL)
+               phydev->is_internal = true;
+-      mutex_lock(&phydev->lock);
+-
+       /* Deassert the reset signal */
+       phy_device_reset(phydev, 0);
+@@ -2903,12 +2901,10 @@ static int phy_probe(struct device *dev)
+       phydev->state = PHY_READY;
+ out:
+-      /* Assert the reset signal */
++      /* Re-assert the reset signal on error */
+       if (err)
+               phy_device_reset(phydev, 1);
+-      mutex_unlock(&phydev->lock);
+-
+       return err;
+ }
+@@ -2918,9 +2914,7 @@ static int phy_remove(struct device *dev)
+       cancel_delayed_work_sync(&phydev->state_queue);
+-      mutex_lock(&phydev->lock);
+       phydev->state = PHY_DOWN;
+-      mutex_unlock(&phydev->lock);
+       sfp_bus_del_upstream(phydev->sfp_bus);
+       phydev->sfp_bus = NULL;
+-- 
+2.39.2
+
diff --git a/queue-5.10/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch b/queue-5.10/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch
new file mode 100644 (file)
index 0000000..dad9b69
--- /dev/null
@@ -0,0 +1,74 @@
+From 15737a362a18c35944df6ef005f6b797cc011146 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Mar 2023 11:23:46 +0800
+Subject: net/smc: fix fallback failed while sendmsg with fastopen
+
+From: D. Wythe <alibuda@linux.alibaba.com>
+
+[ Upstream commit ce7ca794712f186da99719e8b4e97bd5ddbb04c3 ]
+
+Before determining whether the msg has unsupported options, it has been
+prematurely terminated by the wrong status check.
+
+For the application, the general usages of MSG_FASTOPEN likes
+
+fd = socket(...)
+/* rather than connect */
+sendto(fd, data, len, MSG_FASTOPEN)
+
+Hence, We need to check the flag before state check, because the sock
+state here is always SMC_INIT when applications tries MSG_FASTOPEN.
+Once we found unsupported options, fallback it to TCP.
+
+Fixes: ee9dfbef02d1 ("net/smc: handle sockopts forcing fallback")
+Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
+Signed-off-by: Simon Horman <simon.horman@corigine.com>
+
+v2 -> v1: Optimize code style
+Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/smc/af_smc.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
+index 41cbc7c89c9d2..8ab84926816f6 100644
+--- a/net/smc/af_smc.c
++++ b/net/smc/af_smc.c
+@@ -1988,16 +1988,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+ {
+       struct sock *sk = sock->sk;
+       struct smc_sock *smc;
+-      int rc = -EPIPE;
++      int rc;
+       smc = smc_sk(sk);
+       lock_sock(sk);
+-      if ((sk->sk_state != SMC_ACTIVE) &&
+-          (sk->sk_state != SMC_APPCLOSEWAIT1) &&
+-          (sk->sk_state != SMC_INIT))
+-              goto out;
++      /* SMC does not support connect with fastopen */
+       if (msg->msg_flags & MSG_FASTOPEN) {
++              /* not connected yet, fallback */
+               if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
+                       smc_switch_to_fallback(smc);
+                       smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
+@@ -2005,6 +2003,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+                       rc = -EINVAL;
+                       goto out;
+               }
++      } else if ((sk->sk_state != SMC_ACTIVE) &&
++                 (sk->sk_state != SMC_APPCLOSEWAIT1) &&
++                 (sk->sk_state != SMC_INIT)) {
++              rc = -EPIPE;
++              goto out;
+       }
+       if (smc->use_fallback)
+-- 
+2.39.2
+
diff --git a/queue-5.10/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch b/queue-5.10/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch
new file mode 100644 (file)
index 0000000..c77dc8d
--- /dev/null
@@ -0,0 +1,50 @@
+From ff5576cb0372afdabad429f5c9fc87d6ffb29972 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Mar 2023 14:21:43 +0800
+Subject: net: stmmac: add to set device wake up flag when stmmac init phy
+
+From: Rongguang Wei <weirongguang@kylinos.cn>
+
+[ Upstream commit a9334b702a03b693f54ebd3b98f67bf722b74870 ]
+
+When MAC is not support PMT, driver will check PHY's WoL capability
+and set device wakeup capability in stmmac_init_phy(). We can enable
+the WoL through ethtool, the driver would enable the device wake up
+flag. Now the device_may_wakeup() return true.
+
+But if there is a way which enable the PHY's WoL capability derectly,
+like in BIOS. The driver would not know the enable thing and would not
+set the device wake up flag. The phy_suspend may failed like this:
+
+[   32.409063] PM: dpm_run_callback(): mdio_bus_phy_suspend+0x0/0x50 returns -16
+[   32.409065] PM: Device stmmac-1:00 failed to suspend: error -16
+[   32.409067] PM: Some devices failed to suspend, or early wake event detected
+
+Add to set the device wakeup enable flag according to the get_wol
+function result in PHY can fix the error in this scene.
+
+v2: add a Fixes tag.
+
+Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy")
+Signed-off-by: Rongguang Wei <weirongguang@kylinos.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index 1ec000d4c7705..04c59102a2863 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -1145,6 +1145,7 @@ static int stmmac_init_phy(struct net_device *dev)
+               phylink_ethtool_get_wol(priv->phylink, &wol);
+               device_set_wakeup_capable(priv->device, !!wol.supported);
++              device_set_wakeup_enable(priv->device, !!wol.wolopts);
+       }
+       return ret;
+-- 
+2.39.2
+
diff --git a/queue-5.10/net-usb-lan78xx-remove-lots-of-set-but-unused-ret-va.patch b/queue-5.10/net-usb-lan78xx-remove-lots-of-set-but-unused-ret-va.patch
new file mode 100644 (file)
index 0000000..a59e2ac
--- /dev/null
@@ -0,0 +1,540 @@
+From 5bd38ea4f8a413bd78444f4001fd1ca2b70e9193 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Nov 2020 11:45:06 +0000
+Subject: net: usb: lan78xx: Remove lots of set but unused 'ret' variables
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lee Jones <lee.jones@linaro.org>
+
+[ Upstream commit 06cd7c46b3ab3f2252c61bf85b191236cf0254e1 ]
+
+Fixes the following W=1 kernel build warning(s):
+
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_read_raw_otp’:
+ drivers/net/usb/lan78xx.c:825:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_write_raw_otp’:
+ drivers/net/usb/lan78xx.c:879:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_deferred_multicast_write’:
+ drivers/net/usb/lan78xx.c:1041:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_update_flowcontrol’:
+ drivers/net/usb/lan78xx.c:1127:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_init_mac_address’:
+ drivers/net/usb/lan78xx.c:1666:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_link_status_change’:
+ drivers/net/usb/lan78xx.c:1841:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_irq_bus_sync_unlock’:
+ drivers/net/usb/lan78xx.c:1920:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan8835_fixup’:
+ drivers/net/usb/lan78xx.c:1994:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_rx_max_frame_length’:
+ drivers/net/usb/lan78xx.c:2192:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_change_mtu’:
+ drivers/net/usb/lan78xx.c:2270:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_mac_addr’:
+ drivers/net/usb/lan78xx.c:2299:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_features’:
+ drivers/net/usb/lan78xx.c:2333:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_suspend’:
+ drivers/net/usb/lan78xx.c:3807:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Link: https://lore.kernel.org/r/20201102114512.1062724-25-lee.jones@linaro.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: e57cf3639c32 ("net: lan78xx: fix accessing the LAN7800's internal phy specific registers from the MAC driver")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/lan78xx.c | 168 ++++++++++++++++++--------------------
+ 1 file changed, 78 insertions(+), 90 deletions(-)
+
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index 6f7b70522d926..0b5b4f9c7c5b9 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -824,20 +824,19 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
+                               u32 length, u8 *data)
+ {
+       int i;
+-      int ret;
+       u32 buf;
+       unsigned long timeout;
+-      ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
++      lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+       if (buf & OTP_PWR_DN_PWRDN_N_) {
+               /* clear it and wait to be cleared */
+-              ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0);
++              lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+               timeout = jiffies + HZ;
+               do {
+                       usleep_range(1, 10);
+-                      ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
++                      lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+                       if (time_after(jiffies, timeout)) {
+                               netdev_warn(dev->net,
+                                           "timeout on OTP_PWR_DN");
+@@ -847,18 +846,18 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
+       }
+       for (i = 0; i < length; i++) {
+-              ret = lan78xx_write_reg(dev, OTP_ADDR1,
++              lan78xx_write_reg(dev, OTP_ADDR1,
+                                       ((offset + i) >> 8) & OTP_ADDR1_15_11);
+-              ret = lan78xx_write_reg(dev, OTP_ADDR2,
++              lan78xx_write_reg(dev, OTP_ADDR2,
+                                       ((offset + i) & OTP_ADDR2_10_3));
+-              ret = lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
+-              ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
++              lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
++              lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+               timeout = jiffies + HZ;
+               do {
+                       udelay(1);
+-                      ret = lan78xx_read_reg(dev, OTP_STATUS, &buf);
++                      lan78xx_read_reg(dev, OTP_STATUS, &buf);
+                       if (time_after(jiffies, timeout)) {
+                               netdev_warn(dev->net,
+                                           "timeout on OTP_STATUS");
+@@ -866,7 +865,7 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
+                       }
+               } while (buf & OTP_STATUS_BUSY_);
+-              ret = lan78xx_read_reg(dev, OTP_RD_DATA, &buf);
++              lan78xx_read_reg(dev, OTP_RD_DATA, &buf);
+               data[i] = (u8)(buf & 0xFF);
+       }
+@@ -878,20 +877,19 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset,
+                                u32 length, u8 *data)
+ {
+       int i;
+-      int ret;
+       u32 buf;
+       unsigned long timeout;
+-      ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
++      lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+       if (buf & OTP_PWR_DN_PWRDN_N_) {
+               /* clear it and wait to be cleared */
+-              ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0);
++              lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+               timeout = jiffies + HZ;
+               do {
+                       udelay(1);
+-                      ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
++                      lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+                       if (time_after(jiffies, timeout)) {
+                               netdev_warn(dev->net,
+                                           "timeout on OTP_PWR_DN completion");
+@@ -901,21 +899,21 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset,
+       }
+       /* set to BYTE program mode */
+-      ret = lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
++      lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
+       for (i = 0; i < length; i++) {
+-              ret = lan78xx_write_reg(dev, OTP_ADDR1,
++              lan78xx_write_reg(dev, OTP_ADDR1,
+                                       ((offset + i) >> 8) & OTP_ADDR1_15_11);
+-              ret = lan78xx_write_reg(dev, OTP_ADDR2,
++              lan78xx_write_reg(dev, OTP_ADDR2,
+                                       ((offset + i) & OTP_ADDR2_10_3));
+-              ret = lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
+-              ret = lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
+-              ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
++              lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
++              lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
++              lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+               timeout = jiffies + HZ;
+               do {
+                       udelay(1);
+-                      ret = lan78xx_read_reg(dev, OTP_STATUS, &buf);
++                      lan78xx_read_reg(dev, OTP_STATUS, &buf);
+                       if (time_after(jiffies, timeout)) {
+                               netdev_warn(dev->net,
+                                           "Timeout on OTP_STATUS completion");
+@@ -1040,7 +1038,6 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
+                       container_of(param, struct lan78xx_priv, set_multicast);
+       struct lan78xx_net *dev = pdata->dev;
+       int i;
+-      int ret;
+       netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n",
+                 pdata->rfe_ctl);
+@@ -1049,14 +1046,14 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
+                              DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+       for (i = 1; i < NUM_OF_MAF; i++) {
+-              ret = lan78xx_write_reg(dev, MAF_HI(i), 0);
+-              ret = lan78xx_write_reg(dev, MAF_LO(i),
++              lan78xx_write_reg(dev, MAF_HI(i), 0);
++              lan78xx_write_reg(dev, MAF_LO(i),
+                                       pdata->pfilter_table[i][1]);
+-              ret = lan78xx_write_reg(dev, MAF_HI(i),
++              lan78xx_write_reg(dev, MAF_HI(i),
+                                       pdata->pfilter_table[i][0]);
+       }
+-      ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
++      lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+ }
+ static void lan78xx_set_multicast(struct net_device *netdev)
+@@ -1126,7 +1123,6 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
+                                     u16 lcladv, u16 rmtadv)
+ {
+       u32 flow = 0, fct_flow = 0;
+-      int ret;
+       u8 cap;
+       if (dev->fc_autoneg)
+@@ -1149,10 +1145,10 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
+                 (cap & FLOW_CTRL_RX ? "enabled" : "disabled"),
+                 (cap & FLOW_CTRL_TX ? "enabled" : "disabled"));
+-      ret = lan78xx_write_reg(dev, FCT_FLOW, fct_flow);
++      lan78xx_write_reg(dev, FCT_FLOW, fct_flow);
+       /* threshold value should be set before enabling flow */
+-      ret = lan78xx_write_reg(dev, FLOW, flow);
++      lan78xx_write_reg(dev, FLOW, flow);
+       return 0;
+ }
+@@ -1673,11 +1669,10 @@ static const struct ethtool_ops lan78xx_ethtool_ops = {
+ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
+ {
+       u32 addr_lo, addr_hi;
+-      int ret;
+       u8 addr[6];
+-      ret = lan78xx_read_reg(dev, RX_ADDRL, &addr_lo);
+-      ret = lan78xx_read_reg(dev, RX_ADDRH, &addr_hi);
++      lan78xx_read_reg(dev, RX_ADDRL, &addr_lo);
++      lan78xx_read_reg(dev, RX_ADDRH, &addr_hi);
+       addr[0] = addr_lo & 0xFF;
+       addr[1] = (addr_lo >> 8) & 0xFF;
+@@ -1710,12 +1705,12 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
+                         (addr[2] << 16) | (addr[3] << 24);
+               addr_hi = addr[4] | (addr[5] << 8);
+-              ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+-              ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
++              lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
++              lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+       }
+-      ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
+-      ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
++      lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
++      lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
+       ether_addr_copy(dev->net->dev_addr, addr);
+ }
+@@ -1848,7 +1843,7 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev)
+ static void lan78xx_link_status_change(struct net_device *net)
+ {
+       struct phy_device *phydev = net->phydev;
+-      int ret, temp;
++      int temp;
+       /* At forced 100 F/H mode, chip may fail to set mode correctly
+        * when cable is switched between long(~50+m) and short one.
+@@ -1859,7 +1854,7 @@ static void lan78xx_link_status_change(struct net_device *net)
+               /* disable phy interrupt */
+               temp = phy_read(phydev, LAN88XX_INT_MASK);
+               temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
+-              ret = phy_write(phydev, LAN88XX_INT_MASK, temp);
++              phy_write(phydev, LAN88XX_INT_MASK, temp);
+               temp = phy_read(phydev, MII_BMCR);
+               temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
+@@ -1873,7 +1868,7 @@ static void lan78xx_link_status_change(struct net_device *net)
+               /* enable phy interrupt back */
+               temp = phy_read(phydev, LAN88XX_INT_MASK);
+               temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
+-              ret = phy_write(phydev, LAN88XX_INT_MASK, temp);
++              phy_write(phydev, LAN88XX_INT_MASK, temp);
+       }
+ }
+@@ -1927,14 +1922,13 @@ static void lan78xx_irq_bus_sync_unlock(struct irq_data *irqd)
+       struct lan78xx_net *dev =
+                       container_of(data, struct lan78xx_net, domain_data);
+       u32 buf;
+-      int ret;
+       /* call register access here because irq_bus_lock & irq_bus_sync_unlock
+        * are only two callbacks executed in non-atomic contex.
+        */
+-      ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf);
++      lan78xx_read_reg(dev, INT_EP_CTL, &buf);
+       if (buf != data->irqenable)
+-              ret = lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable);
++              lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable);
+       mutex_unlock(&data->irq_lock);
+ }
+@@ -2001,7 +1995,6 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev)
+ static int lan8835_fixup(struct phy_device *phydev)
+ {
+       int buf;
+-      int ret;
+       struct lan78xx_net *dev = netdev_priv(phydev->attached_dev);
+       /* LED2/PME_N/IRQ_N/RGMII_ID pin to IRQ_N mode */
+@@ -2011,11 +2004,11 @@ static int lan8835_fixup(struct phy_device *phydev)
+       phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8010, buf);
+       /* RGMII MAC TXC Delay Enable */
+-      ret = lan78xx_write_reg(dev, MAC_RGMII_ID,
++      lan78xx_write_reg(dev, MAC_RGMII_ID,
+                               MAC_RGMII_ID_TXC_DELAY_EN_);
+       /* RGMII TX DLL Tune Adjust */
+-      ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
++      lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
+       dev->interface = PHY_INTERFACE_MODE_RGMII_TXID;
+@@ -2199,28 +2192,27 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
+ static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size)
+ {
+-      int ret = 0;
+       u32 buf;
+       bool rxenabled;
+-      ret = lan78xx_read_reg(dev, MAC_RX, &buf);
++      lan78xx_read_reg(dev, MAC_RX, &buf);
+       rxenabled = ((buf & MAC_RX_RXEN_) != 0);
+       if (rxenabled) {
+               buf &= ~MAC_RX_RXEN_;
+-              ret = lan78xx_write_reg(dev, MAC_RX, buf);
++              lan78xx_write_reg(dev, MAC_RX, buf);
+       }
+       /* add 4 to size for FCS */
+       buf &= ~MAC_RX_MAX_SIZE_MASK_;
+       buf |= (((size + 4) << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_);
+-      ret = lan78xx_write_reg(dev, MAC_RX, buf);
++      lan78xx_write_reg(dev, MAC_RX, buf);
+       if (rxenabled) {
+               buf |= MAC_RX_RXEN_;
+-              ret = lan78xx_write_reg(dev, MAC_RX, buf);
++              lan78xx_write_reg(dev, MAC_RX, buf);
+       }
+       return 0;
+@@ -2277,13 +2269,12 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
+       int ll_mtu = new_mtu + netdev->hard_header_len;
+       int old_hard_mtu = dev->hard_mtu;
+       int old_rx_urb_size = dev->rx_urb_size;
+-      int ret;
+       /* no second zero-length packet read wanted after mtu-sized packets */
+       if ((ll_mtu % dev->maxpacket) == 0)
+               return -EDOM;
+-      ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
++      lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
+       netdev->mtu = new_mtu;
+@@ -2306,7 +2297,6 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
+       struct lan78xx_net *dev = netdev_priv(netdev);
+       struct sockaddr *addr = p;
+       u32 addr_lo, addr_hi;
+-      int ret;
+       if (netif_running(netdev))
+               return -EBUSY;
+@@ -2323,12 +2313,12 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
+       addr_hi = netdev->dev_addr[4] |
+                 netdev->dev_addr[5] << 8;
+-      ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+-      ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
++      lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
++      lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+       /* Added to support MAC address changes */
+-      ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
+-      ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
++      lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
++      lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
+       return 0;
+ }
+@@ -2340,7 +2330,6 @@ static int lan78xx_set_features(struct net_device *netdev,
+       struct lan78xx_net *dev = netdev_priv(netdev);
+       struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+       unsigned long flags;
+-      int ret;
+       spin_lock_irqsave(&pdata->rfe_ctl_lock, flags);
+@@ -2364,7 +2353,7 @@ static int lan78xx_set_features(struct net_device *netdev,
+       spin_unlock_irqrestore(&pdata->rfe_ctl_lock, flags);
+-      ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
++      lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+       return 0;
+ }
+@@ -3820,7 +3809,6 @@ static u16 lan78xx_wakeframe_crc16(const u8 *buf, int len)
+ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+ {
+       u32 buf;
+-      int ret;
+       int mask_index;
+       u16 crc;
+       u32 temp_wucsr;
+@@ -3829,26 +3817,26 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+       const u8 ipv6_multicast[3] = { 0x33, 0x33 };
+       const u8 arp_type[2] = { 0x08, 0x06 };
+-      ret = lan78xx_read_reg(dev, MAC_TX, &buf);
++      lan78xx_read_reg(dev, MAC_TX, &buf);
+       buf &= ~MAC_TX_TXEN_;
+-      ret = lan78xx_write_reg(dev, MAC_TX, buf);
+-      ret = lan78xx_read_reg(dev, MAC_RX, &buf);
++      lan78xx_write_reg(dev, MAC_TX, buf);
++      lan78xx_read_reg(dev, MAC_RX, &buf);
+       buf &= ~MAC_RX_RXEN_;
+-      ret = lan78xx_write_reg(dev, MAC_RX, buf);
++      lan78xx_write_reg(dev, MAC_RX, buf);
+-      ret = lan78xx_write_reg(dev, WUCSR, 0);
+-      ret = lan78xx_write_reg(dev, WUCSR2, 0);
+-      ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
++      lan78xx_write_reg(dev, WUCSR, 0);
++      lan78xx_write_reg(dev, WUCSR2, 0);
++      lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+       temp_wucsr = 0;
+       temp_pmt_ctl = 0;
+-      ret = lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
++      lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
+       temp_pmt_ctl &= ~PMT_CTL_RES_CLR_WKP_EN_;
+       temp_pmt_ctl |= PMT_CTL_RES_CLR_WKP_STS_;
+       for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++)
+-              ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
+       mask_index = 0;
+       if (wol & WAKE_PHY) {
+@@ -3877,30 +3865,30 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+               /* set WUF_CFG & WUF_MASK for IPv4 Multicast */
+               crc = lan78xx_wakeframe_crc16(ipv4_multicast, 3);
+-              ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
++              lan78xx_write_reg(dev, WUF_CFG(mask_index),
+                                       WUF_CFGX_EN_ |
+                                       WUF_CFGX_TYPE_MCAST_ |
+                                       (0 << WUF_CFGX_OFFSET_SHIFT_) |
+                                       (crc & WUF_CFGX_CRC16_MASK_));
+-              ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
+-              ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+-              ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+-              ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
++              lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+               mask_index++;
+               /* for IPv6 Multicast */
+               crc = lan78xx_wakeframe_crc16(ipv6_multicast, 2);
+-              ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
++              lan78xx_write_reg(dev, WUF_CFG(mask_index),
+                                       WUF_CFGX_EN_ |
+                                       WUF_CFGX_TYPE_MCAST_ |
+                                       (0 << WUF_CFGX_OFFSET_SHIFT_) |
+                                       (crc & WUF_CFGX_CRC16_MASK_));
+-              ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
+-              ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+-              ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+-              ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
++              lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+               mask_index++;
+               temp_pmt_ctl |= PMT_CTL_WOL_EN_;
+@@ -3921,16 +3909,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+                * for packettype (offset 12,13) = ARP (0x0806)
+                */
+               crc = lan78xx_wakeframe_crc16(arp_type, 2);
+-              ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
++              lan78xx_write_reg(dev, WUF_CFG(mask_index),
+                                       WUF_CFGX_EN_ |
+                                       WUF_CFGX_TYPE_ALL_ |
+                                       (0 << WUF_CFGX_OFFSET_SHIFT_) |
+                                       (crc & WUF_CFGX_CRC16_MASK_));
+-              ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
+-              ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+-              ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+-              ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
++              lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
++              lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+               mask_index++;
+               temp_pmt_ctl |= PMT_CTL_WOL_EN_;
+@@ -3938,7 +3926,7 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+               temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
+       }
+-      ret = lan78xx_write_reg(dev, WUCSR, temp_wucsr);
++      lan78xx_write_reg(dev, WUCSR, temp_wucsr);
+       /* when multiple WOL bits are set */
+       if (hweight_long((unsigned long)wol) > 1) {
+@@ -3946,16 +3934,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+               temp_pmt_ctl &= ~PMT_CTL_SUS_MODE_MASK_;
+               temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
+       }
+-      ret = lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
++      lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
+       /* clear WUPS */
+-      ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
++      lan78xx_read_reg(dev, PMT_CTL, &buf);
+       buf |= PMT_CTL_WUPS_MASK_;
+-      ret = lan78xx_write_reg(dev, PMT_CTL, buf);
++      lan78xx_write_reg(dev, PMT_CTL, buf);
+-      ret = lan78xx_read_reg(dev, MAC_RX, &buf);
++      lan78xx_read_reg(dev, MAC_RX, &buf);
+       buf |= MAC_RX_RXEN_;
+-      ret = lan78xx_write_reg(dev, MAC_RX, buf);
++      lan78xx_write_reg(dev, MAC_RX, buf);
+       return 0;
+ }
+-- 
+2.39.2
+
diff --git a/queue-5.10/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch b/queue-5.10/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch
new file mode 100644 (file)
index 0000000..36912af
--- /dev/null
@@ -0,0 +1,80 @@
+From 6cb1137d72bb5310fa2ef6f663fc7b7adf26ee20 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Mar 2023 17:48:31 -0800
+Subject: netfilter: ctnetlink: revert to dumping mark regardless of event type
+
+From: Ivan Delalande <colona@arista.com>
+
+[ Upstream commit 9f7dd42f0db1dc6915a52d4a8a96ca18dd8cc34e ]
+
+It seems that change was unintentional, we have userspace code that
+needs the mark while listening for events like REPLY, DESTROY, etc.
+Also include 0-marks in requested dumps, as they were before that fix.
+
+Fixes: 1feeae071507 ("netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark")
+Signed-off-by: Ivan Delalande <colona@arista.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_conntrack_netlink.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
+index f8ba3bc25cf34..c9ca857f1068d 100644
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -317,11 +317,12 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+ }
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+-static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
++static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct,
++                             bool dump)
+ {
+       u32 mark = READ_ONCE(ct->mark);
+-      if (!mark)
++      if (!mark && !dump)
+               return 0;
+       if (nla_put_be32(skb, CTA_MARK, htonl(mark)))
+@@ -332,7 +333,7 @@ static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
+       return -1;
+ }
+ #else
+-#define ctnetlink_dump_mark(a, b) (0)
++#define ctnetlink_dump_mark(a, b, c) (0)
+ #endif
+ #ifdef CONFIG_NF_CONNTRACK_SECMARK
+@@ -537,7 +538,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb,
+ static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
+ {
+       if (ctnetlink_dump_status(skb, ct) < 0 ||
+-          ctnetlink_dump_mark(skb, ct) < 0 ||
++          ctnetlink_dump_mark(skb, ct, true) < 0 ||
+           ctnetlink_dump_secctx(skb, ct) < 0 ||
+           ctnetlink_dump_id(skb, ct) < 0 ||
+           ctnetlink_dump_use(skb, ct) < 0 ||
+@@ -816,8 +817,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
+       }
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+-      if (events & (1 << IPCT_MARK) &&
+-          ctnetlink_dump_mark(skb, ct) < 0)
++      if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
+               goto nla_put_failure;
+ #endif
+       nlmsg_end(skb, nlh);
+@@ -2734,7 +2734,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
+               goto nla_put_failure;
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+-      if (ctnetlink_dump_mark(skb, ct) < 0)
++      if (ctnetlink_dump_mark(skb, ct, true) < 0)
+               goto nla_put_failure;
+ #endif
+       if (ctnetlink_dump_labels(skb, ct) < 0)
+-- 
+2.39.2
+
diff --git a/queue-5.10/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch b/queue-5.10/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch
new file mode 100644 (file)
index 0000000..da6c0f2
--- /dev/null
@@ -0,0 +1,83 @@
+From 6381d75ba1ecd932217d7f7d64942ff0f5b7445f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 10:58:56 +0100
+Subject: netfilter: tproxy: fix deadlock due to missing BH disable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 4a02426787bf024dafdb79b362285ee325de3f5e ]
+
+The xtables packet traverser performs an unconditional local_bh_disable(),
+but the nf_tables evaluation loop does not.
+
+Functions that are called from either xtables or nftables must assume
+that they can be called in process context.
+
+inet_twsk_deschedule_put() assumes that no softirq interrupt can occur.
+If tproxy is used from nf_tables it's possible that we'll deadlock
+trying to acquire a lock already held in process context.
+
+Add a small helper that takes care of this and use it.
+
+Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/
+Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support")
+Reported-and-tested-by: Major Dávid <major.david@balasys.hu>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tproxy.h   | 7 +++++++
+ net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 +-
+ net/ipv6/netfilter/nf_tproxy_ipv6.c | 2 +-
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
+index 82d0e41b76f22..faa108b1ba675 100644
+--- a/include/net/netfilter/nf_tproxy.h
++++ b/include/net/netfilter/nf_tproxy.h
+@@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
+       return false;
+ }
++static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw)
++{
++      local_bh_disable();
++      inet_twsk_deschedule_put(tw);
++      local_bh_enable();
++}
++
+ /* assign a socket to the skb -- consumes sk */
+ static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+ {
+diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
+index b2bae0b0e42a1..61cb2341f50fe 100644
+--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
++++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
+@@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
+                                           hp->source, lport ? lport : hp->dest,
+                                           skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+               if (sk2) {
+-                      inet_twsk_deschedule_put(inet_twsk(sk));
++                      nf_tproxy_twsk_deschedule_put(inet_twsk(sk));
+                       sk = sk2;
+               }
+       }
+diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
+index 6bac68fb27a39..3fe4f15e01dc8 100644
+--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
++++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
+@@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
+                                           lport ? lport : hp->dest,
+                                           skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+               if (sk2) {
+-                      inet_twsk_deschedule_put(inet_twsk(sk));
++                      nf_tproxy_twsk_deschedule_put(inet_twsk(sk));
+                       sk = sk2;
+               }
+       }
+-- 
+2.39.2
+
diff --git a/queue-5.10/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch b/queue-5.10/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch
new file mode 100644 (file)
index 0000000..469c096
--- /dev/null
@@ -0,0 +1,49 @@
+From ad95ef4d6e4c52faf8606c661e9cd450d5edca1d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Feb 2023 17:30:37 +0800
+Subject: nfc: fdp: add null check of devm_kmalloc_array in
+ fdp_nci_i2c_read_device_properties
+
+From: Kang Chen <void0red@gmail.com>
+
+[ Upstream commit 11f180a5d62a51b484e9648f9b310e1bd50b1a57 ]
+
+devm_kmalloc_array may fails, *fw_vsc_cfg might be null and cause
+out-of-bounds write in device_property_read_u8_array later.
+
+Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver")
+Signed-off-by: Kang Chen <void0red@gmail.com>
+Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/20230227093037.907654-1-void0red@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nfc/fdp/i2c.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
+index 5e300788be525..808d73050afd0 100644
+--- a/drivers/nfc/fdp/i2c.c
++++ b/drivers/nfc/fdp/i2c.c
+@@ -249,6 +249,9 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev,
+                                          len, sizeof(**fw_vsc_cfg),
+                                          GFP_KERNEL);
++              if (!*fw_vsc_cfg)
++                      goto alloc_err;
++
+               r = device_property_read_u8_array(dev, FDP_DP_FW_VSC_CFG_NAME,
+                                                 *fw_vsc_cfg, len);
+@@ -262,6 +265,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev,
+               *fw_vsc_cfg = NULL;
+       }
++alloc_err:
+       dev_dbg(dev, "Clock type: %d, clock frequency: %d, VSC: %s",
+               *clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no");
+ }
+-- 
+2.39.2
+
diff --git a/queue-5.10/platform-x86-mlx_platform-select-regmap-instead-of-d.patch b/queue-5.10/platform-x86-mlx_platform-select-regmap-instead-of-d.patch
new file mode 100644 (file)
index 0000000..bdb4203
--- /dev/null
@@ -0,0 +1,50 @@
+From 695d62e869960ab3f07236095ed9f3f2bb757344 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 25 Feb 2023 21:39:51 -0800
+Subject: platform: x86: MLX_PLATFORM: select REGMAP instead of depending on it
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+[ Upstream commit 7e7e1541c91615e9950d0b96bcd1806d297e970e ]
+
+REGMAP is a hidden (not user visible) symbol. Users cannot set it
+directly thru "make *config", so drivers should select it instead of
+depending on it if they need it.
+
+Consistently using "select" or "depends on" can also help reduce
+Kconfig circular dependency issues.
+
+Therefore, change the use of "depends on REGMAP" to "select REGMAP".
+
+Fixes: ef0f62264b2a ("platform/x86: mlx-platform: Add physical bus number auto detection")
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Cc: Vadim Pasternak <vadimp@mellanox.com>
+Cc: Darren Hart <dvhart@infradead.org>
+Cc: Hans de Goede <hdegoede@redhat.com>
+Cc: Mark Gross <markgross@kernel.org>
+Cc: platform-driver-x86@vger.kernel.org
+Link: https://lore.kernel.org/r/20230226053953.4681-7-rdunlap@infradead.org
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/Kconfig | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
+index a1858689d6e10..84c5b922f245e 100644
+--- a/drivers/platform/x86/Kconfig
++++ b/drivers/platform/x86/Kconfig
+@@ -1195,7 +1195,8 @@ config I2C_MULTI_INSTANTIATE
+ config MLX_PLATFORM
+       tristate "Mellanox Technologies platform support"
+-      depends on I2C && REGMAP
++      depends on I2C
++      select REGMAP
+       help
+         This option enables system support for the Mellanox Technologies
+         platform. The Mellanox systems provide data center networking
+-- 
+2.39.2
+
diff --git a/queue-5.10/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch b/queue-5.10/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch
new file mode 100644 (file)
index 0000000..66d551e
--- /dev/null
@@ -0,0 +1,36 @@
+From 66894d71b0beadb5b792a7b62763eab3a7798844 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Feb 2023 17:59:39 +0200
+Subject: powerpc: dts: t1040rdb: fix compatible string for Rev A boards
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit ae44f1c9d1fc54aeceb335fedb1e73b2c3ee4561 ]
+
+It looks like U-Boot fails to start the kernel properly when the
+compatible string of the board isn't fsl,T1040RDB, so stop overriding it
+from the rev-a.dts.
+
+Fixes: 5ebb74749202 ("powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
+index 73f8c998c64df..d4f5f159d6f23 100644
+--- a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
++++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
+@@ -10,7 +10,6 @@
+ / {
+       model = "fsl,T1040RDB-REV-A";
+-      compatible = "fsl,T1040RDB-REV-A";
+ };
+ &seville_port0 {
+-- 
+2.39.2
+
diff --git a/queue-5.10/risc-v-avoid-dereferening-null-regs-in-die.patch b/queue-5.10/risc-v-avoid-dereferening-null-regs-in-die.patch
new file mode 100644 (file)
index 0000000..ad019a5
--- /dev/null
@@ -0,0 +1,57 @@
+From f0d5977c98155a0bed9f1a9093ca9a7e9f2d0f0e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Sep 2022 13:00:37 -0700
+Subject: RISC-V: Avoid dereferening NULL regs in die()
+
+From: Palmer Dabbelt <palmer@rivosinc.com>
+
+[ Upstream commit f2913d006fcdb61719635e093d1b5dd0dafecac7 ]
+
+I don't think we can actually die() without a regs pointer, but the
+compiler was warning about a NULL check after a dereference.  It seems
+prudent to just avoid the possibly-NULL dereference, given that when
+die()ing the system is already toast so who knows how we got there.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20220920200037.6727-1-palmer@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Stable-dep-of: 130aee3fd998 ("riscv: Avoid enabling interrupts in die()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/traps.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
+index 23fe03ca7ec7b..bc6b30f3add83 100644
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -31,6 +31,7 @@ void die(struct pt_regs *regs, const char *str)
+ {
+       static int die_counter;
+       int ret;
++      long cause;
+       oops_enter();
+@@ -40,11 +41,13 @@ void die(struct pt_regs *regs, const char *str)
+       pr_emerg("%s [#%d]\n", str, ++die_counter);
+       print_modules();
+-      show_regs(regs);
++      if (regs)
++              show_regs(regs);
+-      ret = notify_die(DIE_OOPS, str, regs, 0, regs->cause, SIGSEGV);
++      cause = regs ? regs->cause : -1;
++      ret = notify_die(DIE_OOPS, str, regs, 0, cause, SIGSEGV);
+-      if (regs && kexec_should_crash(current))
++      if (kexec_should_crash(current))
+               crash_kexec(regs);
+       bust_spinlocks(0);
+-- 
+2.39.2
+
diff --git a/queue-5.10/risc-v-don-t-check-text_mutex-during-stop_machine.patch b/queue-5.10/risc-v-don-t-check-text_mutex-during-stop_machine.patch
new file mode 100644 (file)
index 0000000..2ce5412
--- /dev/null
@@ -0,0 +1,163 @@
+From 08ee349fb46fb31600fbf34244e65b4378ea2d65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 14:37:55 +0000
+Subject: RISC-V: Don't check text_mutex during stop_machine
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+[ Upstream commit 2a8db5ec4a28a0fce822d10224db9471a44b6925 ]
+
+We're currently using stop_machine() to update ftrace & kprobes, which
+means that the thread that takes text_mutex during may not be the same
+as the thread that eventually patches the code.  This isn't actually a
+race because the lock is still held (preventing any other concurrent
+accesses) and there is only one thread running during stop_machine(),
+but it does trigger a lockdep failure.
+
+This patch just elides the lockdep check during stop_machine.
+
+Fixes: c15ac4fd60d5 ("riscv/ftrace: Add dynamic function tracer support")
+Suggested-by: Steven Rostedt <rostedt@goodmis.org>
+Reported-by: Changbin Du <changbin.du@gmail.com>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20230303143754.4005217-1-conor.dooley@microchip.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/ftrace.h |  2 +-
+ arch/riscv/include/asm/patch.h  |  2 ++
+ arch/riscv/kernel/ftrace.c      | 14 ++++++++++++--
+ arch/riscv/kernel/patch.c       | 28 +++++++++++++++++++++++++---
+ 4 files changed, 40 insertions(+), 6 deletions(-)
+
+diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
+index 9e73922e1e2e5..d47d87c2d7e3d 100644
+--- a/arch/riscv/include/asm/ftrace.h
++++ b/arch/riscv/include/asm/ftrace.h
+@@ -109,6 +109,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+ #define ftrace_init_nop ftrace_init_nop
+ #endif
+-#endif
++#endif /* CONFIG_DYNAMIC_FTRACE */
+ #endif /* _ASM_RISCV_FTRACE_H */
+diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
+index 9a7d7346001ee..98d9de07cba17 100644
+--- a/arch/riscv/include/asm/patch.h
++++ b/arch/riscv/include/asm/patch.h
+@@ -9,4 +9,6 @@
+ int patch_text_nosync(void *addr, const void *insns, size_t len);
+ int patch_text(void *addr, u32 insn);
++extern int riscv_patch_in_stop_machine;
++
+ #endif /* _ASM_RISCV_PATCH_H */
+diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
+index 47b43d8ee9a6c..1bf92cfa6764e 100644
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -15,11 +15,21 @@
+ int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
+ {
+       mutex_lock(&text_mutex);
++
++      /*
++       * The code sequences we use for ftrace can't be patched while the
++       * kernel is running, so we need to use stop_machine() to modify them
++       * for now.  This doesn't play nice with text_mutex, we use this flag
++       * to elide the check.
++       */
++      riscv_patch_in_stop_machine = true;
++
+       return 0;
+ }
+ int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
+ {
++      riscv_patch_in_stop_machine = false;
+       mutex_unlock(&text_mutex);
+       return 0;
+ }
+@@ -109,9 +119,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+ {
+       int out;
+-      ftrace_arch_code_modify_prepare();
++      mutex_lock(&text_mutex);
+       out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
+-      ftrace_arch_code_modify_post_process();
++      mutex_unlock(&text_mutex);
+       return out;
+ }
+diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
+index 1612e11f7bf6d..c3fced410e742 100644
+--- a/arch/riscv/kernel/patch.c
++++ b/arch/riscv/kernel/patch.c
+@@ -11,6 +11,7 @@
+ #include <asm/kprobes.h>
+ #include <asm/cacheflush.h>
+ #include <asm/fixmap.h>
++#include <asm/ftrace.h>
+ #include <asm/patch.h>
+ struct patch_insn {
+@@ -19,6 +20,8 @@ struct patch_insn {
+       atomic_t cpu_count;
+ };
++int riscv_patch_in_stop_machine = false;
++
+ #ifdef CONFIG_MMU
+ static void *patch_map(void *addr, int fixmap)
+ {
+@@ -55,8 +58,15 @@ static int patch_insn_write(void *addr, const void *insn, size_t len)
+        * Before reaching here, it was expected to lock the text_mutex
+        * already, so we don't need to give another lock here and could
+        * ensure that it was safe between each cores.
++       *
++       * We're currently using stop_machine() for ftrace & kprobes, and while
++       * that ensures text_mutex is held before installing the mappings it
++       * does not ensure text_mutex is held by the calling thread.  That's
++       * safe but triggers a lockdep failure, so just elide it for that
++       * specific case.
+        */
+-      lockdep_assert_held(&text_mutex);
++      if (!riscv_patch_in_stop_machine)
++              lockdep_assert_held(&text_mutex);
+       if (across_pages)
+               patch_map(addr + len, FIX_TEXT_POKE1);
+@@ -117,13 +127,25 @@ NOKPROBE_SYMBOL(patch_text_cb);
+ int patch_text(void *addr, u32 insn)
+ {
++      int ret;
+       struct patch_insn patch = {
+               .addr = addr,
+               .insn = insn,
+               .cpu_count = ATOMIC_INIT(0),
+       };
+-      return stop_machine_cpuslocked(patch_text_cb,
+-                                     &patch, cpu_online_mask);
++      /*
++       * kprobes takes text_mutex, before calling patch_text(), but as we call
++       * calls stop_machine(), the lockdep assertion in patch_insn_write()
++       * gets confused by the context in which the lock is taken.
++       * Instead, ensure the lock is held before calling stop_machine(), and
++       * set riscv_patch_in_stop_machine to skip the check in
++       * patch_insn_write().
++       */
++      lockdep_assert_held(&text_mutex);
++      riscv_patch_in_stop_machine = true;
++      ret = stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask);
++      riscv_patch_in_stop_machine = false;
++      return ret;
+ }
+ NOKPROBE_SYMBOL(patch_text);
+-- 
+2.39.2
+
diff --git a/queue-5.10/riscv-add-header-include-guards-to-insn.h.patch b/queue-5.10/riscv-add-header-include-guards-to-insn.h.patch
new file mode 100644 (file)
index 0000000..1c32e5d
--- /dev/null
@@ -0,0 +1,48 @@
+From 02ddbaf7ea1c9a770160c69a31629798b39c46c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 29 Jan 2023 17:42:42 +0800
+Subject: riscv: Add header include guards to insn.h
+
+From: Liao Chang <liaochang1@huawei.com>
+
+[ Upstream commit 8ac6e619d9d51b3eb5bae817db8aa94e780a0db4 ]
+
+Add header include guards to insn.h to prevent repeating declaration of
+any identifiers in insn.h.
+
+Fixes: edde5584c7ab ("riscv: Add SW single-step support for KDB")
+Signed-off-by: Liao Chang <liaochang1@huawei.com>
+Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
+Fixes: c9c1af3f186a ("RISC-V: rename parse_asm.h to insn.h")
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20230129094242.282620-1-liaochang1@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/parse_asm.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/riscv/include/asm/parse_asm.h b/arch/riscv/include/asm/parse_asm.h
+index 7fee806805c1b..ad254da85e615 100644
+--- a/arch/riscv/include/asm/parse_asm.h
++++ b/arch/riscv/include/asm/parse_asm.h
+@@ -3,6 +3,9 @@
+  * Copyright (C) 2020 SiFive
+  */
++#ifndef _ASM_RISCV_INSN_H
++#define _ASM_RISCV_INSN_H
++
+ #include <linux/bits.h>
+ /* The bit field of immediate value in I-type instruction */
+@@ -217,3 +220,5 @@ static inline bool is_ ## INSN_NAME ## _insn(long insn) \
+       (RVC_X(x_, RVC_B_IMM_5_OPOFF, RVC_B_IMM_5_MASK) << RVC_B_IMM_5_OFF) | \
+       (RVC_X(x_, RVC_B_IMM_7_6_OPOFF, RVC_B_IMM_7_6_MASK) << RVC_B_IMM_7_6_OFF) | \
+       (RVC_IMM_SIGN(x_) << RVC_B_IMM_SIGN_OFF); })
++
++#endif /* _ASM_RISCV_INSN_H */
+-- 
+2.39.2
+
diff --git a/queue-5.10/riscv-avoid-enabling-interrupts-in-die.patch b/queue-5.10/riscv-avoid-enabling-interrupts-in-die.patch
new file mode 100644 (file)
index 0000000..5828d9b
--- /dev/null
@@ -0,0 +1,60 @@
+From a3ec8c84210a077e35f797833016d5e0e8b27e62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 14:48:28 +0000
+Subject: riscv: Avoid enabling interrupts in die()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mattias Nissler <mnissler@rivosinc.com>
+
+[ Upstream commit 130aee3fd9981297ff9354e5d5609cd59aafbbea ]
+
+While working on something else, I noticed that the kernel would start
+accepting interrupts again after crashing in an interrupt handler. Since
+the kernel is already in inconsistent state, enabling interrupts is
+dangerous and opens up risk of kernel state deteriorating further.
+Interrupts do get enabled via what looks like an unintended side effect of
+spin_unlock_irq, so switch to the more cautious
+spin_lock_irqsave/spin_unlock_irqrestore instead.
+
+Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code")
+Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
+Reviewed-by: Björn Töpel <bjorn@kernel.org>
+Link: https://lore.kernel.org/r/20230215144828.3370316-1-mnissler@rivosinc.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/traps.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
+index bc6b30f3add83..227253fde33c4 100644
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -32,10 +32,11 @@ void die(struct pt_regs *regs, const char *str)
+       static int die_counter;
+       int ret;
+       long cause;
++      unsigned long flags;
+       oops_enter();
+-      spin_lock_irq(&die_lock);
++      spin_lock_irqsave(&die_lock, flags);
+       console_verbose();
+       bust_spinlocks(1);
+@@ -52,7 +53,7 @@ void die(struct pt_regs *regs, const char *str)
+       bust_spinlocks(0);
+       add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+-      spin_unlock_irq(&die_lock);
++      spin_unlock_irqrestore(&die_lock, flags);
+       oops_exit();
+       if (in_interrupt())
+-- 
+2.39.2
+
diff --git a/queue-5.10/riscv-ftrace-reduce-the-detour-code-size-to-half.patch b/queue-5.10/riscv-ftrace-reduce-the-detour-code-size-to-half.patch
new file mode 100644 (file)
index 0000000..a334c94
--- /dev/null
@@ -0,0 +1,447 @@
+From 584f80a2b4caa34c1809e9356be1b8300aa2923e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Jan 2023 04:05:59 -0500
+Subject: riscv: ftrace: Reduce the detour code size to half
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit 6724a76cff85ee271bbbff42ac527e4643b2ec52 ]
+
+Use a temporary register to reduce the size of detour code from 16 bytes to
+8 bytes. The previous implementation is from 'commit afc76b8b8011 ("riscv:
+Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")'.
+
+Before the patch:
+<func_prolog>:
+ 0: REG_S  ra, -SZREG(sp)
+ 4: auipc  ra, ?
+ 8: jalr   ?(ra)
+12: REG_L  ra, -SZREG(sp)
+ (func_boddy)
+
+After the patch:
+<func_prolog>:
+ 0: auipc  t0, ?
+ 4: jalr   t0, ?(t0)
+ (func_boddy)
+
+This patch not just reduces the size of detour code, but also fixes an
+important issue:
+
+An Ftrace callback registered with FTRACE_OPS_FL_IPMODIFY flag can
+actually change the instruction pointer, e.g. to "replace" the given
+kernel function with a new one, which is needed for livepatching, etc.
+
+In this case, the trampoline (ftrace_regs_caller) would not return to
+<func_prolog+12> but would rather jump to the new function. So, "REG_L
+ra, -SZREG(sp)" would not run and the original return address would not
+be restored. The kernel is likely to hang or crash as a result.
+
+This can be easily demonstrated if one tries to "replace", say,
+cmdline_proc_show() with a new function with the same signature using
+instruction_pointer_set(&fregs->regs, new_func_addr) in the Ftrace
+callback.
+
+Link: https://lore.kernel.org/linux-riscv/20221122075440.1165172-1-suagrfillet@gmail.com/
+Link: https://lore.kernel.org/linux-riscv/d7d5730b-ebef-68e5-5046-e763e1ee6164@yadro.com/
+Co-developed-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Cc: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Reviewed-by: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Link: https://lore.kernel.org/r/20230112090603.1295340-4-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Fixes: 10626c32e382 ("riscv/ftrace: Add basic support")
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/Makefile             |  4 +-
+ arch/riscv/include/asm/ftrace.h | 50 +++++++++++++++++++------
+ arch/riscv/kernel/ftrace.c      | 65 ++++++++++-----------------------
+ arch/riscv/kernel/mcount-dyn.S  | 42 ++++++++-------------
+ 4 files changed, 75 insertions(+), 86 deletions(-)
+
+diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
+index 3e3467dbbf73f..8ceb667e4f38c 100644
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -14,9 +14,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+       LDFLAGS_vmlinux := --no-relax
+       KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ ifeq ($(CONFIG_RISCV_ISA_C),y)
+-      CC_FLAGS_FTRACE := -fpatchable-function-entry=8
+-else
+       CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++else
++      CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+ endif
+ endif
+diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
+index 04dad33800418..9e73922e1e2e5 100644
+--- a/arch/riscv/include/asm/ftrace.h
++++ b/arch/riscv/include/asm/ftrace.h
+@@ -42,6 +42,14 @@ struct dyn_arch_ftrace {
+  * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
+  *          return address (original pc + 4)
+  *
++ *<ftrace enable>:
++ * 0: auipc  t0/ra, 0x?
++ * 4: jalr   t0/ra, ?(t0/ra)
++ *
++ *<ftrace disable>:
++ * 0: nop
++ * 4: nop
++ *
+  * Dynamic ftrace generates probes to call sites, so we must deal with
+  * both auipc and jalr at the same time.
+  */
+@@ -52,25 +60,43 @@ struct dyn_arch_ftrace {
+ #define AUIPC_OFFSET_MASK     (0xfffff000)
+ #define AUIPC_PAD             (0x00001000)
+ #define JALR_SHIFT            20
+-#define JALR_BASIC            (0x000080e7)
+-#define AUIPC_BASIC           (0x00000097)
++#define JALR_RA                       (0x000080e7)
++#define AUIPC_RA              (0x00000097)
++#define JALR_T0                       (0x000282e7)
++#define AUIPC_T0              (0x00000297)
+ #define NOP4                  (0x00000013)
+-#define make_call(caller, callee, call)                                       \
++#define to_jalr_t0(offset)                                            \
++      (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)
++
++#define to_auipc_t0(offset)                                           \
++      ((offset & JALR_SIGN_MASK) ?                                    \
++      (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_T0) :       \
++      ((offset & AUIPC_OFFSET_MASK) | AUIPC_T0))
++
++#define make_call_t0(caller, callee, call)                            \
+ do {                                                                  \
+-      call[0] = to_auipc_insn((unsigned int)((unsigned long)callee -  \
+-                              (unsigned long)caller));                \
+-      call[1] = to_jalr_insn((unsigned int)((unsigned long)callee -   \
+-                             (unsigned long)caller));                 \
++      unsigned int offset =                                           \
++              (unsigned long) callee - (unsigned long) caller;        \
++      call[0] = to_auipc_t0(offset);                                  \
++      call[1] = to_jalr_t0(offset);                                   \
+ } while (0)
+-#define to_jalr_insn(offset)                                          \
+-      (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC)
++#define to_jalr_ra(offset)                                            \
++      (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA)
+-#define to_auipc_insn(offset)                                         \
++#define to_auipc_ra(offset)                                           \
+       ((offset & JALR_SIGN_MASK) ?                                    \
+-      (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) :    \
+-      ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC))
++      (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) :       \
++      ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA))
++
++#define make_call_ra(caller, callee, call)                            \
++do {                                                                  \
++      unsigned int offset =                                           \
++              (unsigned long) callee - (unsigned long) caller;        \
++      call[0] = to_auipc_ra(offset);                                  \
++      call[1] = to_jalr_ra(offset);                                   \
++} while (0)
+ /*
+  * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
+index 7f1e5203de886..47b43d8ee9a6c 100644
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -57,12 +57,15 @@ static int ftrace_check_current_call(unsigned long hook_pos,
+ }
+ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+-                              bool enable)
++                              bool enable, bool ra)
+ {
+       unsigned int call[2];
+       unsigned int nops[2] = {NOP4, NOP4};
+-      make_call(hook_pos, target, call);
++      if (ra)
++              make_call_ra(hook_pos, target, call);
++      else
++              make_call_t0(hook_pos, target, call);
+       /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
+       if (patch_text_nosync
+@@ -72,42 +75,13 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+       return 0;
+ }
+-/*
+- * Put 5 instructions with 16 bytes at the front of function within
+- * patchable function entry nops' area.
+- *
+- * 0: REG_S  ra, -SZREG(sp)
+- * 1: auipc  ra, 0x?
+- * 2: jalr   -?(ra)
+- * 3: REG_L  ra, -SZREG(sp)
+- *
+- * So the opcodes is:
+- * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
+- * 1: 0x???????? -> auipc
+- * 2: 0x???????? -> jalr
+- * 3: 0xff813083 (ld)/0xffc12083 (lw)
+- */
+-#if __riscv_xlen == 64
+-#define INSN0 0xfe113c23
+-#define INSN3 0xff813083
+-#elif __riscv_xlen == 32
+-#define INSN0 0xfe112e23
+-#define INSN3 0xffc12083
+-#endif
+-
+-#define FUNC_ENTRY_SIZE       16
+-#define FUNC_ENTRY_JMP        4
+-
+ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ {
+-      unsigned int call[4] = {INSN0, 0, 0, INSN3};
+-      unsigned long target = addr;
+-      unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++      unsigned int call[2];
+-      call[1] = to_auipc_insn((unsigned int)(target - caller));
+-      call[2] = to_jalr_insn((unsigned int)(target - caller));
++      make_call_t0(rec->ip, addr, call);
+-      if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
++      if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE))
+               return -EPERM;
+       return 0;
+@@ -116,15 +90,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+                   unsigned long addr)
+ {
+-      unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
++      unsigned int nops[2] = {NOP4, NOP4};
+-      if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
++      if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
+               return -EPERM;
+       return 0;
+ }
+-
+ /*
+  * This is called early on, and isn't wrapped by
+  * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold
+@@ -146,10 +119,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+ int ftrace_update_ftrace_func(ftrace_func_t func)
+ {
+       int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
+-                                     (unsigned long)func, true);
++                                     (unsigned long)func, true, true);
+       if (!ret) {
+               ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call,
+-                                         (unsigned long)func, true);
++                                         (unsigned long)func, true, true);
+       }
+       return ret;
+@@ -166,16 +139,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+                      unsigned long addr)
+ {
+       unsigned int call[2];
+-      unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++      unsigned long caller = rec->ip;
+       int ret;
+-      make_call(caller, old_addr, call);
++      make_call_t0(caller, old_addr, call);
+       ret = ftrace_check_current_call(caller, call);
+       if (ret)
+               return ret;
+-      return __ftrace_modify_call(caller, addr, true);
++      return __ftrace_modify_call(caller, addr, true, false);
+ }
+ #endif
+@@ -210,12 +183,12 @@ int ftrace_enable_ftrace_graph_caller(void)
+       int ret;
+       ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+-                                  (unsigned long)&prepare_ftrace_return, true);
++                                  (unsigned long)&prepare_ftrace_return, true, true);
+       if (ret)
+               return ret;
+       return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+-                                  (unsigned long)&prepare_ftrace_return, true);
++                                  (unsigned long)&prepare_ftrace_return, true, true);
+ }
+ int ftrace_disable_ftrace_graph_caller(void)
+@@ -223,12 +196,12 @@ int ftrace_disable_ftrace_graph_caller(void)
+       int ret;
+       ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+-                                  (unsigned long)&prepare_ftrace_return, false);
++                                  (unsigned long)&prepare_ftrace_return, false, true);
+       if (ret)
+               return ret;
+       return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+-                                  (unsigned long)&prepare_ftrace_return, false);
++                                  (unsigned long)&prepare_ftrace_return, false, true);
+ }
+ #endif /* CONFIG_DYNAMIC_FTRACE */
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
+index d171eca623b6f..125de818d1bab 100644
+--- a/arch/riscv/kernel/mcount-dyn.S
++++ b/arch/riscv/kernel/mcount-dyn.S
+@@ -13,8 +13,8 @@
+       .text
+-#define FENTRY_RA_OFFSET      12
+-#define ABI_SIZE_ON_STACK     72
++#define FENTRY_RA_OFFSET      8
++#define ABI_SIZE_ON_STACK     80
+ #define ABI_A0                        0
+ #define ABI_A1                        8
+ #define ABI_A2                        16
+@@ -23,10 +23,10 @@
+ #define ABI_A5                        40
+ #define ABI_A6                        48
+ #define ABI_A7                        56
+-#define ABI_RA                        64
++#define ABI_T0                        64
++#define ABI_RA                        72
+       .macro SAVE_ABI
+-      addi    sp, sp, -SZREG
+       addi    sp, sp, -ABI_SIZE_ON_STACK
+       REG_S   a0, ABI_A0(sp)
+@@ -37,6 +37,7 @@
+       REG_S   a5, ABI_A5(sp)
+       REG_S   a6, ABI_A6(sp)
+       REG_S   a7, ABI_A7(sp)
++      REG_S   t0, ABI_T0(sp)
+       REG_S   ra, ABI_RA(sp)
+       .endm
+@@ -49,24 +50,18 @@
+       REG_L   a5, ABI_A5(sp)
+       REG_L   a6, ABI_A6(sp)
+       REG_L   a7, ABI_A7(sp)
++      REG_L   t0, ABI_T0(sp)
+       REG_L   ra, ABI_RA(sp)
+       addi    sp, sp, ABI_SIZE_ON_STACK
+-      addi    sp, sp, SZREG
+       .endm
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+       .macro SAVE_ALL
+-      addi    sp, sp, -SZREG
+       addi    sp, sp, -PT_SIZE_ON_STACK
+-      REG_S x1,  PT_EPC(sp)
+-      addi    sp, sp, PT_SIZE_ON_STACK
+-      REG_L x1,  (sp)
+-      addi    sp, sp, -PT_SIZE_ON_STACK
++      REG_S t0,  PT_EPC(sp)
+       REG_S x1,  PT_RA(sp)
+-      REG_L x1,  PT_EPC(sp)
+-
+       REG_S x2,  PT_SP(sp)
+       REG_S x3,  PT_GP(sp)
+       REG_S x4,  PT_TP(sp)
+@@ -100,15 +95,11 @@
+       .endm
+       .macro RESTORE_ALL
++      REG_L t0,  PT_EPC(sp)
+       REG_L x1,  PT_RA(sp)
+-      addi    sp, sp, PT_SIZE_ON_STACK
+-      REG_S x1,  (sp)
+-      addi    sp, sp, -PT_SIZE_ON_STACK
+-      REG_L x1,  PT_EPC(sp)
+       REG_L x2,  PT_SP(sp)
+       REG_L x3,  PT_GP(sp)
+       REG_L x4,  PT_TP(sp)
+-      REG_L x5,  PT_T0(sp)
+       REG_L x6,  PT_T1(sp)
+       REG_L x7,  PT_T2(sp)
+       REG_L x8,  PT_S0(sp)
+@@ -137,17 +128,16 @@
+       REG_L x31, PT_T6(sp)
+       addi    sp, sp, PT_SIZE_ON_STACK
+-      addi    sp, sp, SZREG
+       .endm
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+ ENTRY(ftrace_caller)
+       SAVE_ABI
+-      addi    a0, ra, -FENTRY_RA_OFFSET
++      addi    a0, t0, -FENTRY_RA_OFFSET
+       la      a1, function_trace_op
+       REG_L   a2, 0(a1)
+-      REG_L   a1, ABI_SIZE_ON_STACK(sp)
++      mv      a1, ra
+       mv      a3, sp
+ ftrace_call:
+@@ -155,8 +145,8 @@ ftrace_call:
+       call    ftrace_stub
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+-      addi    a0, sp, ABI_SIZE_ON_STACK
+-      REG_L   a1, ABI_RA(sp)
++      addi    a0, sp, ABI_RA
++      REG_L   a1, ABI_T0(sp)
+       addi    a1, a1, -FENTRY_RA_OFFSET
+ #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+       mv      a2, s0
+@@ -166,17 +156,17 @@ ftrace_graph_call:
+       call    ftrace_stub
+ #endif
+       RESTORE_ABI
+-      ret
++      jr t0
+ ENDPROC(ftrace_caller)
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ ENTRY(ftrace_regs_caller)
+       SAVE_ALL
+-      addi    a0, ra, -FENTRY_RA_OFFSET
++      addi    a0, t0, -FENTRY_RA_OFFSET
+       la      a1, function_trace_op
+       REG_L   a2, 0(a1)
+-      REG_L   a1, PT_SIZE_ON_STACK(sp)
++      mv      a1, ra
+       mv      a3, sp
+ ftrace_regs_call:
+@@ -196,6 +186,6 @@ ftrace_graph_regs_call:
+ #endif
+       RESTORE_ALL
+-      ret
++      jr t0
+ ENDPROC(ftrace_regs_caller)
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+-- 
+2.39.2
+
diff --git a/queue-5.10/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch b/queue-5.10/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch
new file mode 100644 (file)
index 0000000..44201fe
--- /dev/null
@@ -0,0 +1,60 @@
+From 008af635bed0d4f0d83564b2b3986ed14472119b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Jan 2023 04:05:58 -0500
+Subject: riscv: ftrace: Remove wasted nops for !RISCV_ISA_C
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit 409c8fb20c66df7150e592747412438c04aeb11f ]
+
+When CONFIG_RISCV_ISA_C=n, -fpatchable-function-entry=8 would generate
+more nops than we expect. Because it treat nop opcode as 0x00000013
+instead of 0x0001.
+
+Dump of assembler code for function dw_pcie_free_msi:
+   0xffffffff806fce94 <+0>:     sd      ra,-8(sp)
+   0xffffffff806fce98 <+4>:     auipc   ra,0xff90f
+   0xffffffff806fce9c <+8>:     jalr    -684(ra) # 0xffffffff8000bbec
+<ftrace_caller>
+   0xffffffff806fcea0 <+12>:    ld      ra,-8(sp)
+   0xffffffff806fcea4 <+16>:    nop /* wasted */
+   0xffffffff806fcea8 <+20>:    nop /* wasted */
+   0xffffffff806fceac <+24>:    nop /* wasted */
+   0xffffffff806fceb0 <+28>:    nop /* wasted */
+   0xffffffff806fceb4 <+0>:     addi    sp,sp,-48
+   0xffffffff806fceb8 <+4>:     sd      s0,32(sp)
+   0xffffffff806fcebc <+8>:     sd      s1,24(sp)
+   0xffffffff806fcec0 <+12>:    sd      s2,16(sp)
+   0xffffffff806fcec4 <+16>:    sd      s3,8(sp)
+   0xffffffff806fcec8 <+20>:    sd      ra,40(sp)
+   0xffffffff806fcecc <+24>:    addi    s0,sp,48
+
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230112090603.1295340-3-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/Makefile | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
+index 6c1ef42d5a0df..3e3467dbbf73f 100644
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -13,7 +13,11 @@ LDFLAGS_vmlinux :=
+ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+       LDFLAGS_vmlinux := --no-relax
+       KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
++ifeq ($(CONFIG_RISCV_ISA_C),y)
+       CC_FLAGS_FTRACE := -fpatchable-function-entry=8
++else
++      CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++endif
+ endif
+ ifeq ($(CONFIG_CMODEL_MEDLOW),y)
+-- 
+2.39.2
+
diff --git a/queue-5.10/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch b/queue-5.10/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch
new file mode 100644 (file)
index 0000000..ebb2eb0
--- /dev/null
@@ -0,0 +1,99 @@
+From 0675e74ed638f0e92597d9b136e83ee75cf74541 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 Mar 2023 10:16:39 +0100
+Subject: riscv: Use READ_ONCE_NOCHECK in imprecise unwinding stack mode
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+[ Upstream commit 76950340cf03b149412fe0d5f0810e52ac1df8cb ]
+
+When CONFIG_FRAME_POINTER is unset, the stack unwinding function
+walk_stackframe randomly reads the stack and then, when KASAN is enabled,
+it can lead to the following backtrace:
+
+[    0.000000] ==================================================================
+[    0.000000] BUG: KASAN: stack-out-of-bounds in walk_stackframe+0xa6/0x11a
+[    0.000000] Read of size 8 at addr ffffffff81807c40 by task swapper/0
+[    0.000000]
+[    0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.2.0-12919-g24203e6db61f #43
+[    0.000000] Hardware name: riscv-virtio,qemu (DT)
+[    0.000000] Call Trace:
+[    0.000000] [<ffffffff80007ba8>] walk_stackframe+0x0/0x11a
+[    0.000000] [<ffffffff80099ecc>] init_param_lock+0x26/0x2a
+[    0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a
+[    0.000000] [<ffffffff80c49c80>] dump_stack_lvl+0x22/0x36
+[    0.000000] [<ffffffff80c3783e>] print_report+0x198/0x4a8
+[    0.000000] [<ffffffff80099ecc>] init_param_lock+0x26/0x2a
+[    0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a
+[    0.000000] [<ffffffff8015f68a>] kasan_report+0x9a/0xc8
+[    0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a
+[    0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a
+[    0.000000] [<ffffffff8006e99c>] desc_make_final+0x80/0x84
+[    0.000000] [<ffffffff8009a04e>] stack_trace_save+0x88/0xa6
+[    0.000000] [<ffffffff80099fc2>] filter_irq_stacks+0x72/0x76
+[    0.000000] [<ffffffff8006b95e>] devkmsg_read+0x32a/0x32e
+[    0.000000] [<ffffffff8015ec16>] kasan_save_stack+0x28/0x52
+[    0.000000] [<ffffffff8006e998>] desc_make_final+0x7c/0x84
+[    0.000000] [<ffffffff8009a04a>] stack_trace_save+0x84/0xa6
+[    0.000000] [<ffffffff8015ec52>] kasan_set_track+0x12/0x20
+[    0.000000] [<ffffffff8015f22e>] __kasan_slab_alloc+0x58/0x5e
+[    0.000000] [<ffffffff8015e7ea>] __kmem_cache_create+0x21e/0x39a
+[    0.000000] [<ffffffff80e133ac>] create_boot_cache+0x70/0x9c
+[    0.000000] [<ffffffff80e17ab2>] kmem_cache_init+0x6c/0x11e
+[    0.000000] [<ffffffff80e00fd6>] mm_init+0xd8/0xfe
+[    0.000000] [<ffffffff80e011d8>] start_kernel+0x190/0x3ca
+[    0.000000]
+[    0.000000] The buggy address belongs to stack of task swapper/0
+[    0.000000]  and is located at offset 0 in frame:
+[    0.000000]  stack_trace_save+0x0/0xa6
+[    0.000000]
+[    0.000000] This frame has 1 object:
+[    0.000000]  [32, 56) 'c'
+[    0.000000]
+[    0.000000] The buggy address belongs to the physical page:
+[    0.000000] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x81a07
+[    0.000000] flags: 0x1000(reserved|zone=0)
+[    0.000000] raw: 0000000000001000 ff600003f1e3d150 ff600003f1e3d150 0000000000000000
+[    0.000000] raw: 0000000000000000 0000000000000000 00000001ffffffff
+[    0.000000] page dumped because: kasan: bad access detected
+[    0.000000]
+[    0.000000] Memory state around the buggy address:
+[    0.000000]  ffffffff81807b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[    0.000000]  ffffffff81807b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[    0.000000] >ffffffff81807c00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 f3
+[    0.000000]                                            ^
+[    0.000000]  ffffffff81807c80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
+[    0.000000]  ffffffff81807d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[    0.000000] ==================================================================
+
+Fix that by using READ_ONCE_NOCHECK when reading the stack in imprecise
+mode.
+
+Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
+Reported-by: Chathura Rajapaksha <chathura.abeyrathne.lk@gmail.com>
+Link: https://lore.kernel.org/all/CAD7mqryDQCYyJ1gAmtMm8SASMWAQ4i103ptTb0f6Oda=tPY2=A@mail.gmail.com/
+Suggested-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Link: https://lore.kernel.org/r/20230308091639.602024-1-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/stacktrace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
+index 1e53fbe5eb783..9c34735c1e771 100644
+--- a/arch/riscv/kernel/stacktrace.c
++++ b/arch/riscv/kernel/stacktrace.c
+@@ -96,7 +96,7 @@ void notrace walk_stackframe(struct task_struct *task,
+       while (!kstack_end(ksp)) {
+               if (__kernel_text_address(pc) && unlikely(fn(pc, arg)))
+                       break;
+-              pc = (*ksp++) - 0x4;
++              pc = READ_ONCE_NOCHECK(*ksp++) - 0x4;
+       }
+ }
+-- 
+2.39.2
+
diff --git a/queue-5.10/riscv-using-patchable_function_entry-instead-of-mcou.patch b/queue-5.10/riscv-using-patchable_function_entry-instead-of-mcou.patch
new file mode 100644 (file)
index 0000000..ee0f5fe
--- /dev/null
@@ -0,0 +1,600 @@
+From abb0542c5e210048f5905f02935615dc315a5240 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Dec 2020 16:01:41 +0000
+Subject: riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit afc76b8b80112189b6f11e67e19cf58301944814 ]
+
+This patch changes the current detour mechanism of dynamic ftrace
+which has been discussed during LPC 2020 RISCV-MC [1].
+
+Before the patch, we used mcount for detour:
+<funca>:
+       addi sp,sp,-16
+       sd   ra,8(sp)
+       sd   s0,0(sp)
+       addi s0,sp,16
+       mv   a5,ra
+       mv   a0,a5
+       auipc ra,0x0 -> nop
+       jalr  -296(ra) <_mcount@plt> ->nop
+       ...
+
+After the patch, we use nop call site area for detour:
+<funca>:
+       nop -> REG_S ra, -SZREG(sp)
+       nop -> auipc ra, 0x?
+       nop -> jalr ?(ra)
+       nop -> REG_L ra, -SZREG(sp)
+       ...
+
+The mcount mechanism is mixed with gcc function prologue which is
+not very clear. The patchable function entry just put 16 bytes nop
+before the front of the function prologue which could be filled
+with a separated detour mechanism.
+
+[1] https://www.linuxplumbersconf.org/event/7/contributions/807/
+
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Stable-dep-of: 409c8fb20c66 ("riscv: ftrace: Remove wasted nops for !RISCV_ISA_C")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/Makefile            |   2 +
+ arch/riscv/kernel/ftrace.c     |  95 ++++-----
+ arch/riscv/kernel/mcount-dyn.S | 342 +++++++++++++++------------------
+ 3 files changed, 204 insertions(+), 235 deletions(-)
+
+diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
+index 9446282b52bab..6c1ef42d5a0df 100644
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -12,6 +12,8 @@ OBJCOPYFLAGS    := -O binary
+ LDFLAGS_vmlinux :=
+ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+       LDFLAGS_vmlinux := --no-relax
++      KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
++      CC_FLAGS_FTRACE := -fpatchable-function-entry=8
+ endif
+ ifeq ($(CONFIG_CMODEL_MEDLOW),y)
+diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
+index 765b62434f303..7f1e5203de886 100644
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -72,29 +72,56 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+       return 0;
+ }
++/*
++ * Put 5 instructions with 16 bytes at the front of function within
++ * patchable function entry nops' area.
++ *
++ * 0: REG_S  ra, -SZREG(sp)
++ * 1: auipc  ra, 0x?
++ * 2: jalr   -?(ra)
++ * 3: REG_L  ra, -SZREG(sp)
++ *
++ * So the opcodes is:
++ * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
++ * 1: 0x???????? -> auipc
++ * 2: 0x???????? -> jalr
++ * 3: 0xff813083 (ld)/0xffc12083 (lw)
++ */
++#if __riscv_xlen == 64
++#define INSN0 0xfe113c23
++#define INSN3 0xff813083
++#elif __riscv_xlen == 32
++#define INSN0 0xfe112e23
++#define INSN3 0xffc12083
++#endif
++
++#define FUNC_ENTRY_SIZE       16
++#define FUNC_ENTRY_JMP        4
++
+ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ {
+-      int ret = ftrace_check_current_call(rec->ip, NULL);
++      unsigned int call[4] = {INSN0, 0, 0, INSN3};
++      unsigned long target = addr;
++      unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
+-      if (ret)
+-              return ret;
++      call[1] = to_auipc_insn((unsigned int)(target - caller));
++      call[2] = to_jalr_insn((unsigned int)(target - caller));
+-      return __ftrace_modify_call(rec->ip, addr, true);
++      if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
++              return -EPERM;
++
++      return 0;
+ }
+ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+                   unsigned long addr)
+ {
+-      unsigned int call[2];
+-      int ret;
++      unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
+-      make_call(rec->ip, addr, call);
+-      ret = ftrace_check_current_call(rec->ip, call);
+-
+-      if (ret)
+-              return ret;
++      if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
++              return -EPERM;
+-      return __ftrace_modify_call(rec->ip, addr, false);
++      return 0;
+ }
+@@ -139,15 +166,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+                      unsigned long addr)
+ {
+       unsigned int call[2];
++      unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
+       int ret;
+-      make_call(rec->ip, old_addr, call);
+-      ret = ftrace_check_current_call(rec->ip, call);
++      make_call(caller, old_addr, call);
++      ret = ftrace_check_current_call(caller, call);
+       if (ret)
+               return ret;
+-      return __ftrace_modify_call(rec->ip, addr, true);
++      return __ftrace_modify_call(caller, addr, true);
+ }
+ #endif
+@@ -176,53 +204,30 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ #ifdef CONFIG_DYNAMIC_FTRACE
+ extern void ftrace_graph_call(void);
++extern void ftrace_graph_regs_call(void);
+ int ftrace_enable_ftrace_graph_caller(void)
+ {
+-      unsigned int call[2];
+-      static int init_graph = 1;
+       int ret;
+-      make_call(&ftrace_graph_call, &ftrace_stub, call);
+-
+-      /*
+-       * When enabling graph tracer for the first time, ftrace_graph_call
+-       * should contains a call to ftrace_stub.  Once it has been disabled,
+-       * the 8-bytes at the position becomes NOPs.
+-       */
+-      if (init_graph) {
+-              ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+-                                              call);
+-              init_graph = 0;
+-      } else {
+-              ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+-                                              NULL);
+-      }
+-
++      ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
++                                  (unsigned long)&prepare_ftrace_return, true);
+       if (ret)
+               return ret;
+-      return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
++      return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+                                   (unsigned long)&prepare_ftrace_return, true);
+ }
+ int ftrace_disable_ftrace_graph_caller(void)
+ {
+-      unsigned int call[2];
+       int ret;
+-      make_call(&ftrace_graph_call, &prepare_ftrace_return, call);
+-
+-      /*
+-       * This is to make sure that ftrace_enable_ftrace_graph_caller
+-       * did the right thing.
+-       */
+-      ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+-                                      call);
+-
++      ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
++                                  (unsigned long)&prepare_ftrace_return, false);
+       if (ret)
+               return ret;
+-      return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
++      return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+                                   (unsigned long)&prepare_ftrace_return, false);
+ }
+ #endif /* CONFIG_DYNAMIC_FTRACE */
+diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
+index 35a6ed76cb8b7..d171eca623b6f 100644
+--- a/arch/riscv/kernel/mcount-dyn.S
++++ b/arch/riscv/kernel/mcount-dyn.S
+@@ -13,224 +13,186 @@
+       .text
+-      .macro SAVE_ABI_STATE
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+-      addi    sp, sp, -48
+-      sd      s0, 32(sp)
+-      sd      ra, 40(sp)
+-      addi    s0, sp, 48
+-      sd      t0, 24(sp)
+-      sd      t1, 16(sp)
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+-      sd      t2, 8(sp)
+-#endif
+-#else
+-      addi    sp, sp, -16
+-      sd      s0, 0(sp)
+-      sd      ra, 8(sp)
+-      addi    s0, sp, 16
+-#endif
++#define FENTRY_RA_OFFSET      12
++#define ABI_SIZE_ON_STACK     72
++#define ABI_A0                        0
++#define ABI_A1                        8
++#define ABI_A2                        16
++#define ABI_A3                        24
++#define ABI_A4                        32
++#define ABI_A5                        40
++#define ABI_A6                        48
++#define ABI_A7                        56
++#define ABI_RA                        64
++
++      .macro SAVE_ABI
++      addi    sp, sp, -SZREG
++      addi    sp, sp, -ABI_SIZE_ON_STACK
++
++      REG_S   a0, ABI_A0(sp)
++      REG_S   a1, ABI_A1(sp)
++      REG_S   a2, ABI_A2(sp)
++      REG_S   a3, ABI_A3(sp)
++      REG_S   a4, ABI_A4(sp)
++      REG_S   a5, ABI_A5(sp)
++      REG_S   a6, ABI_A6(sp)
++      REG_S   a7, ABI_A7(sp)
++      REG_S   ra, ABI_RA(sp)
+       .endm
+-      .macro RESTORE_ABI_STATE
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+-      ld      s0, 32(sp)
+-      ld      ra, 40(sp)
+-      addi    sp, sp, 48
+-#else
+-      ld      ra, 8(sp)
+-      ld      s0, 0(sp)
+-      addi    sp, sp, 16
+-#endif
++      .macro RESTORE_ABI
++      REG_L   a0, ABI_A0(sp)
++      REG_L   a1, ABI_A1(sp)
++      REG_L   a2, ABI_A2(sp)
++      REG_L   a3, ABI_A3(sp)
++      REG_L   a4, ABI_A4(sp)
++      REG_L   a5, ABI_A5(sp)
++      REG_L   a6, ABI_A6(sp)
++      REG_L   a7, ABI_A7(sp)
++      REG_L   ra, ABI_RA(sp)
++
++      addi    sp, sp, ABI_SIZE_ON_STACK
++      addi    sp, sp, SZREG
+       .endm
+-      .macro RESTORE_GRAPH_ARGS
+-      ld      a0, 24(sp)
+-      ld      a1, 16(sp)
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+-      ld      a2, 8(sp)
+-#endif
++#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
++      .macro SAVE_ALL
++      addi    sp, sp, -SZREG
++      addi    sp, sp, -PT_SIZE_ON_STACK
++
++      REG_S x1,  PT_EPC(sp)
++      addi    sp, sp, PT_SIZE_ON_STACK
++      REG_L x1,  (sp)
++      addi    sp, sp, -PT_SIZE_ON_STACK
++      REG_S x1,  PT_RA(sp)
++      REG_L x1,  PT_EPC(sp)
++
++      REG_S x2,  PT_SP(sp)
++      REG_S x3,  PT_GP(sp)
++      REG_S x4,  PT_TP(sp)
++      REG_S x5,  PT_T0(sp)
++      REG_S x6,  PT_T1(sp)
++      REG_S x7,  PT_T2(sp)
++      REG_S x8,  PT_S0(sp)
++      REG_S x9,  PT_S1(sp)
++      REG_S x10, PT_A0(sp)
++      REG_S x11, PT_A1(sp)
++      REG_S x12, PT_A2(sp)
++      REG_S x13, PT_A3(sp)
++      REG_S x14, PT_A4(sp)
++      REG_S x15, PT_A5(sp)
++      REG_S x16, PT_A6(sp)
++      REG_S x17, PT_A7(sp)
++      REG_S x18, PT_S2(sp)
++      REG_S x19, PT_S3(sp)
++      REG_S x20, PT_S4(sp)
++      REG_S x21, PT_S5(sp)
++      REG_S x22, PT_S6(sp)
++      REG_S x23, PT_S7(sp)
++      REG_S x24, PT_S8(sp)
++      REG_S x25, PT_S9(sp)
++      REG_S x26, PT_S10(sp)
++      REG_S x27, PT_S11(sp)
++      REG_S x28, PT_T3(sp)
++      REG_S x29, PT_T4(sp)
++      REG_S x30, PT_T5(sp)
++      REG_S x31, PT_T6(sp)
+       .endm
+-ENTRY(ftrace_graph_caller)
+-      addi    sp, sp, -16
+-      sd      s0, 0(sp)
+-      sd      ra, 8(sp)
+-      addi    s0, sp, 16
+-ftrace_graph_call:
+-      .global ftrace_graph_call
+-      /*
+-       * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the
+-       * call below.  Check ftrace_modify_all_code for details.
+-       */
+-      call    ftrace_stub
+-      ld      ra, 8(sp)
+-      ld      s0, 0(sp)
+-      addi    sp, sp, 16
+-      ret
+-ENDPROC(ftrace_graph_caller)
++      .macro RESTORE_ALL
++      REG_L x1,  PT_RA(sp)
++      addi    sp, sp, PT_SIZE_ON_STACK
++      REG_S x1,  (sp)
++      addi    sp, sp, -PT_SIZE_ON_STACK
++      REG_L x1,  PT_EPC(sp)
++      REG_L x2,  PT_SP(sp)
++      REG_L x3,  PT_GP(sp)
++      REG_L x4,  PT_TP(sp)
++      REG_L x5,  PT_T0(sp)
++      REG_L x6,  PT_T1(sp)
++      REG_L x7,  PT_T2(sp)
++      REG_L x8,  PT_S0(sp)
++      REG_L x9,  PT_S1(sp)
++      REG_L x10, PT_A0(sp)
++      REG_L x11, PT_A1(sp)
++      REG_L x12, PT_A2(sp)
++      REG_L x13, PT_A3(sp)
++      REG_L x14, PT_A4(sp)
++      REG_L x15, PT_A5(sp)
++      REG_L x16, PT_A6(sp)
++      REG_L x17, PT_A7(sp)
++      REG_L x18, PT_S2(sp)
++      REG_L x19, PT_S3(sp)
++      REG_L x20, PT_S4(sp)
++      REG_L x21, PT_S5(sp)
++      REG_L x22, PT_S6(sp)
++      REG_L x23, PT_S7(sp)
++      REG_L x24, PT_S8(sp)
++      REG_L x25, PT_S9(sp)
++      REG_L x26, PT_S10(sp)
++      REG_L x27, PT_S11(sp)
++      REG_L x28, PT_T3(sp)
++      REG_L x29, PT_T4(sp)
++      REG_L x30, PT_T5(sp)
++      REG_L x31, PT_T6(sp)
++
++      addi    sp, sp, PT_SIZE_ON_STACK
++      addi    sp, sp, SZREG
++      .endm
++#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+ ENTRY(ftrace_caller)
+-      /*
+-       * a0: the address in the caller when calling ftrace_caller
+-       * a1: the caller's return address
+-       * a2: the address of global variable function_trace_op
+-       */
+-      ld      a1, -8(s0)
+-      addi    a0, ra, -MCOUNT_INSN_SIZE
+-      la      t5, function_trace_op
+-      ld      a2, 0(t5)
++      SAVE_ABI
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+-      /*
+-       * the graph tracer (specifically, prepare_ftrace_return) needs these
+-       * arguments but for now the function tracer occupies the regs, so we
+-       * save them in temporary regs to recover later.
+-       */
+-      addi    t0, s0, -8
+-      mv      t1, a0
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+-      ld      t2, -16(s0)
+-#endif
+-#endif
++      addi    a0, ra, -FENTRY_RA_OFFSET
++      la      a1, function_trace_op
++      REG_L   a2, 0(a1)
++      REG_L   a1, ABI_SIZE_ON_STACK(sp)
++      mv      a3, sp
+-      SAVE_ABI_STATE
+ ftrace_call:
+       .global ftrace_call
+-      /*
+-       * For the dynamic ftrace to work, here we should reserve at least
+-       * 8 bytes for a functional auipc-jalr pair.  The following call
+-       * serves this purpose.
+-       *
+-       * Calling ftrace_update_ftrace_func would overwrite the nops below.
+-       * Check ftrace_modify_all_code for details.
+-       */
+       call    ftrace_stub
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+-      RESTORE_GRAPH_ARGS
+-      call    ftrace_graph_caller
++      addi    a0, sp, ABI_SIZE_ON_STACK
++      REG_L   a1, ABI_RA(sp)
++      addi    a1, a1, -FENTRY_RA_OFFSET
++#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
++      mv      a2, s0
+ #endif
+-
+-      RESTORE_ABI_STATE
++ftrace_graph_call:
++      .global ftrace_graph_call
++      call    ftrace_stub
++#endif
++      RESTORE_ABI
+       ret
+ ENDPROC(ftrace_caller)
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+-      .macro SAVE_ALL
+-      addi    sp, sp, -(PT_SIZE_ON_STACK+16)
+-      sd      s0, (PT_SIZE_ON_STACK)(sp)
+-      sd      ra, (PT_SIZE_ON_STACK+8)(sp)
+-      addi    s0, sp, (PT_SIZE_ON_STACK+16)
+-
+-      sd x1,  PT_RA(sp)
+-      sd x2,  PT_SP(sp)
+-      sd x3,  PT_GP(sp)
+-      sd x4,  PT_TP(sp)
+-      sd x5,  PT_T0(sp)
+-      sd x6,  PT_T1(sp)
+-      sd x7,  PT_T2(sp)
+-      sd x8,  PT_S0(sp)
+-      sd x9,  PT_S1(sp)
+-      sd x10, PT_A0(sp)
+-      sd x11, PT_A1(sp)
+-      sd x12, PT_A2(sp)
+-      sd x13, PT_A3(sp)
+-      sd x14, PT_A4(sp)
+-      sd x15, PT_A5(sp)
+-      sd x16, PT_A6(sp)
+-      sd x17, PT_A7(sp)
+-      sd x18, PT_S2(sp)
+-      sd x19, PT_S3(sp)
+-      sd x20, PT_S4(sp)
+-      sd x21, PT_S5(sp)
+-      sd x22, PT_S6(sp)
+-      sd x23, PT_S7(sp)
+-      sd x24, PT_S8(sp)
+-      sd x25, PT_S9(sp)
+-      sd x26, PT_S10(sp)
+-      sd x27, PT_S11(sp)
+-      sd x28, PT_T3(sp)
+-      sd x29, PT_T4(sp)
+-      sd x30, PT_T5(sp)
+-      sd x31, PT_T6(sp)
+-      .endm
+-
+-      .macro RESTORE_ALL
+-      ld x1,  PT_RA(sp)
+-      ld x2,  PT_SP(sp)
+-      ld x3,  PT_GP(sp)
+-      ld x4,  PT_TP(sp)
+-      ld x5,  PT_T0(sp)
+-      ld x6,  PT_T1(sp)
+-      ld x7,  PT_T2(sp)
+-      ld x8,  PT_S0(sp)
+-      ld x9,  PT_S1(sp)
+-      ld x10, PT_A0(sp)
+-      ld x11, PT_A1(sp)
+-      ld x12, PT_A2(sp)
+-      ld x13, PT_A3(sp)
+-      ld x14, PT_A4(sp)
+-      ld x15, PT_A5(sp)
+-      ld x16, PT_A6(sp)
+-      ld x17, PT_A7(sp)
+-      ld x18, PT_S2(sp)
+-      ld x19, PT_S3(sp)
+-      ld x20, PT_S4(sp)
+-      ld x21, PT_S5(sp)
+-      ld x22, PT_S6(sp)
+-      ld x23, PT_S7(sp)
+-      ld x24, PT_S8(sp)
+-      ld x25, PT_S9(sp)
+-      ld x26, PT_S10(sp)
+-      ld x27, PT_S11(sp)
+-      ld x28, PT_T3(sp)
+-      ld x29, PT_T4(sp)
+-      ld x30, PT_T5(sp)
+-      ld x31, PT_T6(sp)
+-
+-      ld      s0, (PT_SIZE_ON_STACK)(sp)
+-      ld      ra, (PT_SIZE_ON_STACK+8)(sp)
+-      addi    sp, sp, (PT_SIZE_ON_STACK+16)
+-      .endm
+-
+-      .macro RESTORE_GRAPH_REG_ARGS
+-      ld      a0, PT_T0(sp)
+-      ld      a1, PT_T1(sp)
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+-      ld      a2, PT_T2(sp)
+-#endif
+-      .endm
+-
+-/*
+- * Most of the contents are the same as ftrace_caller.
+- */
+ ENTRY(ftrace_regs_caller)
+-      /*
+-       * a3: the address of all registers in the stack
+-       */
+-      ld      a1, -8(s0)
+-      addi    a0, ra, -MCOUNT_INSN_SIZE
+-      la      t5, function_trace_op
+-      ld      a2, 0(t5)
+-      addi    a3, sp, -(PT_SIZE_ON_STACK+16)
+-
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+-      addi    t0, s0, -8
+-      mv      t1, a0
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+-      ld      t2, -16(s0)
+-#endif
+-#endif
+       SAVE_ALL
++      addi    a0, ra, -FENTRY_RA_OFFSET
++      la      a1, function_trace_op
++      REG_L   a2, 0(a1)
++      REG_L   a1, PT_SIZE_ON_STACK(sp)
++      mv      a3, sp
++
+ ftrace_regs_call:
+       .global ftrace_regs_call
+       call    ftrace_stub
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+-      RESTORE_GRAPH_REG_ARGS
+-      call    ftrace_graph_caller
++      addi    a0, sp, PT_RA
++      REG_L   a1, PT_EPC(sp)
++      addi    a1, a1, -FENTRY_RA_OFFSET
++#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
++      mv      a2, s0
++#endif
++ftrace_graph_regs_call:
++      .global ftrace_graph_regs_call
++      call    ftrace_stub
+ #endif
+       RESTORE_ALL
+-- 
+2.39.2
+
diff --git a/queue-5.10/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch b/queue-5.10/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch
new file mode 100644 (file)
index 0000000..7e966f2
--- /dev/null
@@ -0,0 +1,79 @@
+From f8a58485c0ab2ebd34421c68b42f82b6751ab879 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Feb 2023 12:52:00 -0800
+Subject: scsi: core: Remove the /proc/scsi/${proc_name} directory earlier
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+[ Upstream commit fc663711b94468f4e1427ebe289c9f05669699c9 ]
+
+Remove the /proc/scsi/${proc_name} directory earlier to fix a race
+condition between unloading and reloading kernel modules. This fixes a bug
+introduced in 2009 by commit 77c019768f06 ("[SCSI] fix /proc memory leak in
+the SCSI core").
+
+Fix the following kernel warning:
+
+proc_dir_entry 'scsi/scsi_debug' already registered
+WARNING: CPU: 19 PID: 27986 at fs/proc/generic.c:376 proc_register+0x27d/0x2e0
+Call Trace:
+ proc_mkdir+0xb5/0xe0
+ scsi_proc_hostdir_add+0xb5/0x170
+ scsi_host_alloc+0x683/0x6c0
+ sdebug_driver_probe+0x6b/0x2d0 [scsi_debug]
+ really_probe+0x159/0x540
+ __driver_probe_device+0xdc/0x230
+ driver_probe_device+0x4f/0x120
+ __device_attach_driver+0xef/0x180
+ bus_for_each_drv+0xe5/0x130
+ __device_attach+0x127/0x290
+ device_initial_probe+0x17/0x20
+ bus_probe_device+0x110/0x130
+ device_add+0x673/0xc80
+ device_register+0x1e/0x30
+ sdebug_add_host_helper+0x1a7/0x3b0 [scsi_debug]
+ scsi_debug_init+0x64f/0x1000 [scsi_debug]
+ do_one_initcall+0xd7/0x470
+ do_init_module+0xe7/0x330
+ load_module+0x122a/0x12c0
+ __do_sys_finit_module+0x124/0x1a0
+ __x64_sys_finit_module+0x46/0x50
+ do_syscall_64+0x38/0x80
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Link: https://lore.kernel.org/r/20230210205200.36973-3-bvanassche@acm.org
+Cc: Alan Stern <stern@rowland.harvard.edu>
+Cc: Yi Zhang <yi.zhang@redhat.com>
+Cc: stable@vger.kernel.org
+Fixes: 77c019768f06 ("[SCSI] fix /proc memory leak in the SCSI core")
+Reported-by: Yi Zhang <yi.zhang@redhat.com>
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/hosts.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
+index d664c4650b2dd..fae0323242103 100644
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -180,6 +180,7 @@ void scsi_remove_host(struct Scsi_Host *shost)
+       scsi_forget_host(shost);
+       mutex_unlock(&shost->scan_mutex);
+       scsi_proc_host_rm(shost);
++      scsi_proc_hostdir_rm(shost->hostt);
+       spin_lock_irqsave(shost->host_lock, flags);
+       if (scsi_host_set_state(shost, SHOST_DEL))
+@@ -321,6 +322,7 @@ static void scsi_host_dev_release(struct device *dev)
+       struct Scsi_Host *shost = dev_to_shost(dev);
+       struct device *parent = dev->parent;
++      /* In case scsi_remove_host() has not been called. */
+       scsi_proc_hostdir_rm(shost->hostt);
+       /* Wait for functions invoked through call_rcu(&shost->rcu, ...) */
+-- 
+2.39.2
+
diff --git a/queue-5.10/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch b/queue-5.10/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch
new file mode 100644 (file)
index 0000000..667acb0
--- /dev/null
@@ -0,0 +1,60 @@
+From bc488ae13b236a092e733fa7e55c51c6c308a204 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Mar 2023 16:23:40 +0530
+Subject: scsi: megaraid_sas: Update max supported LD IDs to 240
+
+From: Chandrakanth Patil <chandrakanth.patil@broadcom.com>
+
+[ Upstream commit bfa659177dcba48cf13f2bd88c1972f12a60bf1c ]
+
+The firmware only supports Logical Disk IDs up to 240 and LD ID 255 (0xFF)
+is reserved for deleted LDs. However, in some cases, firmware was assigning
+LD ID 254 (0xFE) to deleted LDs and this was causing the driver to mark the
+wrong disk as deleted. This in turn caused the wrong disk device to be
+taken offline by the SCSI midlayer.
+
+To address this issue, limit the LD ID range from 255 to 240. This ensures
+the deleted LD ID is properly identified and removed by the driver without
+accidently deleting any valid LDs.
+
+Fixes: ae6874ba4b43 ("scsi: megaraid_sas: Early detection of VD deletion through RaidMap update")
+Reported-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Chandrakanth Patil <chandrakanth.patil@broadcom.com>
+Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
+Link: https://lore.kernel.org/r/20230302105342.34933-2-chandrakanth.patil@broadcom.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/megaraid/megaraid_sas.h    | 2 ++
+ drivers/scsi/megaraid/megaraid_sas_fp.c | 2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
+index c088a848776ef..2d5b1d5978664 100644
+--- a/drivers/scsi/megaraid/megaraid_sas.h
++++ b/drivers/scsi/megaraid/megaraid_sas.h
+@@ -1517,6 +1517,8 @@ struct megasas_ctrl_info {
+ #define MEGASAS_MAX_LD_IDS                    (MEGASAS_MAX_LD_CHANNELS * \
+                                               MEGASAS_MAX_DEV_PER_CHANNEL)
++#define MEGASAS_MAX_SUPPORTED_LD_IDS          240
++
+ #define MEGASAS_MAX_SECTORS                    (2*1024)
+ #define MEGASAS_MAX_SECTORS_IEEE              (2*128)
+ #define MEGASAS_DBG_LVL                               1
+diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c
+index 83f69c33b01a9..ec10d35b4685a 100644
+--- a/drivers/scsi/megaraid/megaraid_sas_fp.c
++++ b/drivers/scsi/megaraid/megaraid_sas_fp.c
+@@ -358,7 +358,7 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance, u64 map_id)
+               ld = MR_TargetIdToLdGet(i, drv_map);
+               /* For non existing VDs, iterate to next VD*/
+-              if (ld >= (MAX_LOGICAL_DRIVES_EXT - 1))
++              if (ld >= MEGASAS_MAX_SUPPORTED_LD_IDS)
+                       continue;
+               raid = MR_LdRaidGet(ld, drv_map);
+-- 
+2.39.2
+
diff --git a/queue-5.10/selftests-landlock-add-clang-format-exceptions.patch b/queue-5.10/selftests-landlock-add-clang-format-exceptions.patch
new file mode 100644 (file)
index 0000000..5ee7009
--- /dev/null
@@ -0,0 +1,226 @@
+From 76f7cb6aeb2007c0d4febda804772cf6ae508604 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 May 2022 18:05:09 +0200
+Subject: selftests/landlock: Add clang-format exceptions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mickaël Salaün <mic@digikod.net>
+
+[ Upstream commit 4598d9abf4215e1e371a35683350d50122793c80 ]
+
+In preparation to a following commit, add clang-format on and
+clang-format off stanzas around constant definitions and the TEST_F_FORK
+macro.  This enables to keep aligned values, which is much more readable
+than packed definitions.
+
+Add other clang-format exceptions for FIXTURE() and
+FIXTURE_VARIANT_ADD() declarations to force space before open brace,
+which is reported by checkpatch.pl .
+
+Link: https://lore.kernel.org/r/20220506160513.523257-4-mic@digikod.net
+Cc: stable@vger.kernel.org
+Signed-off-by: Mickaël Salaün <mic@digikod.net>
+Stable-dep-of: 8677e555f17f ("selftests/landlock: Test ptrace as much as possible with Yama")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/landlock/common.h     |  2 ++
+ tools/testing/selftests/landlock/fs_test.c    | 23 ++++++++++++++-----
+ .../testing/selftests/landlock/ptrace_test.c  | 20 +++++++++++++++-
+ 3 files changed, 38 insertions(+), 7 deletions(-)
+
+diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
+index 20e2a9286d710..61127fffbeb83 100644
+--- a/tools/testing/selftests/landlock/common.h
++++ b/tools/testing/selftests/landlock/common.h
+@@ -29,6 +29,7 @@
+  * this to be possible, we must not call abort() but instead exit smoothly
+  * (hence the step print).
+  */
++/* clang-format off */
+ #define TEST_F_FORK(fixture_name, test_name) \
+       static void fixture_name##_##test_name##_child( \
+               struct __test_metadata *_metadata, \
+@@ -75,6 +76,7 @@
+               FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+               const FIXTURE_VARIANT(fixture_name) \
+                       __attribute__((unused)) *variant)
++/* clang-format on */
+ #ifndef landlock_create_ruleset
+ static inline int landlock_create_ruleset(
+diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
+index db153452b110a..036d55836b9ec 100644
+--- a/tools/testing/selftests/landlock/fs_test.c
++++ b/tools/testing/selftests/landlock/fs_test.c
+@@ -256,8 +256,9 @@ static void remove_layout1(struct __test_metadata *const _metadata)
+       EXPECT_EQ(0, remove_path(dir_s3d2));
+ }
+-FIXTURE(layout1) {
+-};
++/* clang-format off */
++FIXTURE(layout1) {};
++/* clang-format on */
+ FIXTURE_SETUP(layout1)
+ {
+@@ -411,6 +412,8 @@ TEST_F_FORK(layout1, inval)
+       ASSERT_EQ(0, close(ruleset_fd));
+ }
++/* clang-format off */
++
+ #define ACCESS_FILE ( \
+       LANDLOCK_ACCESS_FS_EXECUTE | \
+       LANDLOCK_ACCESS_FS_WRITE_FILE | \
+@@ -431,6 +434,8 @@ TEST_F_FORK(layout1, inval)
+       LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+       ACCESS_LAST)
++/* clang-format on */
++
+ TEST_F_FORK(layout1, file_access_rights)
+ {
+       __u64 access;
+@@ -487,6 +492,8 @@ struct rule {
+       __u64 access;
+ };
++/* clang-format off */
++
+ #define ACCESS_RO ( \
+       LANDLOCK_ACCESS_FS_READ_FILE | \
+       LANDLOCK_ACCESS_FS_READ_DIR)
+@@ -495,6 +502,8 @@ struct rule {
+       ACCESS_RO | \
+       LANDLOCK_ACCESS_FS_WRITE_FILE)
++/* clang-format on */
++
+ static int create_ruleset(struct __test_metadata *const _metadata,
+               const __u64 handled_access_fs, const struct rule rules[])
+ {
+@@ -2105,8 +2114,9 @@ TEST_F_FORK(layout1, proc_pipe)
+       ASSERT_EQ(0, close(pipe_fds[1]));
+ }
+-FIXTURE(layout1_bind) {
+-};
++/* clang-format off */
++FIXTURE(layout1_bind) {};
++/* clang-format on */
+ FIXTURE_SETUP(layout1_bind)
+ {
+@@ -2446,8 +2456,9 @@ static const char (*merge_sub_files[])[] = {
+  *         â””── work
+  */
+-FIXTURE(layout2_overlay) {
+-};
++/* clang-format off */
++FIXTURE(layout2_overlay) {};
++/* clang-format on */
+ FIXTURE_SETUP(layout2_overlay)
+ {
+diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
+index 15fbef9cc8496..090adadfe2dc3 100644
+--- a/tools/testing/selftests/landlock/ptrace_test.c
++++ b/tools/testing/selftests/landlock/ptrace_test.c
+@@ -59,7 +59,9 @@ static int test_ptrace_read(const pid_t pid)
+       return 0;
+ }
+-FIXTURE(hierarchy) { };
++/* clang-format off */
++FIXTURE(hierarchy) {};
++/* clang-format on */
+ FIXTURE_VARIANT(hierarchy) {
+       const bool domain_both;
+@@ -83,7 +85,9 @@ FIXTURE_VARIANT(hierarchy) {
+  *       \              P2 -> P1 : allow
+  *        'P2
+  */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
++      /* clang-format on */
+       .domain_both = false,
+       .domain_parent = false,
+       .domain_child = false,
+@@ -98,7 +102,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
+  *        |  P2  |
+  *        '------'
+  */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
++      /* clang-format on */
+       .domain_both = false,
+       .domain_parent = false,
+       .domain_child = true,
+@@ -112,7 +118,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
+  *            '
+  *            P2
+  */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
++      /* clang-format on */
+       .domain_both = false,
+       .domain_parent = true,
+       .domain_child = false,
+@@ -127,7 +135,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
+  *         |  P2  |
+  *         '------'
+  */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
++      /* clang-format on */
+       .domain_both = false,
+       .domain_parent = true,
+       .domain_child = true,
+@@ -142,7 +152,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
+  * |         P2  |
+  * '-------------'
+  */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
++      /* clang-format on */
+       .domain_both = true,
+       .domain_parent = false,
+       .domain_child = false,
+@@ -158,7 +170,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
+  * |        '------' |
+  * '-----------------'
+  */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
++      /* clang-format on */
+       .domain_both = true,
+       .domain_parent = false,
+       .domain_child = true,
+@@ -174,7 +188,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
+  * |             P2  |
+  * '-----------------'
+  */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
++      /* clang-format on */
+       .domain_both = true,
+       .domain_parent = true,
+       .domain_child = false,
+@@ -192,7 +208,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
+  * |        '------' |
+  * '-----------------'
+  */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
++      /* clang-format on */
+       .domain_both = true,
+       .domain_parent = true,
+       .domain_child = true,
+-- 
+2.39.2
+
diff --git a/queue-5.10/selftests-landlock-add-user-space-tests.patch b/queue-5.10/selftests-landlock-add-user-space-tests.patch
new file mode 100644 (file)
index 0000000..2332bfa
--- /dev/null
@@ -0,0 +1,3694 @@
+From 9f43c1d77d1e7cac1c93a6f95a6d91ebc4089a73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Apr 2021 17:41:20 +0200
+Subject: selftests/landlock: Add user space tests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mickaël Salaün <mic@linux.microsoft.com>
+
+[ Upstream commit e1199815b47be83346c03e20a3de76f934e4bb34 ]
+
+Test all Landlock system calls, ptrace hooks semantic and filesystem
+access-control with multiple layouts.
+
+Test coverage for security/landlock/ is 93.6% of lines.  The code not
+covered only deals with internal kernel errors (e.g. memory allocation)
+and race conditions.
+
+Cc: James Morris <jmorris@namei.org>
+Cc: Jann Horn <jannh@google.com>
+Cc: Serge E. Hallyn <serge@hallyn.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
+Reviewed-by: Vincent Dagonneau <vincent.dagonneau@ssi.gouv.fr>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20210422154123.13086-11-mic@digikod.net
+Signed-off-by: James Morris <jamorris@linux.microsoft.com>
+Stable-dep-of: 366617a69e60 ("selftests/landlock: Skip overlayfs tests when not supported")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ MAINTAINERS                                   |    1 +
+ tools/testing/selftests/Makefile              |    1 +
+ tools/testing/selftests/landlock/.gitignore   |    2 +
+ tools/testing/selftests/landlock/Makefile     |   24 +
+ tools/testing/selftests/landlock/base_test.c  |  219 ++
+ tools/testing/selftests/landlock/common.h     |  183 ++
+ tools/testing/selftests/landlock/config       |    7 +
+ tools/testing/selftests/landlock/fs_test.c    | 2791 +++++++++++++++++
+ .../testing/selftests/landlock/ptrace_test.c  |  337 ++
+ tools/testing/selftests/landlock/true.c       |    5 +
+ 10 files changed, 3570 insertions(+)
+ create mode 100644 tools/testing/selftests/landlock/.gitignore
+ create mode 100644 tools/testing/selftests/landlock/Makefile
+ create mode 100644 tools/testing/selftests/landlock/base_test.c
+ create mode 100644 tools/testing/selftests/landlock/common.h
+ create mode 100644 tools/testing/selftests/landlock/config
+ create mode 100644 tools/testing/selftests/landlock/fs_test.c
+ create mode 100644 tools/testing/selftests/landlock/ptrace_test.c
+ create mode 100644 tools/testing/selftests/landlock/true.c
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 72815c1a325eb..5bc6a028236e3 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9843,6 +9843,7 @@ S:       Supported
+ W:    https://landlock.io
+ T:    git https://github.com/landlock-lsm/linux.git
+ F:    security/landlock/
++F:    tools/testing/selftests/landlock/
+ K:    landlock
+ K:    LANDLOCK
+diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
+index db1e24d7155fa..ca96973dca44d 100644
+--- a/tools/testing/selftests/Makefile
++++ b/tools/testing/selftests/Makefile
+@@ -26,6 +26,7 @@ TARGETS += ir
+ TARGETS += kcmp
+ TARGETS += kexec
+ TARGETS += kvm
++TARGETS += landlock
+ TARGETS += lib
+ TARGETS += livepatch
+ TARGETS += lkdtm
+diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
+new file mode 100644
+index 0000000000000..470203a7cd737
+--- /dev/null
++++ b/tools/testing/selftests/landlock/.gitignore
+@@ -0,0 +1,2 @@
++/*_test
++/true
+diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
+new file mode 100644
+index 0000000000000..a99596ca9882b
+--- /dev/null
++++ b/tools/testing/selftests/landlock/Makefile
+@@ -0,0 +1,24 @@
++# SPDX-License-Identifier: GPL-2.0
++
++CFLAGS += -Wall -O2
++
++src_test := $(wildcard *_test.c)
++
++TEST_GEN_PROGS := $(src_test:.c=)
++
++TEST_GEN_PROGS_EXTENDED := true
++
++KSFT_KHDR_INSTALL := 1
++OVERRIDE_TARGETS := 1
++include ../lib.mk
++
++khdr_dir = $(top_srcdir)/usr/include
++
++$(khdr_dir)/linux/landlock.h: khdr
++      @:
++
++$(OUTPUT)/true: true.c
++      $(LINK.c) $< $(LDLIBS) -o $@ -static
++
++$(OUTPUT)/%_test: %_test.c $(khdr_dir)/linux/landlock.h ../kselftest_harness.h common.h
++      $(LINK.c) $< $(LDLIBS) -o $@ -lcap -I$(khdr_dir)
+diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
+new file mode 100644
+index 0000000000000..262c3c8d953ad
+--- /dev/null
++++ b/tools/testing/selftests/landlock/base_test.c
+@@ -0,0 +1,219 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Landlock tests - Common user space base
++ *
++ * Copyright Â© 2017-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright Â© 2019-2020 ANSSI
++ */
++
++#define _GNU_SOURCE
++#include <errno.h>
++#include <fcntl.h>
++#include <linux/landlock.h>
++#include <string.h>
++#include <sys/prctl.h>
++#include <sys/socket.h>
++#include <sys/types.h>
++
++#include "common.h"
++
++#ifndef O_PATH
++#define O_PATH                010000000
++#endif
++
++TEST(inconsistent_attr) {
++      const long page_size = sysconf(_SC_PAGESIZE);
++      char *const buf = malloc(page_size + 1);
++      struct landlock_ruleset_attr *const ruleset_attr = (void *)buf;
++
++      ASSERT_NE(NULL, buf);
++
++      /* Checks copy_from_user(). */
++      ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 0, 0));
++      /* The size if less than sizeof(struct landlock_attr_enforce). */
++      ASSERT_EQ(EINVAL, errno);
++      ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0));
++      ASSERT_EQ(EINVAL, errno);
++
++      ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0));
++      /* The size if less than sizeof(struct landlock_attr_enforce). */
++      ASSERT_EQ(EFAULT, errno);
++
++      ASSERT_EQ(-1, landlock_create_ruleset(NULL,
++                              sizeof(struct landlock_ruleset_attr), 0));
++      ASSERT_EQ(EFAULT, errno);
++
++      ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
++      ASSERT_EQ(E2BIG, errno);
++
++      ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr,
++                              sizeof(struct landlock_ruleset_attr), 0));
++      ASSERT_EQ(ENOMSG, errno);
++      ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
++      ASSERT_EQ(ENOMSG, errno);
++
++      /* Checks non-zero value. */
++      buf[page_size - 2] = '.';
++      ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
++      ASSERT_EQ(E2BIG, errno);
++
++      ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
++      ASSERT_EQ(E2BIG, errno);
++
++      free(buf);
++}
++
++TEST(empty_path_beneath_attr) {
++      const struct landlock_ruleset_attr ruleset_attr = {
++              .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
++      };
++      const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      /* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */
++      ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              NULL, 0));
++      ASSERT_EQ(EFAULT, errno);
++      ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST(inval_fd_enforce) {
++      ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++
++      ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
++      ASSERT_EQ(EBADF, errno);
++}
++
++TEST(unpriv_enforce_without_no_new_privs) {
++      int err;
++
++      drop_caps(_metadata);
++      err = landlock_restrict_self(-1, 0);
++      ASSERT_EQ(EPERM, errno);
++      ASSERT_EQ(err, -1);
++}
++
++TEST(ruleset_fd_io)
++{
++      struct landlock_ruleset_attr ruleset_attr = {
++              .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
++      };
++      int ruleset_fd;
++      char buf;
++
++      drop_caps(_metadata);
++      ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++      ASSERT_LE(0, ruleset_fd);
++
++      ASSERT_EQ(-1, write(ruleset_fd, ".", 1));
++      ASSERT_EQ(EINVAL, errno);
++      ASSERT_EQ(-1, read(ruleset_fd, &buf, 1));
++      ASSERT_EQ(EINVAL, errno);
++
++      ASSERT_EQ(0, close(ruleset_fd));
++}
++
++/* Tests enforcement of a ruleset FD transferred through a UNIX socket. */
++TEST(ruleset_fd_transfer)
++{
++      struct landlock_ruleset_attr ruleset_attr = {
++              .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
++      };
++      struct landlock_path_beneath_attr path_beneath_attr = {
++              .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
++      };
++      int ruleset_fd_tx, dir_fd;
++      union {
++              /* Aligned ancillary data buffer. */
++              char buf[CMSG_SPACE(sizeof(ruleset_fd_tx))];
++              struct cmsghdr _align;
++      } cmsg_tx = {};
++      char data_tx = '.';
++      struct iovec io = {
++              .iov_base = &data_tx,
++              .iov_len = sizeof(data_tx),
++      };
++      struct msghdr msg = {
++              .msg_iov = &io,
++              .msg_iovlen = 1,
++              .msg_control = &cmsg_tx.buf,
++              .msg_controllen = sizeof(cmsg_tx.buf),
++      };
++      struct cmsghdr *cmsg;
++      int socket_fds[2];
++      pid_t child;
++      int status;
++
++      drop_caps(_metadata);
++
++      /* Creates a test ruleset with a simple rule. */
++      ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++      ASSERT_LE(0, ruleset_fd_tx);
++      path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW |
++                      O_DIRECTORY | O_CLOEXEC);
++      ASSERT_LE(0, path_beneath_attr.parent_fd);
++      ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath_attr, 0));
++      ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
++
++      cmsg = CMSG_FIRSTHDR(&msg);
++      ASSERT_NE(NULL, cmsg);
++      cmsg->cmsg_len = CMSG_LEN(sizeof(ruleset_fd_tx));
++      cmsg->cmsg_level = SOL_SOCKET;
++      cmsg->cmsg_type = SCM_RIGHTS;
++      memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx));
++
++      /* Sends the ruleset FD over a socketpair and then close it. */
++      ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds));
++      ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0));
++      ASSERT_EQ(0, close(socket_fds[0]));
++      ASSERT_EQ(0, close(ruleset_fd_tx));
++
++      child = fork();
++      ASSERT_LE(0, child);
++      if (child == 0) {
++              int ruleset_fd_rx;
++
++              *(char *)msg.msg_iov->iov_base = '\0';
++              ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
++              ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base);
++              ASSERT_EQ(0, close(socket_fds[1]));
++              cmsg = CMSG_FIRSTHDR(&msg);
++              ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(ruleset_fd_tx)));
++              memcpy(&ruleset_fd_rx, CMSG_DATA(cmsg), sizeof(ruleset_fd_tx));
++
++              /* Enforces the received ruleset on the child. */
++              ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++              ASSERT_EQ(0, landlock_restrict_self(ruleset_fd_rx, 0));
++              ASSERT_EQ(0, close(ruleset_fd_rx));
++
++              /* Checks that the ruleset enforcement. */
++              ASSERT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
++              ASSERT_EQ(EACCES, errno);
++              dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
++              ASSERT_LE(0, dir_fd);
++              ASSERT_EQ(0, close(dir_fd));
++              _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
++              return;
++      }
++
++      ASSERT_EQ(0, close(socket_fds[1]));
++
++      /* Checks that the parent is unrestricted. */
++      dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
++      ASSERT_LE(0, dir_fd);
++      ASSERT_EQ(0, close(dir_fd));
++      dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
++      ASSERT_LE(0, dir_fd);
++      ASSERT_EQ(0, close(dir_fd));
++
++      ASSERT_EQ(child, waitpid(child, &status, 0));
++      ASSERT_EQ(1, WIFEXITED(status));
++      ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
++}
++
++TEST_HARNESS_MAIN
+diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
+new file mode 100644
+index 0000000000000..20e2a9286d710
+--- /dev/null
++++ b/tools/testing/selftests/landlock/common.h
+@@ -0,0 +1,183 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Landlock test helpers
++ *
++ * Copyright Â© 2017-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright Â© 2019-2020 ANSSI
++ * Copyright Â© 2021 Microsoft Corporation
++ */
++
++#include <errno.h>
++#include <linux/landlock.h>
++#include <sys/capability.h>
++#include <sys/syscall.h>
++#include <sys/types.h>
++#include <sys/wait.h>
++#include <unistd.h>
++
++#include "../kselftest_harness.h"
++
++#ifndef ARRAY_SIZE
++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
++#endif
++
++/*
++ * TEST_F_FORK() is useful when a test drop privileges but the corresponding
++ * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory
++ * where write actions are denied).  For convenience, FIXTURE_TEARDOWN() is
++ * also called when the test failed, but not when FIXTURE_SETUP() failed.  For
++ * this to be possible, we must not call abort() but instead exit smoothly
++ * (hence the step print).
++ */
++#define TEST_F_FORK(fixture_name, test_name) \
++      static void fixture_name##_##test_name##_child( \
++              struct __test_metadata *_metadata, \
++              FIXTURE_DATA(fixture_name) *self, \
++              const FIXTURE_VARIANT(fixture_name) *variant); \
++      TEST_F(fixture_name, test_name) \
++      { \
++              int status; \
++              const pid_t child = fork(); \
++              if (child < 0) \
++                      abort(); \
++              if (child == 0) { \
++                      _metadata->no_print = 1; \
++                      fixture_name##_##test_name##_child(_metadata, self, variant); \
++                      if (_metadata->skip) \
++                              _exit(255); \
++                      if (_metadata->passed) \
++                              _exit(0); \
++                      _exit(_metadata->step); \
++              } \
++              if (child != waitpid(child, &status, 0)) \
++                      abort(); \
++              if (WIFSIGNALED(status) || !WIFEXITED(status)) { \
++                      _metadata->passed = 0; \
++                      _metadata->step = 1; \
++                      return; \
++              } \
++              switch (WEXITSTATUS(status)) { \
++              case 0: \
++                      _metadata->passed = 1; \
++                      break; \
++              case 255: \
++                      _metadata->passed = 1; \
++                      _metadata->skip = 1; \
++                      break; \
++              default: \
++                      _metadata->passed = 0; \
++                      _metadata->step = WEXITSTATUS(status); \
++                      break; \
++              } \
++      } \
++      static void fixture_name##_##test_name##_child( \
++              struct __test_metadata __attribute__((unused)) *_metadata, \
++              FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
++              const FIXTURE_VARIANT(fixture_name) \
++                      __attribute__((unused)) *variant)
++
++#ifndef landlock_create_ruleset
++static inline int landlock_create_ruleset(
++              const struct landlock_ruleset_attr *const attr,
++              const size_t size, const __u32 flags)
++{
++      return syscall(__NR_landlock_create_ruleset, attr, size, flags);
++}
++#endif
++
++#ifndef landlock_add_rule
++static inline int landlock_add_rule(const int ruleset_fd,
++              const enum landlock_rule_type rule_type,
++              const void *const rule_attr, const __u32 flags)
++{
++      return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type,
++                      rule_attr, flags);
++}
++#endif
++
++#ifndef landlock_restrict_self
++static inline int landlock_restrict_self(const int ruleset_fd,
++              const __u32 flags)
++{
++      return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
++}
++#endif
++
++static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
++{
++      cap_t cap_p;
++      /* Only these three capabilities are useful for the tests. */
++      const cap_value_t caps[] = {
++              CAP_DAC_OVERRIDE,
++              CAP_MKNOD,
++              CAP_SYS_ADMIN,
++              CAP_SYS_CHROOT,
++      };
++
++      cap_p = cap_get_proc();
++      EXPECT_NE(NULL, cap_p) {
++              TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
++      }
++      EXPECT_NE(-1, cap_clear(cap_p)) {
++              TH_LOG("Failed to cap_clear: %s", strerror(errno));
++      }
++      if (!drop_all) {
++              EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED,
++                                      ARRAY_SIZE(caps), caps, CAP_SET)) {
++                      TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
++              }
++      }
++      EXPECT_NE(-1, cap_set_proc(cap_p)) {
++              TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
++      }
++      EXPECT_NE(-1, cap_free(cap_p)) {
++              TH_LOG("Failed to cap_free: %s", strerror(errno));
++      }
++}
++
++/* We cannot put such helpers in a library because of kselftest_harness.h . */
++__attribute__((__unused__))
++static void disable_caps(struct __test_metadata *const _metadata)
++{
++      _init_caps(_metadata, false);
++}
++
++__attribute__((__unused__))
++static void drop_caps(struct __test_metadata *const _metadata)
++{
++      _init_caps(_metadata, true);
++}
++
++static void _effective_cap(struct __test_metadata *const _metadata,
++              const cap_value_t caps, const cap_flag_value_t value)
++{
++      cap_t cap_p;
++
++      cap_p = cap_get_proc();
++      EXPECT_NE(NULL, cap_p) {
++              TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
++      }
++      EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) {
++              TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
++      }
++      EXPECT_NE(-1, cap_set_proc(cap_p)) {
++              TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
++      }
++      EXPECT_NE(-1, cap_free(cap_p)) {
++              TH_LOG("Failed to cap_free: %s", strerror(errno));
++      }
++}
++
++__attribute__((__unused__))
++static void set_cap(struct __test_metadata *const _metadata,
++              const cap_value_t caps)
++{
++      _effective_cap(_metadata, caps, CAP_SET);
++}
++
++__attribute__((__unused__))
++static void clear_cap(struct __test_metadata *const _metadata,
++              const cap_value_t caps)
++{
++      _effective_cap(_metadata, caps, CAP_CLEAR);
++}
+diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
+new file mode 100644
+index 0000000000000..0f0a65287bacf
+--- /dev/null
++++ b/tools/testing/selftests/landlock/config
+@@ -0,0 +1,7 @@
++CONFIG_OVERLAY_FS=y
++CONFIG_SECURITY_LANDLOCK=y
++CONFIG_SECURITY_PATH=y
++CONFIG_SECURITY=y
++CONFIG_SHMEM=y
++CONFIG_TMPFS_XATTR=y
++CONFIG_TMPFS=y
+diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
+new file mode 100644
+index 0000000000000..10c9a1e4ebd9b
+--- /dev/null
++++ b/tools/testing/selftests/landlock/fs_test.c
+@@ -0,0 +1,2791 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Landlock tests - Filesystem
++ *
++ * Copyright Â© 2017-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright Â© 2020 ANSSI
++ * Copyright Â© 2020-2021 Microsoft Corporation
++ */
++
++#define _GNU_SOURCE
++#include <fcntl.h>
++#include <linux/landlock.h>
++#include <sched.h>
++#include <string.h>
++#include <sys/capability.h>
++#include <sys/mount.h>
++#include <sys/prctl.h>
++#include <sys/sendfile.h>
++#include <sys/stat.h>
++#include <sys/sysmacros.h>
++#include <unistd.h>
++
++#include "common.h"
++
++#define TMP_DIR               "tmp"
++#define BINARY_PATH   "./true"
++
++/* Paths (sibling number and depth) */
++static const char dir_s1d1[] = TMP_DIR "/s1d1";
++static const char file1_s1d1[] = TMP_DIR "/s1d1/f1";
++static const char file2_s1d1[] = TMP_DIR "/s1d1/f2";
++static const char dir_s1d2[] = TMP_DIR "/s1d1/s1d2";
++static const char file1_s1d2[] = TMP_DIR "/s1d1/s1d2/f1";
++static const char file2_s1d2[] = TMP_DIR "/s1d1/s1d2/f2";
++static const char dir_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3";
++static const char file1_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f1";
++static const char file2_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f2";
++
++static const char dir_s2d1[] = TMP_DIR "/s2d1";
++static const char file1_s2d1[] = TMP_DIR "/s2d1/f1";
++static const char dir_s2d2[] = TMP_DIR "/s2d1/s2d2";
++static const char file1_s2d2[] = TMP_DIR "/s2d1/s2d2/f1";
++static const char dir_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3";
++static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1";
++static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2";
++
++static const char dir_s3d1[] = TMP_DIR "/s3d1";
++/* dir_s3d2 is a mount point. */
++static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2";
++static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
++
++/*
++ * layout1 hierarchy:
++ *
++ * tmp
++ * â”œâ”€â”€ s1d1
++ * â”‚   â”œâ”€â”€ f1
++ * â”‚   â”œâ”€â”€ f2
++ * â”‚   â””── s1d2
++ * â”‚       â”œâ”€â”€ f1
++ * â”‚       â”œâ”€â”€ f2
++ * â”‚       â””── s1d3
++ * â”‚           â”œâ”€â”€ f1
++ * â”‚           â””── f2
++ * â”œâ”€â”€ s2d1
++ * â”‚   â”œâ”€â”€ f1
++ * â”‚   â””── s2d2
++ * â”‚       â”œâ”€â”€ f1
++ * â”‚       â””── s2d3
++ * â”‚           â”œâ”€â”€ f1
++ * â”‚           â””── f2
++ * â””── s3d1
++ *     â””── s3d2
++ *         â””── s3d3
++ */
++
++static void mkdir_parents(struct __test_metadata *const _metadata,
++              const char *const path)
++{
++      char *walker;
++      const char *parent;
++      int i, err;
++
++      ASSERT_NE(path[0], '\0');
++      walker = strdup(path);
++      ASSERT_NE(NULL, walker);
++      parent = walker;
++      for (i = 1; walker[i]; i++) {
++              if (walker[i] != '/')
++                      continue;
++              walker[i] = '\0';
++              err = mkdir(parent, 0700);
++              ASSERT_FALSE(err && errno != EEXIST) {
++                      TH_LOG("Failed to create directory \"%s\": %s",
++                                      parent, strerror(errno));
++              }
++              walker[i] = '/';
++      }
++      free(walker);
++}
++
++static void create_directory(struct __test_metadata *const _metadata,
++              const char *const path)
++{
++      mkdir_parents(_metadata, path);
++      ASSERT_EQ(0, mkdir(path, 0700)) {
++              TH_LOG("Failed to create directory \"%s\": %s", path,
++                              strerror(errno));
++      }
++}
++
++static void create_file(struct __test_metadata *const _metadata,
++              const char *const path)
++{
++      mkdir_parents(_metadata, path);
++      ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) {
++              TH_LOG("Failed to create file \"%s\": %s", path,
++                              strerror(errno));
++      }
++}
++
++static int remove_path(const char *const path)
++{
++      char *walker;
++      int i, ret, err = 0;
++
++      walker = strdup(path);
++      if (!walker) {
++              err = ENOMEM;
++              goto out;
++      }
++      if (unlink(path) && rmdir(path)) {
++              if (errno != ENOENT)
++                      err = errno;
++              goto out;
++      }
++      for (i = strlen(walker); i > 0; i--) {
++              if (walker[i] != '/')
++                      continue;
++              walker[i] = '\0';
++              ret = rmdir(walker);
++              if (ret) {
++                      if (errno != ENOTEMPTY && errno != EBUSY)
++                              err = errno;
++                      goto out;
++              }
++              if (strcmp(walker, TMP_DIR) == 0)
++                      goto out;
++      }
++
++out:
++      free(walker);
++      return err;
++}
++
++static void prepare_layout(struct __test_metadata *const _metadata)
++{
++      disable_caps(_metadata);
++      umask(0077);
++      create_directory(_metadata, TMP_DIR);
++
++      /*
++       * Do not pollute the rest of the system: creates a private mount point
++       * for tests relying on pivot_root(2) and move_mount(2).
++       */
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      ASSERT_EQ(0, unshare(CLONE_NEWNS));
++      ASSERT_EQ(0, mount("tmp", TMP_DIR, "tmpfs", 0, "size=4m,mode=700"));
++      ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++static void cleanup_layout(struct __test_metadata *const _metadata)
++{
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, umount(TMP_DIR));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, remove_path(TMP_DIR));
++}
++
++static void create_layout1(struct __test_metadata *const _metadata)
++{
++      create_file(_metadata, file1_s1d1);
++      create_file(_metadata, file1_s1d2);
++      create_file(_metadata, file1_s1d3);
++      create_file(_metadata, file2_s1d1);
++      create_file(_metadata, file2_s1d2);
++      create_file(_metadata, file2_s1d3);
++
++      create_file(_metadata, file1_s2d1);
++      create_file(_metadata, file1_s2d2);
++      create_file(_metadata, file1_s2d3);
++      create_file(_metadata, file2_s2d3);
++
++      create_directory(_metadata, dir_s3d2);
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      ASSERT_EQ(0, mount("tmp", dir_s3d2, "tmpfs", 0, "size=4m,mode=700"));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++
++      ASSERT_EQ(0, mkdir(dir_s3d3, 0700));
++}
++
++static void remove_layout1(struct __test_metadata *const _metadata)
++{
++      EXPECT_EQ(0, remove_path(file2_s1d3));
++      EXPECT_EQ(0, remove_path(file2_s1d2));
++      EXPECT_EQ(0, remove_path(file2_s1d1));
++      EXPECT_EQ(0, remove_path(file1_s1d3));
++      EXPECT_EQ(0, remove_path(file1_s1d2));
++      EXPECT_EQ(0, remove_path(file1_s1d1));
++
++      EXPECT_EQ(0, remove_path(file2_s2d3));
++      EXPECT_EQ(0, remove_path(file1_s2d3));
++      EXPECT_EQ(0, remove_path(file1_s2d2));
++      EXPECT_EQ(0, remove_path(file1_s2d1));
++
++      EXPECT_EQ(0, remove_path(dir_s3d3));
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      umount(dir_s3d2);
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, remove_path(dir_s3d2));
++}
++
++FIXTURE(layout1) {
++};
++
++FIXTURE_SETUP(layout1)
++{
++      prepare_layout(_metadata);
++
++      create_layout1(_metadata);
++}
++
++FIXTURE_TEARDOWN(layout1)
++{
++      remove_layout1(_metadata);
++
++      cleanup_layout(_metadata);
++}
++
++/*
++ * This helper enables to use the ASSERT_* macros and print the line number
++ * pointing to the test caller.
++ */
++static int test_open_rel(const int dirfd, const char *const path, const int flags)
++{
++      int fd;
++
++      /* Works with file and directories. */
++      fd = openat(dirfd, path, flags | O_CLOEXEC);
++      if (fd < 0)
++              return errno;
++      /*
++       * Mixing error codes from close(2) and open(2) should not lead to any
++       * (access type) confusion for this test.
++       */
++      if (close(fd) != 0)
++              return errno;
++      return 0;
++}
++
++static int test_open(const char *const path, const int flags)
++{
++      return test_open_rel(AT_FDCWD, path, flags);
++}
++
++TEST_F_FORK(layout1, no_restriction)
++{
++      ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(file2_s1d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(file2_s1d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++
++      ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s2d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s2d3, O_RDONLY));
++
++      ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, inval)
++{
++      struct landlock_path_beneath_attr path_beneath = {
++              .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
++                      LANDLOCK_ACCESS_FS_WRITE_FILE,
++              .parent_fd = -1,
++      };
++      struct landlock_ruleset_attr ruleset_attr = {
++              .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
++                      LANDLOCK_ACCESS_FS_WRITE_FILE,
++      };
++      int ruleset_fd;
++
++      path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
++                      O_CLOEXEC);
++      ASSERT_LE(0, path_beneath.parent_fd);
++
++      ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC);
++      ASSERT_LE(0, ruleset_fd);
++      ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0));
++      /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */
++      ASSERT_EQ(EBADF, errno);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC);
++      ASSERT_LE(0, ruleset_fd);
++      ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0));
++      /* Returns EBADFD because ruleset_fd is not a valid ruleset. */
++      ASSERT_EQ(EBADFD, errno);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Gets a real ruleset. */
++      ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++      ASSERT_LE(0, ruleset_fd);
++      ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0));
++      ASSERT_EQ(0, close(path_beneath.parent_fd));
++
++      /* Tests without O_PATH. */
++      path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC);
++      ASSERT_LE(0, path_beneath.parent_fd);
++      ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0));
++      ASSERT_EQ(0, close(path_beneath.parent_fd));
++
++      /* Tests with a ruleset FD. */
++      path_beneath.parent_fd = ruleset_fd;
++      ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0));
++      ASSERT_EQ(EBADFD, errno);
++
++      /* Checks unhandled allowed_access. */
++      path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
++                      O_CLOEXEC);
++      ASSERT_LE(0, path_beneath.parent_fd);
++
++      /* Test with legitimate values. */
++      path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE;
++      ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0));
++      ASSERT_EQ(EINVAL, errno);
++      path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE;
++
++      /* Test with unknown (64-bits) value. */
++      path_beneath.allowed_access |= (1ULL << 60);
++      ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0));
++      ASSERT_EQ(EINVAL, errno);
++      path_beneath.allowed_access &= ~(1ULL << 60);
++
++      /* Test with no access. */
++      path_beneath.allowed_access = 0;
++      ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0));
++      ASSERT_EQ(ENOMSG, errno);
++      path_beneath.allowed_access &= ~(1ULL << 60);
++
++      ASSERT_EQ(0, close(path_beneath.parent_fd));
++
++      /* Enforces the ruleset. */
++      ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++      ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
++
++      ASSERT_EQ(0, close(ruleset_fd));
++}
++
++#define ACCESS_FILE ( \
++      LANDLOCK_ACCESS_FS_EXECUTE | \
++      LANDLOCK_ACCESS_FS_WRITE_FILE | \
++      LANDLOCK_ACCESS_FS_READ_FILE)
++
++#define ACCESS_LAST LANDLOCK_ACCESS_FS_MAKE_SYM
++
++#define ACCESS_ALL ( \
++      ACCESS_FILE | \
++      LANDLOCK_ACCESS_FS_READ_DIR | \
++      LANDLOCK_ACCESS_FS_REMOVE_DIR | \
++      LANDLOCK_ACCESS_FS_REMOVE_FILE | \
++      LANDLOCK_ACCESS_FS_MAKE_CHAR | \
++      LANDLOCK_ACCESS_FS_MAKE_DIR | \
++      LANDLOCK_ACCESS_FS_MAKE_REG | \
++      LANDLOCK_ACCESS_FS_MAKE_SOCK | \
++      LANDLOCK_ACCESS_FS_MAKE_FIFO | \
++      LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
++      ACCESS_LAST)
++
++TEST_F_FORK(layout1, file_access_rights)
++{
++      __u64 access;
++      int err;
++      struct landlock_path_beneath_attr path_beneath = {};
++      struct landlock_ruleset_attr ruleset_attr = {
++              .handled_access_fs = ACCESS_ALL,
++      };
++      const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      /* Tests access rights for files. */
++      path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
++      ASSERT_LE(0, path_beneath.parent_fd);
++      for (access = 1; access <= ACCESS_LAST; access <<= 1) {
++              path_beneath.allowed_access = access;
++              err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0);
++              if ((access | ACCESS_FILE) == ACCESS_FILE) {
++                      ASSERT_EQ(0, err);
++              } else {
++                      ASSERT_EQ(-1, err);
++                      ASSERT_EQ(EINVAL, errno);
++              }
++      }
++      ASSERT_EQ(0, close(path_beneath.parent_fd));
++}
++
++static void add_path_beneath(struct __test_metadata *const _metadata,
++              const int ruleset_fd, const __u64 allowed_access,
++              const char *const path)
++{
++      struct landlock_path_beneath_attr path_beneath = {
++              .allowed_access = allowed_access,
++      };
++
++      path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC);
++      ASSERT_LE(0, path_beneath.parent_fd) {
++              TH_LOG("Failed to open directory \"%s\": %s", path,
++                              strerror(errno));
++      }
++      ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0)) {
++              TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
++                              strerror(errno));
++      }
++      ASSERT_EQ(0, close(path_beneath.parent_fd));
++}
++
++struct rule {
++      const char *path;
++      __u64 access;
++};
++
++#define ACCESS_RO ( \
++      LANDLOCK_ACCESS_FS_READ_FILE | \
++      LANDLOCK_ACCESS_FS_READ_DIR)
++
++#define ACCESS_RW ( \
++      ACCESS_RO | \
++      LANDLOCK_ACCESS_FS_WRITE_FILE)
++
++static int create_ruleset(struct __test_metadata *const _metadata,
++              const __u64 handled_access_fs, const struct rule rules[])
++{
++      int ruleset_fd, i;
++      struct landlock_ruleset_attr ruleset_attr = {
++              .handled_access_fs = handled_access_fs,
++      };
++
++      ASSERT_NE(NULL, rules) {
++              TH_LOG("No rule list");
++      }
++      ASSERT_NE(NULL, rules[0].path) {
++              TH_LOG("Empty rule list");
++      }
++
++      ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++      ASSERT_LE(0, ruleset_fd) {
++              TH_LOG("Failed to create a ruleset: %s", strerror(errno));
++      }
++
++      for (i = 0; rules[i].path; i++) {
++              add_path_beneath(_metadata, ruleset_fd, rules[i].access,
++                              rules[i].path);
++      }
++      return ruleset_fd;
++}
++
++static void enforce_ruleset(struct __test_metadata *const _metadata,
++              const int ruleset_fd)
++{
++      ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++      ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) {
++              TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
++      }
++}
++
++TEST_F_FORK(layout1, proc_nsfs)
++{
++      const struct rule rules[] = {
++              {
++                      .path = "/dev/null",
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {}
++      };
++      struct landlock_path_beneath_attr path_beneath;
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access |
++                      LANDLOCK_ACCESS_FS_READ_DIR, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
++
++      enforce_ruleset(_metadata, ruleset_fd);
++
++      ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++      ASSERT_EQ(EACCES, test_open("/dev", O_RDONLY));
++      ASSERT_EQ(0, test_open("/dev/null", O_RDONLY));
++      ASSERT_EQ(EACCES, test_open("/dev/full", O_RDONLY));
++
++      ASSERT_EQ(EACCES, test_open("/proc", O_RDONLY));
++      ASSERT_EQ(EACCES, test_open("/proc/self", O_RDONLY));
++      ASSERT_EQ(EACCES, test_open("/proc/self/ns", O_RDONLY));
++      /*
++       * Because nsfs is an internal filesystem, /proc/self/ns/mnt is a
++       * disconnected path.  Such path cannot be identified and must then be
++       * allowed.
++       */
++      ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
++
++      /*
++       * Checks that it is not possible to add nsfs-like filesystem
++       * references to a ruleset.
++       */
++      path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
++              LANDLOCK_ACCESS_FS_WRITE_FILE,
++      path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC);
++      ASSERT_LE(0, path_beneath.parent_fd);
++      ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++                              &path_beneath, 0));
++      ASSERT_EQ(EBADFD, errno);
++      ASSERT_EQ(0, close(path_beneath.parent_fd));
++}
++
++TEST_F_FORK(layout1, unpriv) {
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      int ruleset_fd;
++
++      drop_caps(_metadata);
++
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
++      ASSERT_LE(0, ruleset_fd);
++      ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
++      ASSERT_EQ(EPERM, errno);
++
++      /* enforce_ruleset() calls prctl(no_new_privs). */
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST_F_FORK(layout1, effective_access)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = ACCESS_RO,
++              },
++              {
++                      .path = file1_s2d2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++      char buf;
++      int reg_fd;
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Tests on a directory. */
++      ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++
++      /* Tests on a file. */
++      ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
++
++      /* Checks effective read and write actions. */
++      reg_fd = open(file1_s2d2, O_RDWR | O_CLOEXEC);
++      ASSERT_LE(0, reg_fd);
++      ASSERT_EQ(1, write(reg_fd, ".", 1));
++      ASSERT_LE(0, lseek(reg_fd, 0, SEEK_SET));
++      ASSERT_EQ(1, read(reg_fd, &buf, 1));
++      ASSERT_EQ('.', buf);
++      ASSERT_EQ(0, close(reg_fd));
++
++      /* Just in case, double-checks effective actions. */
++      reg_fd = open(file1_s2d2, O_RDONLY | O_CLOEXEC);
++      ASSERT_LE(0, reg_fd);
++      ASSERT_EQ(-1, write(reg_fd, &buf, 1));
++      ASSERT_EQ(EBADF, errno);
++      ASSERT_EQ(0, close(reg_fd));
++}
++
++TEST_F_FORK(layout1, unhandled_access)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      /* Here, we only handle read accesses, not write accesses. */
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /*
++       * Because the policy does not handle LANDLOCK_ACCESS_FS_WRITE_FILE,
++       * opening for write-only should be allowed, but not read-write.
++       */
++      ASSERT_EQ(0, test_open(file1_s1d1, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
++
++      ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY));
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
++}
++
++TEST_F_FORK(layout1, ruleset_overlap)
++{
++      const struct rule rules[] = {
++              /* These rules should be ORed among them. */
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_READ_DIR,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks s1d1 hierarchy. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++      /* Checks s1d2 hierarchy. */
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY));
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++      /* Checks s1d3 hierarchy. */
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++}
++
++TEST_F_FORK(layout1, non_overlapping_accesses)
++{
++      const struct rule layer1[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_MAKE_REG,
++              },
++              {}
++      };
++      const struct rule layer2[] = {
++              {
++                      .path = dir_s1d3,
++                      .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
++              },
++              {}
++      };
++      int ruleset_fd;
++
++      ASSERT_EQ(0, unlink(file1_s1d1));
++      ASSERT_EQ(0, unlink(file1_s1d2));
++
++      ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG,
++                      layer1);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0));
++      ASSERT_EQ(0, unlink(file1_s1d2));
++
++      ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE,
++                      layer2);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Unchanged accesses for file creation. */
++      ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0));
++
++      /* Checks file removing. */
++      ASSERT_EQ(-1, unlink(file1_s1d2));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(0, unlink(file1_s1d3));
++}
++
++TEST_F_FORK(layout1, interleaved_masked_accesses)
++{
++      /*
++       * Checks overly restrictive rules:
++       * layer 1: allows R   s1d1/s1d2/s1d3/file1
++       * layer 2: allows RW  s1d1/s1d2/s1d3
++       *          allows  W  s1d1/s1d2
++       *          denies R   s1d1/s1d2
++       * layer 3: allows R   s1d1
++       * layer 4: allows R   s1d1/s1d2
++       *          denies  W  s1d1/s1d2
++       * layer 5: allows R   s1d1/s1d2
++       * layer 6: allows   X ----
++       * layer 7: allows  W  s1d1/s1d2
++       *          denies R   s1d1/s1d2
++       */
++      const struct rule layer1_read[] = {
++              /* Allows read access to file1_s1d3 with the first layer. */
++              {
++                      .path = file1_s1d3,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {}
++      };
++      /* First rule with write restrictions. */
++      const struct rule layer2_read_write[] = {
++              /* Start by granting read-write access via its parent directory... */
++              {
++                      .path = dir_s1d3,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              /* ...but also denies read access via its grandparent directory. */
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {}
++      };
++      const struct rule layer3_read[] = {
++              /* Allows read access via its great-grandparent directory. */
++              {
++                      .path = dir_s1d1,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {}
++      };
++      const struct rule layer4_read_write[] = {
++              /*
++               * Try to confuse the deny access by denying write (but not
++               * read) access via its grandparent directory.
++               */
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {}
++      };
++      const struct rule layer5_read[] = {
++              /*
++               * Try to override layer2's deny read access by explicitly
++               * allowing read access via file1_s1d3's grandparent.
++               */
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {}
++      };
++      const struct rule layer6_execute[] = {
++              /*
++               * Restricts an unrelated file hierarchy with a new access
++               * (non-overlapping) type.
++               */
++              {
++                      .path = dir_s2d1,
++                      .access = LANDLOCK_ACCESS_FS_EXECUTE,
++              },
++              {}
++      };
++      const struct rule layer7_read_write[] = {
++              /*
++               * Finally, denies read access to file1_s1d3 via its
++               * grandparent.
++               */
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {}
++      };
++      int ruleset_fd;
++
++      ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
++                      layer1_read);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks that read access is granted for file1_s1d3 with layer 1. */
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
++
++      ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
++                      LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks that previous access rights are unchanged with layer 2. */
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
++
++      ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
++                      layer3_read);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks that previous access rights are unchanged with layer 3. */
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
++
++      /* This time, denies write access for the file hierarchy. */
++      ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
++                      LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /*
++       * Checks that the only change with layer 4 is that write access is
++       * denied.
++       */
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
++
++      ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
++                      layer5_read);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks that previous access rights are unchanged with layer 5. */
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++
++      ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE,
++                      layer6_execute);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks that previous access rights are unchanged with layer 6. */
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++
++      ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
++                      LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks read access is now denied with layer 7. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, inherit_subset)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_READ_DIR,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++      /* Write access is forbidden. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++      /* Readdir access is allowed. */
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++      /* Write access is forbidden. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++      /* Readdir access is allowed. */
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++      /*
++       * Tests shared rule extension: the following rules should not grant
++       * any new access, only remove some.  Once enforced, these rules are
++       * ANDed with the previous ones.
++       */
++      add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
++                      dir_s1d2);
++      /*
++       * According to ruleset_fd, dir_s1d2 should now have the
++       * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE
++       * access rights (even if this directory is opened a second time).
++       * However, when enforcing this updated ruleset, the ruleset tied to
++       * the current process (i.e. its domain) will still only have the
++       * dir_s1d2 with LANDLOCK_ACCESS_FS_READ_FILE and
++       * LANDLOCK_ACCESS_FS_READ_DIR accesses, but
++       * LANDLOCK_ACCESS_FS_WRITE_FILE must not be allowed because it would
++       * be a privilege escalation.
++       */
++      enforce_ruleset(_metadata, ruleset_fd);
++
++      /* Same tests and results as above. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++      /* It is still forbidden to write in file1_s1d2. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++      /* Readdir access is still allowed. */
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++      /* It is still forbidden to write in file1_s1d3. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++      /* Readdir access is still allowed. */
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++      /*
++       * Try to get more privileges by adding new access rights to the parent
++       * directory: dir_s1d1.
++       */
++      add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1);
++      enforce_ruleset(_metadata, ruleset_fd);
++
++      /* Same tests and results as above. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++      /* It is still forbidden to write in file1_s1d2. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++      /* Readdir access is still allowed. */
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++      /* It is still forbidden to write in file1_s1d3. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++      /* Readdir access is still allowed. */
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++      /*
++       * Now, dir_s1d3 get a new rule tied to it, only allowing
++       * LANDLOCK_ACCESS_FS_WRITE_FILE.  The (kernel internal) difference is
++       * that there was no rule tied to it before.
++       */
++      add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
++                      dir_s1d3);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /*
++       * Same tests and results as above, except for open(dir_s1d3) which is
++       * now denied because the new rule mask the rule previously inherited
++       * from dir_s1d2.
++       */
++
++      /* Same tests and results as above. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++      /* It is still forbidden to write in file1_s1d2. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++      /* Readdir access is still allowed. */
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++      /* It is still forbidden to write in file1_s1d3. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++      /*
++       * Readdir of dir_s1d3 is still allowed because of the OR policy inside
++       * the same layer.
++       */
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++}
++
++TEST_F_FORK(layout1, inherit_superset)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d3,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++
++      /* Readdir access is denied for dir_s1d2. */
++      ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++      /* Readdir access is allowed for dir_s1d3. */
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++      /* File access is allowed for file1_s1d3. */
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++
++      /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */
++      add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE |
++                      LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Readdir access is still denied for dir_s1d2. */
++      ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++      /* Readdir access is still allowed for dir_s1d3. */
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++      /* File access is still allowed for file1_s1d3. */
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, max_layers)
++{
++      int i, err;
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      for (i = 0; i < 64; i++)
++              enforce_ruleset(_metadata, ruleset_fd);
++
++      for (i = 0; i < 2; i++) {
++              err = landlock_restrict_self(ruleset_fd, 0);
++              ASSERT_EQ(-1, err);
++              ASSERT_EQ(E2BIG, errno);
++      }
++      ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST_F_FORK(layout1, empty_or_same_ruleset)
++{
++      struct landlock_ruleset_attr ruleset_attr = {};
++      int ruleset_fd;
++
++      /* Tests empty handled_access_fs. */
++      ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++      ASSERT_LE(-1, ruleset_fd);
++      ASSERT_EQ(ENOMSG, errno);
++
++      /* Enforces policy which deny read access to all files. */
++      ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE;
++      ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++
++      /* Nests a policy which deny read access to all directories. */
++      ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR;
++      ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
++
++      /* Enforces a second time with the same ruleset. */
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST_F_FORK(layout1, rule_on_mountpoint)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d1,
++                      .access = ACCESS_RO,
++              },
++              {
++                      /* dir_s3d2 is a mount point. */
++                      .path = dir_s3d2,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++
++      ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
++
++      ASSERT_EQ(EACCES, test_open(dir_s3d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, rule_over_mountpoint)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d1,
++                      .access = ACCESS_RO,
++              },
++              {
++                      /* dir_s3d2 is a mount point. */
++                      .path = dir_s3d1,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++
++      ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
++
++      ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
++}
++
++/*
++ * This test verifies that we can apply a landlock rule on the root directory
++ * (which might require special handling).
++ */
++TEST_F_FORK(layout1, rule_over_root_allow_then_deny)
++{
++      struct rule rules[] = {
++              {
++                      .path = "/",
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks allowed access. */
++      ASSERT_EQ(0, test_open("/", O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++
++      rules[0].access = LANDLOCK_ACCESS_FS_READ_FILE;
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks denied access (on a directory). */
++      ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, rule_over_root_deny)
++{
++      const struct rule rules[] = {
++              {
++                      .path = "/",
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks denied access (on a directory). */
++      ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, rule_inside_mount_ns)
++{
++      const struct rule rules[] = {
++              {
++                      .path = "s3d3",
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      int ruleset_fd;
++
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) {
++              TH_LOG("Failed to pivot root: %s", strerror(errno));
++      };
++      ASSERT_EQ(0, chdir("/"));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(0, test_open("s3d3", O_RDONLY));
++      ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++}
++
++TEST_F_FORK(layout1, mount_and_pivot)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s3d2,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
++      ASSERT_EQ(EPERM, errno);
++      ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3));
++      ASSERT_EQ(EPERM, errno);
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++TEST_F_FORK(layout1, move_mount)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s3d2,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
++                              dir_s1d2, 0)) {
++              TH_LOG("Failed to move mount: %s", strerror(errno));
++      }
++
++      ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
++                              dir_s3d2, 0));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
++                              dir_s1d2, 0));
++      ASSERT_EQ(EPERM, errno);
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++TEST_F_FORK(layout1, release_inodes)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d1,
++                      .access = ACCESS_RO,
++              },
++              {
++                      .path = dir_s3d2,
++                      .access = ACCESS_RO,
++              },
++              {
++                      .path = dir_s3d3,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      /* Unmount a file hierarchy while it is being used by a ruleset. */
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      ASSERT_EQ(0, umount(dir_s3d2));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY));
++      /* This dir_s3d3 would not be allowed and does not exist anyway. */
++      ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY));
++}
++
++enum relative_access {
++      REL_OPEN,
++      REL_CHDIR,
++      REL_CHROOT_ONLY,
++      REL_CHROOT_CHDIR,
++};
++
++static void test_relative_path(struct __test_metadata *const _metadata,
++              const enum relative_access rel)
++{
++      /*
++       * Common layer to check that chroot doesn't ignore it (i.e. a chroot
++       * is not a disconnected root directory).
++       */
++      const struct rule layer1_base[] = {
++              {
++                      .path = TMP_DIR,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      const struct rule layer2_subs[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = ACCESS_RO,
++              },
++              {
++                      .path = dir_s2d2,
++                      .access = ACCESS_RO,
++              },
++              {}
++      };
++      int dirfd, ruleset_fd;
++
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_subs);
++
++      ASSERT_LE(0, ruleset_fd);
++      switch (rel) {
++      case REL_OPEN:
++      case REL_CHDIR:
++              break;
++      case REL_CHROOT_ONLY:
++              ASSERT_EQ(0, chdir(dir_s2d2));
++              break;
++      case REL_CHROOT_CHDIR:
++              ASSERT_EQ(0, chdir(dir_s1d2));
++              break;
++      default:
++              ASSERT_TRUE(false);
++              return;
++      }
++
++      set_cap(_metadata, CAP_SYS_CHROOT);
++      enforce_ruleset(_metadata, ruleset_fd);
++
++      switch (rel) {
++      case REL_OPEN:
++              dirfd = open(dir_s1d2, O_DIRECTORY);
++              ASSERT_LE(0, dirfd);
++              break;
++      case REL_CHDIR:
++              ASSERT_EQ(0, chdir(dir_s1d2));
++              dirfd = AT_FDCWD;
++              break;
++      case REL_CHROOT_ONLY:
++              /* Do chroot into dir_s1d2 (relative to dir_s2d2). */
++              ASSERT_EQ(0, chroot("../../s1d1/s1d2")) {
++                      TH_LOG("Failed to chroot: %s", strerror(errno));
++              }
++              dirfd = AT_FDCWD;
++              break;
++      case REL_CHROOT_CHDIR:
++              /* Do chroot into dir_s1d2. */
++              ASSERT_EQ(0, chroot(".")) {
++                      TH_LOG("Failed to chroot: %s", strerror(errno));
++              }
++              dirfd = AT_FDCWD;
++              break;
++      }
++
++      ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES,
++                      test_open_rel(dirfd, "..", O_RDONLY));
++      ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY));
++
++      if (rel == REL_CHROOT_ONLY) {
++              /* The current directory is dir_s2d2. */
++              ASSERT_EQ(0, test_open_rel(dirfd, "./s2d3", O_RDONLY));
++      } else {
++              /* The current directory is dir_s1d2. */
++              ASSERT_EQ(0, test_open_rel(dirfd, "./s1d3", O_RDONLY));
++      }
++
++      if (rel == REL_CHROOT_ONLY || rel == REL_CHROOT_CHDIR) {
++              /* Checks the root dir_s1d2. */
++              ASSERT_EQ(0, test_open_rel(dirfd, "/..", O_RDONLY));
++              ASSERT_EQ(0, test_open_rel(dirfd, "/", O_RDONLY));
++              ASSERT_EQ(0, test_open_rel(dirfd, "/f1", O_RDONLY));
++              ASSERT_EQ(0, test_open_rel(dirfd, "/s1d3", O_RDONLY));
++      }
++
++      if (rel != REL_CHROOT_CHDIR) {
++              ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY));
++              ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY));
++              ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY));
++
++              ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY));
++              ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY));
++              ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY));
++      }
++
++      if (rel == REL_OPEN)
++              ASSERT_EQ(0, close(dirfd));
++      ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST_F_FORK(layout1, relative_open)
++{
++      test_relative_path(_metadata, REL_OPEN);
++}
++
++TEST_F_FORK(layout1, relative_chdir)
++{
++      test_relative_path(_metadata, REL_CHDIR);
++}
++
++TEST_F_FORK(layout1, relative_chroot_only)
++{
++      test_relative_path(_metadata, REL_CHROOT_ONLY);
++}
++
++TEST_F_FORK(layout1, relative_chroot_chdir)
++{
++      test_relative_path(_metadata, REL_CHROOT_CHDIR);
++}
++
++static void copy_binary(struct __test_metadata *const _metadata,
++              const char *const dst_path)
++{
++      int dst_fd, src_fd;
++      struct stat statbuf;
++
++      dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC);
++      ASSERT_LE(0, dst_fd) {
++              TH_LOG("Failed to open \"%s\": %s", dst_path,
++                              strerror(errno));
++      }
++      src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC);
++      ASSERT_LE(0, src_fd) {
++              TH_LOG("Failed to open \"" BINARY_PATH "\": %s",
++                              strerror(errno));
++      }
++      ASSERT_EQ(0, fstat(src_fd, &statbuf));
++      ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0,
++                              statbuf.st_size));
++      ASSERT_EQ(0, close(src_fd));
++      ASSERT_EQ(0, close(dst_fd));
++}
++
++static void test_execute(struct __test_metadata *const _metadata,
++              const int err, const char *const path)
++{
++      int status;
++      char *const argv[] = {(char *)path, NULL};
++      const pid_t child = fork();
++
++      ASSERT_LE(0, child);
++      if (child == 0) {
++              ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) {
++                      TH_LOG("Failed to execute \"%s\": %s", path,
++                                      strerror(errno));
++              };
++              ASSERT_EQ(err, errno);
++              _exit(_metadata->passed ? 2 : 1);
++              return;
++      }
++      ASSERT_EQ(child, waitpid(child, &status, 0));
++      ASSERT_EQ(1, WIFEXITED(status));
++      ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) {
++              TH_LOG("Unexpected return code for \"%s\": %s", path,
++                              strerror(errno));
++      };
++}
++
++TEST_F_FORK(layout1, execute)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_EXECUTE,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++                      rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      copy_binary(_metadata, file1_s1d1);
++      copy_binary(_metadata, file1_s1d2);
++      copy_binary(_metadata, file1_s1d3);
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++      test_execute(_metadata, EACCES, file1_s1d1);
++
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++      test_execute(_metadata, 0, file1_s1d2);
++
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++      test_execute(_metadata, 0, file1_s1d3);
++}
++
++TEST_F_FORK(layout1, link)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_MAKE_REG,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++                      rules);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      ASSERT_EQ(0, unlink(file1_s1d1));
++      ASSERT_EQ(0, unlink(file1_s1d2));
++      ASSERT_EQ(0, unlink(file1_s1d3));
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
++      ASSERT_EQ(EACCES, errno);
++      /* Denies linking because of reparenting. */
++      ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
++      ASSERT_EQ(EXDEV, errno);
++      ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
++      ASSERT_EQ(EXDEV, errno);
++
++      ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
++      ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
++}
++
++TEST_F_FORK(layout1, rename_file)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d3,
++                      .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
++              },
++              {
++                      .path = dir_s2d2,
++                      .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++                      rules);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      ASSERT_EQ(0, unlink(file1_s1d1));
++      ASSERT_EQ(0, unlink(file1_s1d2));
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /*
++       * Tries to replace a file, from a directory that allows file removal,
++       * but to a different directory (which also allows file removal).
++       */
++      ASSERT_EQ(-1, rename(file1_s2d3, file1_s1d3));
++      ASSERT_EQ(EXDEV, errno);
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d3,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EXDEV, errno);
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EXDEV, errno);
++
++      /*
++       * Tries to replace a file, from a directory that denies file removal,
++       * to a different directory (which allows file removal).
++       */
++      ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
++      ASSERT_EQ(EXDEV, errno);
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file1_s1d3,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EXDEV, errno);
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s1d3,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EXDEV, errno);
++
++      /* Exchanges files and directories that partially allow removal. */
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EACCES, errno);
++
++      /* Renames files with different parents. */
++      ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
++      ASSERT_EQ(EXDEV, errno);
++      ASSERT_EQ(0, unlink(file1_s1d3));
++      ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
++      ASSERT_EQ(EXDEV, errno);
++
++      /* Exchanges and renames files with same parent. */
++      ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3));
++
++      /* Exchanges files and directories with same parent, twice. */
++      ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
++                              RENAME_EXCHANGE));
++}
++
++TEST_F_FORK(layout1, rename_dir)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
++              },
++              {
++                      .path = dir_s2d1,
++                      .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++                      rules);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      /* Empties dir_s1d3 to allow renaming. */
++      ASSERT_EQ(0, unlink(file1_s1d3));
++      ASSERT_EQ(0, unlink(file2_s1d3));
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Exchanges and renames directory to a different parent. */
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EXDEV, errno);
++      ASSERT_EQ(-1, rename(dir_s2d3, dir_s1d3));
++      ASSERT_EQ(EXDEV, errno);
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EXDEV, errno);
++
++      /*
++       * Exchanges directory to the same parent, which doesn't allow
++       * directory removal.
++       */
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(EACCES, errno);
++
++      /*
++       * Exchanges and renames directory to the same parent, which allows
++       * directory removal.
++       */
++      ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2,
++                              RENAME_EXCHANGE));
++      ASSERT_EQ(0, unlink(dir_s1d3));
++      ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
++      ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3));
++      ASSERT_EQ(0, rmdir(dir_s1d3));
++}
++
++TEST_F_FORK(layout1, remove_dir)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++                      rules);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      ASSERT_EQ(0, unlink(file1_s1d1));
++      ASSERT_EQ(0, unlink(file1_s1d2));
++      ASSERT_EQ(0, unlink(file1_s1d3));
++      ASSERT_EQ(0, unlink(file2_s1d3));
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(0, rmdir(dir_s1d3));
++      ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
++      ASSERT_EQ(0, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR));
++
++      /* dir_s1d2 itself cannot be removed. */
++      ASSERT_EQ(-1, rmdir(dir_s1d2));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d2, AT_REMOVEDIR));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, rmdir(dir_s1d1));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d1, AT_REMOVEDIR));
++      ASSERT_EQ(EACCES, errno);
++}
++
++TEST_F_FORK(layout1, remove_file)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++                      rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(-1, unlink(file1_s1d1));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, unlinkat(AT_FDCWD, file1_s1d1, 0));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(0, unlink(file1_s1d2));
++      ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0));
++}
++
++static void test_make_file(struct __test_metadata *const _metadata,
++              const __u64 access, const mode_t mode, const dev_t dev)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = access,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, access, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      ASSERT_EQ(0, unlink(file1_s1d1));
++      ASSERT_EQ(0, unlink(file2_s1d1));
++      ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) {
++              TH_LOG("Failed to make file \"%s\": %s",
++                              file2_s1d1, strerror(errno));
++      };
++
++      ASSERT_EQ(0, unlink(file1_s1d2));
++      ASSERT_EQ(0, unlink(file2_s1d2));
++
++      ASSERT_EQ(0, unlink(file1_s1d3));
++      ASSERT_EQ(0, unlink(file2_s1d3));
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(-1, mknod(file1_s1d1, mode | 0400, dev));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
++      ASSERT_EQ(EACCES, errno);
++
++      ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) {
++              TH_LOG("Failed to make file \"%s\": %s",
++                              file1_s1d2, strerror(errno));
++      };
++      ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
++      ASSERT_EQ(0, unlink(file2_s1d2));
++      ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
++
++      ASSERT_EQ(0, mknod(file1_s1d3, mode | 0400, dev));
++      ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
++      ASSERT_EQ(0, unlink(file2_s1d3));
++      ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
++}
++
++TEST_F_FORK(layout1, make_char)
++{
++      /* Creates a /dev/null device. */
++      set_cap(_metadata, CAP_MKNOD);
++      test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR,
++                      makedev(1, 3));
++}
++
++TEST_F_FORK(layout1, make_block)
++{
++      /* Creates a /dev/loop0 device. */
++      set_cap(_metadata, CAP_MKNOD);
++      test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK,
++                      makedev(7, 0));
++}
++
++TEST_F_FORK(layout1, make_reg_1)
++{
++      test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, S_IFREG, 0);
++}
++
++TEST_F_FORK(layout1, make_reg_2)
++{
++      test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, 0, 0);
++}
++
++TEST_F_FORK(layout1, make_sock)
++{
++      test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_SOCK, S_IFSOCK, 0);
++}
++
++TEST_F_FORK(layout1, make_fifo)
++{
++      test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_FIFO, S_IFIFO, 0);
++}
++
++TEST_F_FORK(layout1, make_sym)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_MAKE_SYM,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++                      rules);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      ASSERT_EQ(0, unlink(file1_s1d1));
++      ASSERT_EQ(0, unlink(file2_s1d1));
++      ASSERT_EQ(0, symlink("none", file2_s1d1));
++
++      ASSERT_EQ(0, unlink(file1_s1d2));
++      ASSERT_EQ(0, unlink(file2_s1d2));
++
++      ASSERT_EQ(0, unlink(file1_s1d3));
++      ASSERT_EQ(0, unlink(file2_s1d3));
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(-1, symlink("none", file1_s1d1));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
++      ASSERT_EQ(EACCES, errno);
++
++      ASSERT_EQ(0, symlink("none", file1_s1d2));
++      ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
++      ASSERT_EQ(0, unlink(file2_s1d2));
++      ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
++
++      ASSERT_EQ(0, symlink("none", file1_s1d3));
++      ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
++      ASSERT_EQ(0, unlink(file2_s1d3));
++      ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
++}
++
++TEST_F_FORK(layout1, make_dir)
++{
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
++              },
++              {}
++      };
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++                      rules);
++
++      ASSERT_LE(0, ruleset_fd);
++
++      ASSERT_EQ(0, unlink(file1_s1d1));
++      ASSERT_EQ(0, unlink(file1_s1d2));
++      ASSERT_EQ(0, unlink(file1_s1d3));
++
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Uses file_* as directory names. */
++      ASSERT_EQ(-1, mkdir(file1_s1d1, 0700));
++      ASSERT_EQ(EACCES, errno);
++      ASSERT_EQ(0, mkdir(file1_s1d2, 0700));
++      ASSERT_EQ(0, mkdir(file1_s1d3, 0700));
++}
++
++static int open_proc_fd(struct __test_metadata *const _metadata, const int fd,
++              const int open_flags)
++{
++      static const char path_template[] = "/proc/self/fd/%d";
++      char procfd_path[sizeof(path_template) + 10];
++      const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path),
++                      path_template, fd);
++
++      ASSERT_LT(procfd_path_size, sizeof(procfd_path));
++      return open(procfd_path, open_flags);
++}
++
++TEST_F_FORK(layout1, proc_unlinked_file)
++{
++      const struct rule rules[] = {
++              {
++                      .path = file1_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {}
++      };
++      int reg_fd, proc_fd;
++      const int ruleset_fd = create_ruleset(_metadata,
++                      LANDLOCK_ACCESS_FS_READ_FILE |
++                      LANDLOCK_ACCESS_FS_WRITE_FILE, rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++      reg_fd = open(file1_s1d2, O_RDONLY | O_CLOEXEC);
++      ASSERT_LE(0, reg_fd);
++      ASSERT_EQ(0, unlink(file1_s1d2));
++
++      proc_fd = open_proc_fd(_metadata, reg_fd, O_RDONLY | O_CLOEXEC);
++      ASSERT_LE(0, proc_fd);
++      ASSERT_EQ(0, close(proc_fd));
++
++      proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC);
++      ASSERT_EQ(-1, proc_fd) {
++              TH_LOG("Successfully opened /proc/self/fd/%d: %s",
++                              reg_fd, strerror(errno));
++      }
++      ASSERT_EQ(EACCES, errno);
++
++      ASSERT_EQ(0, close(reg_fd));
++}
++
++TEST_F_FORK(layout1, proc_pipe)
++{
++      int proc_fd;
++      int pipe_fds[2];
++      char buf = '\0';
++      const struct rule rules[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {}
++      };
++      /* Limits read and write access to files tied to the filesystem. */
++      const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++                      rules);
++
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks enforcement for normal files. */
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
++
++      /* Checks access to pipes through FD. */
++      ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC));
++      ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) {
++              TH_LOG("Failed to write in pipe: %s", strerror(errno));
++      }
++      ASSERT_EQ(1, read(pipe_fds[0], &buf, 1));
++      ASSERT_EQ('.', buf);
++
++      /* Checks write access to pipe through /proc/self/fd . */
++      proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC);
++      ASSERT_LE(0, proc_fd);
++      ASSERT_EQ(1, write(proc_fd, ".", 1)) {
++              TH_LOG("Failed to write through /proc/self/fd/%d: %s",
++                              pipe_fds[1], strerror(errno));
++      }
++      ASSERT_EQ(0, close(proc_fd));
++
++      /* Checks read access to pipe through /proc/self/fd . */
++      proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC);
++      ASSERT_LE(0, proc_fd);
++      buf = '\0';
++      ASSERT_EQ(1, read(proc_fd, &buf, 1)) {
++              TH_LOG("Failed to read through /proc/self/fd/%d: %s",
++                              pipe_fds[1], strerror(errno));
++      }
++      ASSERT_EQ(0, close(proc_fd));
++
++      ASSERT_EQ(0, close(pipe_fds[0]));
++      ASSERT_EQ(0, close(pipe_fds[1]));
++}
++
++FIXTURE(layout1_bind) {
++};
++
++FIXTURE_SETUP(layout1_bind)
++{
++      prepare_layout(_metadata);
++
++      create_layout1(_metadata);
++
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      ASSERT_EQ(0, mount(dir_s1d2, dir_s2d2, NULL, MS_BIND, NULL));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++FIXTURE_TEARDOWN(layout1_bind)
++{
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, umount(dir_s2d2));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++
++      remove_layout1(_metadata);
++
++      cleanup_layout(_metadata);
++}
++
++static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3";
++static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1";
++
++/*
++ * layout1_bind hierarchy:
++ *
++ * tmp
++ * ├── s1d1
++ * │   ├── f1
++ * │   ├── f2
++ * │   └── s1d2
++ * │       ├── f1
++ * │       ├── f2
++ * │       └── s1d3
++ * │           ├── f1
++ * │           └── f2
++ * ├── s2d1
++ * │   ├── f1
++ * │   └── s2d2
++ * │       ├── f1
++ * │       ├── f2
++ * │       └── s1d3
++ * │           ├── f1
++ * │           └── f2
++ * └── s3d1
++ *     └── s3d2
++ *         └── s3d3
++ */
++
++TEST_F_FORK(layout1_bind, no_restriction)
++{
++      ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++
++      ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY));
++      ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY));
++      ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
++      ASSERT_EQ(ENOENT, test_open(dir_s2d3, O_RDONLY));
++      ASSERT_EQ(ENOENT, test_open(file1_s2d3, O_RDONLY));
++
++      ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY));
++      ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY));
++
++      ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
++}
++
++TEST_F_FORK(layout1_bind, same_content_same_file)
++{
++      /*
++       * Sets access right on parent directories of both source and
++       * destination mount points.
++       */
++      const struct rule layer1_parent[] = {
++              {
++                      .path = dir_s1d1,
++                      .access = ACCESS_RO,
++              },
++              {
++                      .path = dir_s2d1,
++                      .access = ACCESS_RW,
++              },
++              {}
++      };
++      /*
++       * Sets access rights on the same bind-mounted directories.  The result
++       * should be ACCESS_RW for both directories, but not both hierarchies
++       * because of the first layer.
++       */
++      const struct rule layer2_mount_point[] = {
++              {
++                      .path = dir_s1d2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = dir_s2d2,
++                      .access = ACCESS_RW,
++              },
++              {}
++      };
++      /* Only allow read-access to the s1d3 hierarchies. */
++      const struct rule layer3_source[] = {
++              {
++                      .path = dir_s1d3,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {}
++      };
++      /* Removes all access rights. */
++      const struct rule layer4_destination[] = {
++              {
++                      .path = bind_file1_s1d3,
++                      .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {}
++      };
++      int ruleset_fd;
++
++      /* Sets rules for the parent directories. */
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_parent);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks source hierarchy. */
++      ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++      ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++      /* Checks destination hierarchy. */
++      ASSERT_EQ(0, test_open(file1_s2d1, O_RDWR));
++      ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY));
++
++      ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR));
++      ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
++
++      /* Sets rules for the mount points. */
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_mount_point);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks source hierarchy. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++      ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++      ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++      /* Checks destination hierarchy. */
++      ASSERT_EQ(EACCES, test_open(file1_s2d1, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s2d1, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY));
++
++      ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR));
++      ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
++      ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++      /* Sets a (shared) rule only on the source. */
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_source);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks source hierarchy. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++      ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++      /* Checks destination hierarchy. */
++      ASSERT_EQ(EACCES, test_open(file1_s2d2, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s2d2, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
++
++      ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
++      ASSERT_EQ(EACCES, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++      /* Sets a (shared) rule only on the destination. */
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_destination);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks source hierarchy. */
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++
++      /* Checks destination hierarchy. */
++      ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_RDONLY));
++      ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
++}
++
++#define LOWER_BASE    TMP_DIR "/lower"
++#define LOWER_DATA    LOWER_BASE "/data"
++static const char lower_fl1[] = LOWER_DATA "/fl1";
++static const char lower_dl1[] = LOWER_DATA "/dl1";
++static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2";
++static const char lower_fo1[] = LOWER_DATA "/fo1";
++static const char lower_do1[] = LOWER_DATA "/do1";
++static const char lower_do1_fo2[] = LOWER_DATA "/do1/fo2";
++static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3";
++
++static const char (*lower_base_files[])[] = {
++      &lower_fl1,
++      &lower_fo1,
++      NULL
++};
++static const char (*lower_base_directories[])[] = {
++      &lower_dl1,
++      &lower_do1,
++      NULL
++};
++static const char (*lower_sub_files[])[] = {
++      &lower_dl1_fl2,
++      &lower_do1_fo2,
++      &lower_do1_fl3,
++      NULL
++};
++
++#define UPPER_BASE    TMP_DIR "/upper"
++#define UPPER_DATA    UPPER_BASE "/data"
++#define UPPER_WORK    UPPER_BASE "/work"
++static const char upper_fu1[] = UPPER_DATA "/fu1";
++static const char upper_du1[] = UPPER_DATA "/du1";
++static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2";
++static const char upper_fo1[] = UPPER_DATA "/fo1";
++static const char upper_do1[] = UPPER_DATA "/do1";
++static const char upper_do1_fo2[] = UPPER_DATA "/do1/fo2";
++static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3";
++
++static const char (*upper_base_files[])[] = {
++      &upper_fu1,
++      &upper_fo1,
++      NULL
++};
++static const char (*upper_base_directories[])[] = {
++      &upper_du1,
++      &upper_do1,
++      NULL
++};
++static const char (*upper_sub_files[])[] = {
++      &upper_du1_fu2,
++      &upper_do1_fo2,
++      &upper_do1_fu3,
++      NULL
++};
++
++#define MERGE_BASE    TMP_DIR "/merge"
++#define MERGE_DATA    MERGE_BASE "/data"
++static const char merge_fl1[] = MERGE_DATA "/fl1";
++static const char merge_dl1[] = MERGE_DATA "/dl1";
++static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2";
++static const char merge_fu1[] = MERGE_DATA "/fu1";
++static const char merge_du1[] = MERGE_DATA "/du1";
++static const char merge_du1_fu2[] = MERGE_DATA "/du1/fu2";
++static const char merge_fo1[] = MERGE_DATA "/fo1";
++static const char merge_do1[] = MERGE_DATA "/do1";
++static const char merge_do1_fo2[] = MERGE_DATA "/do1/fo2";
++static const char merge_do1_fl3[] = MERGE_DATA "/do1/fl3";
++static const char merge_do1_fu3[] = MERGE_DATA "/do1/fu3";
++
++static const char (*merge_base_files[])[] = {
++      &merge_fl1,
++      &merge_fu1,
++      &merge_fo1,
++      NULL
++};
++static const char (*merge_base_directories[])[] = {
++      &merge_dl1,
++      &merge_du1,
++      &merge_do1,
++      NULL
++};
++static const char (*merge_sub_files[])[] = {
++      &merge_dl1_fl2,
++      &merge_du1_fu2,
++      &merge_do1_fo2,
++      &merge_do1_fl3,
++      &merge_do1_fu3,
++      NULL
++};
++
++/*
++ * layout2_overlay hierarchy:
++ *
++ * tmp
++ * ├── lower
++ * │   └── data
++ * │       ├── dl1
++ * │       │   └── fl2
++ * │       ├── do1
++ * │       │   ├── fl3
++ * │       │   └── fo2
++ * │       ├── fl1
++ * │       └── fo1
++ * ├── merge
++ * │   └── data
++ * │       ├── dl1
++ * │       │   └── fl2
++ * │       ├── do1
++ * │       │   ├── fl3
++ * │       │   ├── fo2
++ * │       │   └── fu3
++ * │       ├── du1
++ * │       │   └── fu2
++ * │       ├── fl1
++ * │       ├── fo1
++ * │       └── fu1
++ * └── upper
++ *     ├── data
++ *     │   ├── do1
++ *     │   │   ├── fo2
++ *     │   │   └── fu3
++ *     │   ├── du1
++ *     │   │   └── fu2
++ *     │   ├── fo1
++ *     │   └── fu1
++ *     └── work
++ *         └── work
++ */
++
++FIXTURE(layout2_overlay) {
++};
++
++FIXTURE_SETUP(layout2_overlay)
++{
++      prepare_layout(_metadata);
++
++      create_directory(_metadata, LOWER_BASE);
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      /* Creates tmpfs mount points to get deterministic overlayfs. */
++      ASSERT_EQ(0, mount("tmp", LOWER_BASE, "tmpfs", 0, "size=4m,mode=700"));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++      create_file(_metadata, lower_fl1);
++      create_file(_metadata, lower_dl1_fl2);
++      create_file(_metadata, lower_fo1);
++      create_file(_metadata, lower_do1_fo2);
++      create_file(_metadata, lower_do1_fl3);
++
++      create_directory(_metadata, UPPER_BASE);
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      ASSERT_EQ(0, mount("tmp", UPPER_BASE, "tmpfs", 0, "size=4m,mode=700"));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++      create_file(_metadata, upper_fu1);
++      create_file(_metadata, upper_du1_fu2);
++      create_file(_metadata, upper_fo1);
++      create_file(_metadata, upper_do1_fo2);
++      create_file(_metadata, upper_do1_fu3);
++      ASSERT_EQ(0, mkdir(UPPER_WORK, 0700));
++
++      create_directory(_metadata, MERGE_DATA);
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      set_cap(_metadata, CAP_DAC_OVERRIDE);
++      ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0,
++                              "lowerdir=" LOWER_DATA
++                              ",upperdir=" UPPER_DATA
++                              ",workdir=" UPPER_WORK));
++      clear_cap(_metadata, CAP_DAC_OVERRIDE);
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++FIXTURE_TEARDOWN(layout2_overlay)
++{
++      EXPECT_EQ(0, remove_path(lower_do1_fl3));
++      EXPECT_EQ(0, remove_path(lower_dl1_fl2));
++      EXPECT_EQ(0, remove_path(lower_fl1));
++      EXPECT_EQ(0, remove_path(lower_do1_fo2));
++      EXPECT_EQ(0, remove_path(lower_fo1));
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, umount(LOWER_BASE));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, remove_path(LOWER_BASE));
++
++      EXPECT_EQ(0, remove_path(upper_do1_fu3));
++      EXPECT_EQ(0, remove_path(upper_du1_fu2));
++      EXPECT_EQ(0, remove_path(upper_fu1));
++      EXPECT_EQ(0, remove_path(upper_do1_fo2));
++      EXPECT_EQ(0, remove_path(upper_fo1));
++      EXPECT_EQ(0, remove_path(UPPER_WORK "/work"));
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, umount(UPPER_BASE));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, remove_path(UPPER_BASE));
++
++      set_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, umount(MERGE_DATA));
++      clear_cap(_metadata, CAP_SYS_ADMIN);
++      EXPECT_EQ(0, remove_path(MERGE_DATA));
++
++      cleanup_layout(_metadata);
++}
++
++TEST_F_FORK(layout2_overlay, no_restriction)
++{
++      ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY));
++      ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY));
++      ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY));
++      ASSERT_EQ(0, test_open(lower_fo1, O_RDONLY));
++      ASSERT_EQ(0, test_open(lower_do1, O_RDONLY));
++      ASSERT_EQ(0, test_open(lower_do1_fo2, O_RDONLY));
++      ASSERT_EQ(0, test_open(lower_do1_fl3, O_RDONLY));
++
++      ASSERT_EQ(0, test_open(upper_fu1, O_RDONLY));
++      ASSERT_EQ(0, test_open(upper_du1, O_RDONLY));
++      ASSERT_EQ(0, test_open(upper_du1_fu2, O_RDONLY));
++      ASSERT_EQ(0, test_open(upper_fo1, O_RDONLY));
++      ASSERT_EQ(0, test_open(upper_do1, O_RDONLY));
++      ASSERT_EQ(0, test_open(upper_do1_fo2, O_RDONLY));
++      ASSERT_EQ(0, test_open(upper_do1_fu3, O_RDONLY));
++
++      ASSERT_EQ(0, test_open(merge_fl1, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_dl1, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_dl1_fl2, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_fu1, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_du1, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_du1_fu2, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_fo1, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_do1, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_do1_fo2, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_do1_fl3, O_RDONLY));
++      ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY));
++}
++
++#define for_each_path(path_list, path_entry, i)                       \
++      for (i = 0, path_entry = *path_list[i]; path_list[i];   \
++                      path_entry = *path_list[++i])
++
++TEST_F_FORK(layout2_overlay, same_content_different_file)
++{
++      /* Sets access right on parent directories of both layers. */
++      const struct rule layer1_base[] = {
++              {
++                      .path = LOWER_BASE,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = UPPER_BASE,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = MERGE_BASE,
++                      .access = ACCESS_RW,
++              },
++              {}
++      };
++      const struct rule layer2_data[] = {
++              {
++                      .path = LOWER_DATA,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = UPPER_DATA,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = MERGE_DATA,
++                      .access = ACCESS_RW,
++              },
++              {}
++      };
++      /* Sets access right on directories inside both layers. */
++      const struct rule layer3_subdirs[] = {
++              {
++                      .path = lower_dl1,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = lower_do1,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = upper_du1,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = upper_do1,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = merge_dl1,
++                      .access = ACCESS_RW,
++              },
++              {
++                      .path = merge_du1,
++                      .access = ACCESS_RW,
++              },
++              {
++                      .path = merge_do1,
++                      .access = ACCESS_RW,
++              },
++              {}
++      };
++      /* Tighten access rights to the files. */
++      const struct rule layer4_files[] = {
++              {
++                      .path = lower_dl1_fl2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = lower_do1_fo2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = lower_do1_fl3,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = upper_du1_fu2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = upper_do1_fo2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = upper_do1_fu3,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE,
++              },
++              {
++                      .path = merge_dl1_fl2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {
++                      .path = merge_du1_fu2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {
++                      .path = merge_do1_fo2,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {
++                      .path = merge_do1_fl3,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {
++                      .path = merge_do1_fu3,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {}
++      };
++      const struct rule layer5_merge_only[] = {
++              {
++                      .path = MERGE_DATA,
++                      .access = LANDLOCK_ACCESS_FS_READ_FILE |
++                              LANDLOCK_ACCESS_FS_WRITE_FILE,
++              },
++              {}
++      };
++      int ruleset_fd;
++      size_t i;
++      const char *path_entry;
++
++      /* Sets rules on base directories (i.e. outside overlay scope). */
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks lower layer. */
++      for_each_path(lower_base_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++              ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++      }
++      for_each_path(lower_base_directories, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++      }
++      for_each_path(lower_sub_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++              ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++      }
++      /* Checks upper layer. */
++      for_each_path(upper_base_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++              ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++      }
++      for_each_path(upper_base_directories, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++      }
++      for_each_path(upper_sub_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++              ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++      }
++      /*
++       * Checks that access rights are independent from the lower and upper
++       * layers: write access to upper files viewed through the merge point
++       * is still allowed, and write access to lower file viewed (and copied)
++       * through the merge point is still allowed.
++       */
++      for_each_path(merge_base_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++      }
++      for_each_path(merge_base_directories, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++      }
++      for_each_path(merge_sub_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++      }
++
++      /* Sets rules on data directories (i.e. inside overlay scope). */
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_data);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks merge. */
++      for_each_path(merge_base_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++      }
++      for_each_path(merge_base_directories, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++      }
++      for_each_path(merge_sub_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++      }
++
++      /* Same checks with tighter rules. */
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_subdirs);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks changes for lower layer. */
++      for_each_path(lower_base_files, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
++      }
++      /* Checks changes for upper layer. */
++      for_each_path(upper_base_files, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
++      }
++      /* Checks all merge accesses. */
++      for_each_path(merge_base_files, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
++      }
++      for_each_path(merge_base_directories, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++      }
++      for_each_path(merge_sub_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++      }
++
++      /* Sets rules directly on overlayed files. */
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_files);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks unchanged accesses on lower layer. */
++      for_each_path(lower_sub_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++              ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++      }
++      /* Checks unchanged accesses on upper layer. */
++      for_each_path(upper_sub_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++              ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++      }
++      /* Checks all merge accesses. */
++      for_each_path(merge_base_files, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
++      }
++      for_each_path(merge_base_directories, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++      }
++      for_each_path(merge_sub_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++      }
++
++      /* Only allowes access to the merge hierarchy. */
++      ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer5_merge_only);
++      ASSERT_LE(0, ruleset_fd);
++      enforce_ruleset(_metadata, ruleset_fd);
++      ASSERT_EQ(0, close(ruleset_fd));
++
++      /* Checks new accesses on lower layer. */
++      for_each_path(lower_sub_files, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
++      }
++      /* Checks new accesses on upper layer. */
++      for_each_path(upper_sub_files, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
++      }
++      /* Checks all merge accesses. */
++      for_each_path(merge_base_files, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
++      }
++      for_each_path(merge_base_directories, path_entry, i) {
++              ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++      }
++      for_each_path(merge_sub_files, path_entry, i) {
++              ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++      }
++}
++
++TEST_HARNESS_MAIN
+diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
+new file mode 100644
+index 0000000000000..15fbef9cc8496
+--- /dev/null
++++ b/tools/testing/selftests/landlock/ptrace_test.c
+@@ -0,0 +1,337 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Landlock tests - Ptrace
++ *
++ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright © 2019-2020 ANSSI
++ */
++
++#define _GNU_SOURCE
++#include <errno.h>
++#include <fcntl.h>
++#include <linux/landlock.h>
++#include <signal.h>
++#include <sys/prctl.h>
++#include <sys/ptrace.h>
++#include <sys/types.h>
++#include <sys/wait.h>
++#include <unistd.h>
++
++#include "common.h"
++
++static void create_domain(struct __test_metadata *const _metadata)
++{
++      int ruleset_fd;
++      struct landlock_ruleset_attr ruleset_attr = {
++              .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK,
++      };
++
++      ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++                      sizeof(ruleset_attr), 0);
++      EXPECT_LE(0, ruleset_fd) {
++              TH_LOG("Failed to create a ruleset: %s", strerror(errno));
++      }
++      EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++      EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
++      EXPECT_EQ(0, close(ruleset_fd));
++}
++
++static int test_ptrace_read(const pid_t pid)
++{
++      static const char path_template[] = "/proc/%d/environ";
++      char procenv_path[sizeof(path_template) + 10];
++      int procenv_path_size, fd;
++
++      procenv_path_size = snprintf(procenv_path, sizeof(procenv_path),
++                      path_template, pid);
++      if (procenv_path_size >= sizeof(procenv_path))
++              return E2BIG;
++
++      fd = open(procenv_path, O_RDONLY | O_CLOEXEC);
++      if (fd < 0)
++              return errno;
++      /*
++       * Mixing error codes from close(2) and open(2) should not lead to any
++       * (access type) confusion for this test.
++       */
++      if (close(fd) != 0)
++              return errno;
++      return 0;
++}
++
++FIXTURE(hierarchy) { };
++
++FIXTURE_VARIANT(hierarchy) {
++      const bool domain_both;
++      const bool domain_parent;
++      const bool domain_child;
++};
++
++/*
++ * Test multiple tracing combinations between a parent process P1 and a child
++ * process P2.
++ *
++ * Yama's scoped ptrace is presumed disabled.  If enabled, this optional
++ * restriction is enforced in addition to any Landlock check, which means that
++ * all P2 requests to trace P1 would be denied.
++ */
++
++/*
++ *        No domain
++ *
++ *   P1-.               P1 -> P2 : allow
++ *       \              P2 -> P1 : allow
++ *        'P2
++ */
++FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
++      .domain_both = false,
++      .domain_parent = false,
++      .domain_child = false,
++};
++
++/*
++ *        Child domain
++ *
++ *   P1--.              P1 -> P2 : allow
++ *        \             P2 -> P1 : deny
++ *        .'-----.
++ *        |  P2  |
++ *        '------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
++      .domain_both = false,
++      .domain_parent = false,
++      .domain_child = true,
++};
++
++/*
++ *        Parent domain
++ * .------.
++ * |  P1  --.           P1 -> P2 : deny
++ * '------'  \          P2 -> P1 : allow
++ *            '
++ *            P2
++ */
++FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
++      .domain_both = false,
++      .domain_parent = true,
++      .domain_child = false,
++};
++
++/*
++ *        Parent + child domain (siblings)
++ * .------.
++ * |  P1  ---.          P1 -> P2 : deny
++ * '------'   \         P2 -> P1 : deny
++ *         .---'--.
++ *         |  P2  |
++ *         '------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
++      .domain_both = false,
++      .domain_parent = true,
++      .domain_child = true,
++};
++
++/*
++ *         Same domain (inherited)
++ * .-------------.
++ * | P1----.     |      P1 -> P2 : allow
++ * |        \    |      P2 -> P1 : allow
++ * |         '   |
++ * |         P2  |
++ * '-------------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
++      .domain_both = true,
++      .domain_parent = false,
++      .domain_child = false,
++};
++
++/*
++ *         Inherited + child domain
++ * .-----------------.
++ * |  P1----.        |  P1 -> P2 : allow
++ * |         \       |  P2 -> P1 : deny
++ * |        .-'----. |
++ * |        |  P2  | |
++ * |        '------' |
++ * '-----------------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
++      .domain_both = true,
++      .domain_parent = false,
++      .domain_child = true,
++};
++
++/*
++ *         Inherited + parent domain
++ * .-----------------.
++ * |.------.         |  P1 -> P2 : deny
++ * ||  P1  ----.     |  P2 -> P1 : allow
++ * |'------'    \    |
++ * |             '   |
++ * |             P2  |
++ * '-----------------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
++      .domain_both = true,
++      .domain_parent = true,
++      .domain_child = false,
++};
++
++/*
++ *         Inherited + parent and child domain (siblings)
++ * .-----------------.
++ * | .------.        |  P1 -> P2 : deny
++ * | |  P1  .        |  P2 -> P1 : deny
++ * | '------'\       |
++ * |          \      |
++ * |        .--'---. |
++ * |        |  P2  | |
++ * |        '------' |
++ * '-----------------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
++      .domain_both = true,
++      .domain_parent = true,
++      .domain_child = true,
++};
++
++FIXTURE_SETUP(hierarchy)
++{ }
++
++FIXTURE_TEARDOWN(hierarchy)
++{ }
++
++/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
++TEST_F(hierarchy, trace)
++{
++      pid_t child, parent;
++      int status, err_proc_read;
++      int pipe_child[2], pipe_parent[2];
++      char buf_parent;
++      long ret;
++
++      /*
++       * Removes all effective and permitted capabilities to not interfere
++       * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS.
++       */
++      drop_caps(_metadata);
++
++      parent = getpid();
++      ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
++      ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
++      if (variant->domain_both) {
++              create_domain(_metadata);
++              if (!_metadata->passed)
++                      /* Aborts before forking. */
++                      return;
++      }
++
++      child = fork();
++      ASSERT_LE(0, child);
++      if (child == 0) {
++              char buf_child;
++
++              ASSERT_EQ(0, close(pipe_parent[1]));
++              ASSERT_EQ(0, close(pipe_child[0]));
++              if (variant->domain_child)
++                      create_domain(_metadata);
++
++              /* Waits for the parent to be in a domain, if any. */
++              ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
++
++              /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. */
++              err_proc_read = test_ptrace_read(parent);
++              ret = ptrace(PTRACE_ATTACH, parent, NULL, 0);
++              if (variant->domain_child) {
++                      EXPECT_EQ(-1, ret);
++                      EXPECT_EQ(EPERM, errno);
++                      EXPECT_EQ(EACCES, err_proc_read);
++              } else {
++                      EXPECT_EQ(0, ret);
++                      EXPECT_EQ(0, err_proc_read);
++              }
++              if (ret == 0) {
++                      ASSERT_EQ(parent, waitpid(parent, &status, 0));
++                      ASSERT_EQ(1, WIFSTOPPED(status));
++                      ASSERT_EQ(0, ptrace(PTRACE_DETACH, parent, NULL, 0));
++              }
++
++              /* Tests child PTRACE_TRACEME. */
++              ret = ptrace(PTRACE_TRACEME);
++              if (variant->domain_parent) {
++                      EXPECT_EQ(-1, ret);
++                      EXPECT_EQ(EPERM, errno);
++              } else {
++                      EXPECT_EQ(0, ret);
++              }
++
++              /*
++               * Signals that the PTRACE_ATTACH test is done and the
++               * PTRACE_TRACEME test is ongoing.
++               */
++              ASSERT_EQ(1, write(pipe_child[1], ".", 1));
++
++              if (!variant->domain_parent) {
++                      ASSERT_EQ(0, raise(SIGSTOP));
++              }
++
++              /* Waits for the parent PTRACE_ATTACH test. */
++              ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
++              _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
++              return;
++      }
++
++      ASSERT_EQ(0, close(pipe_child[1]));
++      ASSERT_EQ(0, close(pipe_parent[0]));
++      if (variant->domain_parent)
++              create_domain(_metadata);
++
++      /* Signals that the parent is in a domain, if any. */
++      ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
++
++      /*
++       * Waits for the child to test PTRACE_ATTACH on the parent and start
++       * testing PTRACE_TRACEME.
++       */
++      ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
++
++      /* Tests child PTRACE_TRACEME. */
++      if (!variant->domain_parent) {
++              ASSERT_EQ(child, waitpid(child, &status, 0));
++              ASSERT_EQ(1, WIFSTOPPED(status));
++              ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
++      } else {
++              /* The child should not be traced by the parent. */
++              EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0));
++              EXPECT_EQ(ESRCH, errno);
++      }
++
++      /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */
++      err_proc_read = test_ptrace_read(child);
++      ret = ptrace(PTRACE_ATTACH, child, NULL, 0);
++      if (variant->domain_parent) {
++              EXPECT_EQ(-1, ret);
++              EXPECT_EQ(EPERM, errno);
++              EXPECT_EQ(EACCES, err_proc_read);
++      } else {
++              EXPECT_EQ(0, ret);
++              EXPECT_EQ(0, err_proc_read);
++      }
++      if (ret == 0) {
++              ASSERT_EQ(child, waitpid(child, &status, 0));
++              ASSERT_EQ(1, WIFSTOPPED(status));
++              ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
++      }
++
++      /* Signals that the parent PTRACE_ATTACH test is done. */
++      ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
++      ASSERT_EQ(child, waitpid(child, &status, 0));
++      if (WIFSIGNALED(status) || !WIFEXITED(status) ||
++                      WEXITSTATUS(status) != EXIT_SUCCESS)
++              _metadata->passed = 0;
++}
++
++TEST_HARNESS_MAIN
+diff --git a/tools/testing/selftests/landlock/true.c b/tools/testing/selftests/landlock/true.c
+new file mode 100644
+index 0000000000000..3f9ccbf52783a
+--- /dev/null
++++ b/tools/testing/selftests/landlock/true.c
+@@ -0,0 +1,5 @@
++// SPDX-License-Identifier: GPL-2.0
++int main(void)
++{
++      return 0;
++}
+-- 
+2.39.2
+
diff --git a/queue-5.10/selftests-landlock-skip-overlayfs-tests-when-not-sup.patch b/queue-5.10/selftests-landlock-skip-overlayfs-tests-when-not-sup.patch
new file mode 100644 (file)
index 0000000..ffdf1da
--- /dev/null
@@ -0,0 +1,123 @@
+From 54bbaa4518388e88b866567125ccc67d87b9a423 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 05:32:29 +0000
+Subject: selftests/landlock: Skip overlayfs tests when not supported
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jeff Xu <jeffxu@google.com>
+
+[ Upstream commit 366617a69e60610912836570546f118006ebc7cb ]
+
+overlayfs may be disabled in the kernel configuration, causing related
+tests to fail.  Check that overlayfs is supported at runtime, so we can
+skip layout2_overlay.* accordingly.
+
+Signed-off-by: Jeff Xu <jeffxu@google.com>
+Reviewed-by: Guenter Roeck <groeck@chromium.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230113053229.1281774-2-jeffxu@google.com
+[mic: Reword comments and constify variables]
+Signed-off-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/landlock/fs_test.c | 47 ++++++++++++++++++++++
+ 1 file changed, 47 insertions(+)
+
+diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
+index 10c9a1e4ebd9b..db153452b110a 100644
+--- a/tools/testing/selftests/landlock/fs_test.c
++++ b/tools/testing/selftests/landlock/fs_test.c
+@@ -11,6 +11,7 @@
+ #include <fcntl.h>
+ #include <linux/landlock.h>
+ #include <sched.h>
++#include <stdio.h>
+ #include <string.h>
+ #include <sys/capability.h>
+ #include <sys/mount.h>
+@@ -74,6 +75,40 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
+  *         â””── s3d3
+  */
++static bool fgrep(FILE *const inf, const char *const str)
++{
++      char line[32];
++      const int slen = strlen(str);
++
++      while (!feof(inf)) {
++              if (!fgets(line, sizeof(line), inf))
++                      break;
++              if (strncmp(line, str, slen))
++                      continue;
++
++              return true;
++      }
++
++      return false;
++}
++
++static bool supports_overlayfs(void)
++{
++      bool res;
++      FILE *const inf = fopen("/proc/filesystems", "r");
++
++      /*
++       * Consider that the filesystem is supported if we cannot get the
++       * supported ones.
++       */
++      if (!inf)
++              return true;
++
++      res = fgrep(inf, "nodev\toverlay\n");
++      fclose(inf);
++      return res;
++}
++
+ static void mkdir_parents(struct __test_metadata *const _metadata,
+               const char *const path)
+ {
+@@ -2416,6 +2451,9 @@ FIXTURE(layout2_overlay) {
+ FIXTURE_SETUP(layout2_overlay)
+ {
++      if (!supports_overlayfs())
++              SKIP(return, "overlayfs is not supported");
++
+       prepare_layout(_metadata);
+       create_directory(_metadata, LOWER_BASE);
+@@ -2453,6 +2491,9 @@ FIXTURE_SETUP(layout2_overlay)
+ FIXTURE_TEARDOWN(layout2_overlay)
+ {
++      if (!supports_overlayfs())
++              SKIP(return, "overlayfs is not supported");
++
+       EXPECT_EQ(0, remove_path(lower_do1_fl3));
+       EXPECT_EQ(0, remove_path(lower_dl1_fl2));
+       EXPECT_EQ(0, remove_path(lower_fl1));
+@@ -2484,6 +2525,9 @@ FIXTURE_TEARDOWN(layout2_overlay)
+ TEST_F_FORK(layout2_overlay, no_restriction)
+ {
++      if (!supports_overlayfs())
++              SKIP(return, "overlayfs is not supported");
++
+       ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY));
+       ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY));
+       ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY));
+@@ -2647,6 +2691,9 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+       size_t i;
+       const char *path_entry;
++      if (!supports_overlayfs())
++              SKIP(return, "overlayfs is not supported");
++
+       /* Sets rules on base directories (i.e. outside overlay scope). */
+       ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
+       ASSERT_LE(0, ruleset_fd);
+-- 
+2.39.2
+
diff --git a/queue-5.10/selftests-landlock-test-ptrace-as-much-as-possible-w.patch b/queue-5.10/selftests-landlock-test-ptrace-as-much-as-possible-w.patch
new file mode 100644 (file)
index 0000000..bc08571
--- /dev/null
@@ -0,0 +1,220 @@
+From 16905b2ec61fefd3ae8a5ed6e13c12ce98bd579e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 14 Jan 2023 02:03:06 +0000
+Subject: selftests/landlock: Test ptrace as much as possible with Yama
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jeff Xu <jeffxu@google.com>
+
+[ Upstream commit 8677e555f17f51321d0730b945aeb7d4b95f998f ]
+
+Update ptrace tests according to all potential Yama security policies.
+This is required to make such tests pass even if Yama is enabled.
+
+Tests are not skipped but they now check both Landlock and Yama boundary
+restrictions at run time to keep a maximum test coverage (i.e. positive
+and negative testing).
+
+Signed-off-by: Jeff Xu <jeffxu@google.com>
+Link: https://lore.kernel.org/r/20230114020306.1407195-2-jeffxu@google.com
+Cc: stable@vger.kernel.org
+[mic: Add curly braces around EXPECT_EQ() to make it build, and improve
+commit message]
+Co-developed-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../testing/selftests/landlock/ptrace_test.c  | 113 +++++++++++++++---
+ 1 file changed, 96 insertions(+), 17 deletions(-)
+
+diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
+index 090adadfe2dc3..14745cceb059a 100644
+--- a/tools/testing/selftests/landlock/ptrace_test.c
++++ b/tools/testing/selftests/landlock/ptrace_test.c
+@@ -19,6 +19,12 @@
+ #include "common.h"
++/* Copied from security/yama/yama_lsm.c */
++#define YAMA_SCOPE_DISABLED 0
++#define YAMA_SCOPE_RELATIONAL 1
++#define YAMA_SCOPE_CAPABILITY 2
++#define YAMA_SCOPE_NO_ATTACH 3
++
+ static void create_domain(struct __test_metadata *const _metadata)
+ {
+       int ruleset_fd;
+@@ -59,6 +65,25 @@ static int test_ptrace_read(const pid_t pid)
+       return 0;
+ }
++static int get_yama_ptrace_scope(void)
++{
++      int ret;
++      char buf[2] = {};
++      const int fd = open("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY);
++
++      if (fd < 0)
++              return 0;
++
++      if (read(fd, buf, 1) < 0) {
++              close(fd);
++              return -1;
++      }
++
++      ret = atoi(buf);
++      close(fd);
++      return ret;
++}
++
+ /* clang-format off */
+ FIXTURE(hierarchy) {};
+ /* clang-format on */
+@@ -228,8 +253,51 @@ TEST_F(hierarchy, trace)
+       pid_t child, parent;
+       int status, err_proc_read;
+       int pipe_child[2], pipe_parent[2];
++      int yama_ptrace_scope;
+       char buf_parent;
+       long ret;
++      bool can_read_child, can_trace_child, can_read_parent, can_trace_parent;
++
++      yama_ptrace_scope = get_yama_ptrace_scope();
++      ASSERT_LE(0, yama_ptrace_scope);
++
++      if (yama_ptrace_scope > YAMA_SCOPE_DISABLED)
++              TH_LOG("Incomplete tests due to Yama restrictions (scope %d)",
++                     yama_ptrace_scope);
++
++      /*
++       * can_read_child is true if a parent process can read its child
++       * process, which is only the case when the parent process is not
++       * isolated from the child with a dedicated Landlock domain.
++       */
++      can_read_child = !variant->domain_parent;
++
++      /*
++       * can_trace_child is true if a parent process can trace its child
++       * process.  This depends on two conditions:
++       * - The parent process is not isolated from the child with a dedicated
++       *   Landlock domain.
++       * - Yama allows tracing children (up to YAMA_SCOPE_RELATIONAL).
++       */
++      can_trace_child = can_read_child &&
++                        yama_ptrace_scope <= YAMA_SCOPE_RELATIONAL;
++
++      /*
++       * can_read_parent is true if a child process can read its parent
++       * process, which is only the case when the child process is not
++       * isolated from the parent with a dedicated Landlock domain.
++       */
++      can_read_parent = !variant->domain_child;
++
++      /*
++       * can_trace_parent is true if a child process can trace its parent
++       * process.  This depends on two conditions:
++       * - The child process is not isolated from the parent with a dedicated
++       *   Landlock domain.
++       * - Yama is disabled (YAMA_SCOPE_DISABLED).
++       */
++      can_trace_parent = can_read_parent &&
++                         yama_ptrace_scope <= YAMA_SCOPE_DISABLED;
+       /*
+        * Removes all effective and permitted capabilities to not interfere
+@@ -260,16 +328,21 @@ TEST_F(hierarchy, trace)
+               /* Waits for the parent to be in a domain, if any. */
+               ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+-              /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. */
++              /* Tests PTRACE_MODE_READ on the parent. */
+               err_proc_read = test_ptrace_read(parent);
++              if (can_read_parent) {
++                      EXPECT_EQ(0, err_proc_read);
++              } else {
++                      EXPECT_EQ(EACCES, err_proc_read);
++              }
++
++              /* Tests PTRACE_ATTACH on the parent. */
+               ret = ptrace(PTRACE_ATTACH, parent, NULL, 0);
+-              if (variant->domain_child) {
++              if (can_trace_parent) {
++                      EXPECT_EQ(0, ret);
++              } else {
+                       EXPECT_EQ(-1, ret);
+                       EXPECT_EQ(EPERM, errno);
+-                      EXPECT_EQ(EACCES, err_proc_read);
+-              } else {
+-                      EXPECT_EQ(0, ret);
+-                      EXPECT_EQ(0, err_proc_read);
+               }
+               if (ret == 0) {
+                       ASSERT_EQ(parent, waitpid(parent, &status, 0));
+@@ -279,11 +352,11 @@ TEST_F(hierarchy, trace)
+               /* Tests child PTRACE_TRACEME. */
+               ret = ptrace(PTRACE_TRACEME);
+-              if (variant->domain_parent) {
++              if (can_trace_child) {
++                      EXPECT_EQ(0, ret);
++              } else {
+                       EXPECT_EQ(-1, ret);
+                       EXPECT_EQ(EPERM, errno);
+-              } else {
+-                      EXPECT_EQ(0, ret);
+               }
+               /*
+@@ -292,7 +365,7 @@ TEST_F(hierarchy, trace)
+                */
+               ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+-              if (!variant->domain_parent) {
++              if (can_trace_child) {
+                       ASSERT_EQ(0, raise(SIGSTOP));
+               }
+@@ -317,7 +390,7 @@ TEST_F(hierarchy, trace)
+       ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+       /* Tests child PTRACE_TRACEME. */
+-      if (!variant->domain_parent) {
++      if (can_trace_child) {
+               ASSERT_EQ(child, waitpid(child, &status, 0));
+               ASSERT_EQ(1, WIFSTOPPED(status));
+               ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
+@@ -327,17 +400,23 @@ TEST_F(hierarchy, trace)
+               EXPECT_EQ(ESRCH, errno);
+       }
+-      /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */
++      /* Tests PTRACE_MODE_READ on the child. */
+       err_proc_read = test_ptrace_read(child);
++      if (can_read_child) {
++              EXPECT_EQ(0, err_proc_read);
++      } else {
++              EXPECT_EQ(EACCES, err_proc_read);
++      }
++
++      /* Tests PTRACE_ATTACH on the child. */
+       ret = ptrace(PTRACE_ATTACH, child, NULL, 0);
+-      if (variant->domain_parent) {
++      if (can_trace_child) {
++              EXPECT_EQ(0, ret);
++      } else {
+               EXPECT_EQ(-1, ret);
+               EXPECT_EQ(EPERM, errno);
+-              EXPECT_EQ(EACCES, err_proc_read);
+-      } else {
+-              EXPECT_EQ(0, ret);
+-              EXPECT_EQ(0, err_proc_read);
+       }
++
+       if (ret == 0) {
+               ASSERT_EQ(child, waitpid(child, &status, 0));
+               ASSERT_EQ(1, WIFSTOPPED(status));
+-- 
+2.39.2
+
diff --git a/queue-5.10/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch b/queue-5.10/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch
new file mode 100644 (file)
index 0000000..3f92954
--- /dev/null
@@ -0,0 +1,58 @@
+From 18e64ec0888c0efc74dd751f978d97e046941ed1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Feb 2023 17:36:46 +0800
+Subject: selftests: nft_nat: ensuring the listening side is up before starting
+ the client
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 2067e7a00aa604b94de31d64f29b8893b1696f26 ]
+
+The test_local_dnat_portonly() function initiates the client-side as
+soon as it sets the listening side to the background. This could lead to
+a race condition where the server may not be ready to listen. To ensure
+that the server-side is up and running before initiating the
+client-side, a delay is introduced to the test_local_dnat_portonly()
+function.
+
+Before the fix:
+  # ./nft_nat.sh
+  PASS: netns routing/connectivity: ns0-rthlYrBU can reach ns1-rthlYrBU and ns2-rthlYrBU
+  PASS: ping to ns1-rthlYrBU was ip NATted to ns2-rthlYrBU
+  PASS: ping to ns1-rthlYrBU OK after ip nat output chain flush
+  PASS: ipv6 ping to ns1-rthlYrBU was ip6 NATted to ns2-rthlYrBU
+  2023/02/27 04:11:03 socat[6055] E connect(5, AF=2 10.0.1.99:2000, 16): Connection refused
+  ERROR: inet port rewrite
+
+After the fix:
+  # ./nft_nat.sh
+  PASS: netns routing/connectivity: ns0-9sPJV6JJ can reach ns1-9sPJV6JJ and ns2-9sPJV6JJ
+  PASS: ping to ns1-9sPJV6JJ was ip NATted to ns2-9sPJV6JJ
+  PASS: ping to ns1-9sPJV6JJ OK after ip nat output chain flush
+  PASS: ipv6 ping to ns1-9sPJV6JJ was ip6 NATted to ns2-9sPJV6JJ
+  PASS: inet port rewrite without l3 address
+
+Fixes: 282e5f8fe907 ("netfilter: nat: really support inet nat without l3 address")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/netfilter/nft_nat.sh | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
+index 4e15e81673104..67697d8ea59a5 100755
+--- a/tools/testing/selftests/netfilter/nft_nat.sh
++++ b/tools/testing/selftests/netfilter/nft_nat.sh
+@@ -404,6 +404,8 @@ EOF
+       echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
+       sc_s=$!
++      sleep 1
++
+       result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
+       if [ "$result" = "SERVER-inet" ];then
+-- 
+2.39.2
+
index 89c6906a1976a8bc3ecdf91e50257a0fdc254e88..92235b40fffe175a51c8d01df186be300c9c4dba 100644 (file)
@@ -10,3 +10,61 @@ ext4-move-where-set-the-may_inline_data-flag-is-set.patch
 ext4-fix-warning-in-ext4_update_inline_data.patch
 ext4-zero-i_disksize-when-initializing-the-bootloader-inode.patch
 nfc-change-order-inside-nfc_se_io-error-path.patch
+landlock-add-object-management.patch
+selftests-landlock-add-user-space-tests.patch
+selftests-landlock-skip-overlayfs-tests-when-not-sup.patch
+udf-fix-off-by-one-error-when-discarding-preallocati.patch
+selftests-landlock-add-clang-format-exceptions.patch
+selftests-landlock-test-ptrace-as-much-as-possible-w.patch
+irq-fix-typos-in-comments.patch
+irqdomain-look-for-existing-mapping-only-once.patch
+irqdomain-refactor-__irq_domain_alloc_irqs.patch
+irqdomain-fix-mapping-creation-race.patch
+irqdomain-change-the-type-of-size-in-__irq_domain_ad.patch
+irqdomain-fix-domain-registration-race.patch
+software-node-introduce-device_add_software_node.patch
+usb-dwc3-pci-register-a-software-node-for-the-dwc3-p.patch
+usb-dwc3-pci-id-for-tiger-lake-cpu.patch
+usb-dwc3-pci-add-support-for-the-intel-raptor-lake-s.patch
+usb-dwc3-pci-add-support-for-the-intel-meteor-lake-p.patch
+usb-dwc3-pci-add-support-for-the-intel-meteor-lake-m.patch
+riscv-using-patchable_function_entry-instead-of-mcou.patch
+riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch
+riscv-ftrace-reduce-the-detour-code-size-to-half.patch
+iommu-vt-d-fix-lockdep-splat-in-intel_pasid_get_entr.patch
+iommu-vt-d-fix-pasid-directory-pointer-coherency.patch
+efi-earlycon-replace-open-coded-strnchrnul.patch
+arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch
+risc-v-avoid-dereferening-null-regs-in-die.patch
+riscv-avoid-enabling-interrupts-in-die.patch
+riscv-add-header-include-guards-to-insn.h.patch
+scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch
+ext4-fix-possible-corruption-when-moving-a-directory.patch
+drm-nouveau-kms-nv50-remove-unused-functions.patch
+drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch
+drm-msm-fix-potential-invalid-ptr-free.patch
+drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch
+drm-msm-document-and-rename-preempt_lock.patch
+drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch
+drm-msm-a5xx-fix-context-faults-during-ring-switch.patch
+bgmac-fix-initial-chip-reset-to-support-bcm5358.patch
+nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch
+powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch
+ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch
+selftests-nft_nat-ensuring-the-listening-side-is-up-.patch
+net-usb-lan78xx-remove-lots-of-set-but-unused-ret-va.patch
+net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch
+net-caif-fix-use-after-free-in-cfusbl_device_notify.patch
+net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch
+net-phylib-get-rid-of-unnecessary-locking.patch
+bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch
+netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch
+netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch
+btf-fix-resolving-btf_kind_var-after-array-struct-un.patch
+net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch
+scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch
+platform-x86-mlx_platform-select-regmap-instead-of-d.patch
+net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch
+sunrpc-fix-a-server-shutdown-leak.patch
+riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch
+risc-v-don-t-check-text_mutex-during-stop_machine.patch
diff --git a/queue-5.10/software-node-introduce-device_add_software_node.patch b/queue-5.10/software-node-introduce-device_add_software_node.patch
new file mode 100644 (file)
index 0000000..55df990
--- /dev/null
@@ -0,0 +1,135 @@
+From 5006088daaf713e090bef68ab9d5a8082d043762 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jan 2021 12:49:11 +0300
+Subject: software node: Introduce device_add_software_node()
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit e68d0119e3284334de5650a1ac42ef4e179f895e ]
+
+This helper will register a software node and then assign
+it to device at the same time. The function will also make
+sure that the device can't have more than one software node.
+
+Acked-by: Felipe Balbi <balbi@kernel.org>
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://lore.kernel.org/r/20210115094914.88401-2-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/swnode.c    | 71 +++++++++++++++++++++++++++++++++++-----
+ include/linux/property.h |  3 ++
+ 2 files changed, 65 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
+index d2fb3eb5816c3..572a53e6f2e88 100644
+--- a/drivers/base/swnode.c
++++ b/drivers/base/swnode.c
+@@ -48,6 +48,19 @@ EXPORT_SYMBOL_GPL(is_software_node);
+                                    struct swnode, fwnode) : NULL;     \
+       })
++static inline struct swnode *dev_to_swnode(struct device *dev)
++{
++      struct fwnode_handle *fwnode = dev_fwnode(dev);
++
++      if (!fwnode)
++              return NULL;
++
++      if (!is_software_node(fwnode))
++              fwnode = fwnode->secondary;
++
++      return to_swnode(fwnode);
++}
++
+ static struct swnode *
+ software_node_to_swnode(const struct software_node *node)
+ {
+@@ -850,22 +863,62 @@ void fwnode_remove_software_node(struct fwnode_handle *fwnode)
+ }
+ EXPORT_SYMBOL_GPL(fwnode_remove_software_node);
++/**
++ * device_add_software_node - Assign software node to a device
++ * @dev: The device the software node is meant for.
++ * @swnode: The software node.
++ *
++ * This function will register @swnode and make it the secondary firmware node
++ * pointer of @dev. If @dev has no primary node, then @swnode will become the primary
++ * node.
++ */
++int device_add_software_node(struct device *dev, const struct software_node *swnode)
++{
++      int ret;
++
++      /* Only one software node per device. */
++      if (dev_to_swnode(dev))
++              return -EBUSY;
++
++      ret = software_node_register(swnode);
++      if (ret)
++              return ret;
++
++      set_secondary_fwnode(dev, software_node_fwnode(swnode));
++
++      return 0;
++}
++EXPORT_SYMBOL_GPL(device_add_software_node);
++
++/**
++ * device_remove_software_node - Remove device's software node
++ * @dev: The device with the software node.
++ *
++ * This function will unregister the software node of @dev.
++ */
++void device_remove_software_node(struct device *dev)
++{
++      struct swnode *swnode;
++
++      swnode = dev_to_swnode(dev);
++      if (!swnode)
++              return;
++
++      software_node_notify(dev, KOBJ_REMOVE);
++      set_secondary_fwnode(dev, NULL);
++      kobject_put(&swnode->kobj);
++}
++EXPORT_SYMBOL_GPL(device_remove_software_node);
++
+ int software_node_notify(struct device *dev, unsigned long action)
+ {
+-      struct fwnode_handle *fwnode = dev_fwnode(dev);
+       struct swnode *swnode;
+       int ret;
+-      if (!fwnode)
+-              return 0;
+-
+-      if (!is_software_node(fwnode))
+-              fwnode = fwnode->secondary;
+-      if (!is_software_node(fwnode))
++      swnode = dev_to_swnode(dev);
++      if (!swnode)
+               return 0;
+-      swnode = to_swnode(fwnode);
+-
+       switch (action) {
+       case KOBJ_ADD:
+               ret = sysfs_create_link(&dev->kobj, &swnode->kobj,
+diff --git a/include/linux/property.h b/include/linux/property.h
+index 2d4542629d80b..3b6093f6bd04c 100644
+--- a/include/linux/property.h
++++ b/include/linux/property.h
+@@ -485,4 +485,7 @@ fwnode_create_software_node(const struct property_entry *properties,
+                           const struct fwnode_handle *parent);
+ void fwnode_remove_software_node(struct fwnode_handle *fwnode);
++int device_add_software_node(struct device *dev, const struct software_node *swnode);
++void device_remove_software_node(struct device *dev);
++
+ #endif /* _LINUX_PROPERTY_H_ */
+-- 
+2.39.2
+
diff --git a/queue-5.10/sunrpc-fix-a-server-shutdown-leak.patch b/queue-5.10/sunrpc-fix-a-server-shutdown-leak.patch
new file mode 100644 (file)
index 0000000..b0135ef
--- /dev/null
@@ -0,0 +1,48 @@
+From c2f7cb0f1f288f12942f9ae91387ec83625a8490 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 16:08:32 -0500
+Subject: SUNRPC: Fix a server shutdown leak
+
+From: Benjamin Coddington <bcodding@redhat.com>
+
+[ Upstream commit 9ca6705d9d609441d34f8b853e1e4a6369b3b171 ]
+
+Fix a race where kthread_stop() may prevent the threadfn from ever getting
+called.  If that happens the svc_rqst will not be cleaned up.
+
+Fixes: ed6473ddc704 ("NFSv4: Fix callback server shutdown")
+Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sunrpc/svc.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index d38788cd9433a..af657a482ad2d 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -800,6 +800,7 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
+ static int
+ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ {
++      struct svc_rqst *rqstp;
+       struct task_struct *task;
+       unsigned int state = serv->sv_nrthreads-1;
+@@ -808,7 +809,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+               task = choose_victim(serv, pool, &state);
+               if (task == NULL)
+                       break;
+-              kthread_stop(task);
++              rqstp = kthread_data(task);
++              /* Did we lose a race to svo_function threadfn? */
++              if (kthread_stop(task) == -EINTR)
++                      svc_exit_thread(rqstp);
+               nrservs++;
+       } while (nrservs < 0);
+       return 0;
+-- 
+2.39.2
+
diff --git a/queue-5.10/udf-fix-off-by-one-error-when-discarding-preallocati.patch b/queue-5.10/udf-fix-off-by-one-error-when-discarding-preallocati.patch
new file mode 100644 (file)
index 0000000..5aafe25
--- /dev/null
@@ -0,0 +1,38 @@
+From a0dd1017e6c63ac7a75b9b4c039369f452bd8eb4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 Jan 2023 14:29:15 +0100
+Subject: udf: Fix off-by-one error when discarding preallocation
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit f54aa97fb7e5329a373f9df4e5e213ced4fc8759 ]
+
+The condition determining whether the preallocation can be used had
+an off-by-one error so we didn't discard preallocation when new
+allocation was just following it. This can then confuse code in
+inode_getblk().
+
+CC: stable@vger.kernel.org
+Fixes: 16d055656814 ("udf: Discard preallocation before extending file with a hole")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index 81876284a83c0..d114774ecdea8 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -442,7 +442,7 @@ static int udf_get_block(struct inode *inode, sector_t block,
+        * Block beyond EOF and prealloc extents? Just discard preallocation
+        * as it is not useful and complicates things.
+        */
+-      if (((loff_t)block) << inode->i_blkbits > iinfo->i_lenExtents)
++      if (((loff_t)block) << inode->i_blkbits >= iinfo->i_lenExtents)
+               udf_discard_prealloc(inode);
+       udf_clear_extent_cache(inode);
+       phys = inode_getblk(inode, block, &err, &new);
+-- 
+2.39.2
+
diff --git a/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-m.patch b/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-m.patch
new file mode 100644 (file)
index 0000000..37c43f7
--- /dev/null
@@ -0,0 +1,46 @@
+From 47186e711c93ccd622578fa4d86cc7499dfb1aff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 15:27:11 +0200
+Subject: usb: dwc3: pci: add support for the Intel Meteor Lake-M
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit 8e5248c3a8778f3e394e9a19195bc7a48f567ca2 ]
+
+This patch adds the necessary PCI IDs for Intel Meteor Lake-M
+devices.
+
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230215132711.35668-1-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index 57f4b068cf29b..054429e9b7152 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -42,6 +42,7 @@
+ #define PCI_DEVICE_ID_INTEL_JSP                       0x4dee
+ #define PCI_DEVICE_ID_INTEL_ADLS              0x7ae1
+ #define PCI_DEVICE_ID_INTEL_RPLS              0x7a61
++#define PCI_DEVICE_ID_INTEL_MTLM              0x7eb1
+ #define PCI_DEVICE_ID_INTEL_MTLP              0x7ec1
+ #define PCI_DEVICE_ID_INTEL_MTL                       0x7e7e
+ #define PCI_DEVICE_ID_INTEL_TGL                       0x9a15
+@@ -394,6 +395,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
+         (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLM),
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP),
+         (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+-- 
+2.39.2
+
diff --git a/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-p.patch b/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-p.patch
new file mode 100644 (file)
index 0000000..eaacc78
--- /dev/null
@@ -0,0 +1,51 @@
+From 00f613c8f462bab5d623365c67b5ca8defeca5e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Apr 2022 13:35:18 +0300
+Subject: usb: dwc3: pci: add support for the Intel Meteor Lake-P
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit 973e0f7a847ef13ade840d4c30729ce329a66895 ]
+
+This patch adds the necessary PCI IDs for Intel Meteor Lake-P
+devices.
+
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Cc: stable <stable@kernel.org>
+Link: https://lore.kernel.org/r/20220425103518.44028-1-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index cc97cbb67e5f4..57f4b068cf29b 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -42,6 +42,8 @@
+ #define PCI_DEVICE_ID_INTEL_JSP                       0x4dee
+ #define PCI_DEVICE_ID_INTEL_ADLS              0x7ae1
+ #define PCI_DEVICE_ID_INTEL_RPLS              0x7a61
++#define PCI_DEVICE_ID_INTEL_MTLP              0x7ec1
++#define PCI_DEVICE_ID_INTEL_MTL                       0x7e7e
+ #define PCI_DEVICE_ID_INTEL_TGL                       0x9a15
+ #define PCI_INTEL_BXT_DSM_GUID                "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+@@ -392,6 +394,12 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
+         (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP),
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL),
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
+         (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+-- 
+2.39.2
+
diff --git a/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-raptor-lake-s.patch b/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-raptor-lake-s.patch
new file mode 100644 (file)
index 0000000..7e30dc3
--- /dev/null
@@ -0,0 +1,46 @@
+From 5dbce0765acfe614da7dd457dd1eff8e896594e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Feb 2022 17:19:48 +0300
+Subject: usb: dwc3: pci: add support for the Intel Raptor Lake-S
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit 038438a25c45d5ac996e95a22fa9e76ff3d1f8c7 ]
+
+This patch adds the necessary PCI ID for Intel Raptor Lake-S
+devices.
+
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://lore.kernel.org/r/20220214141948.18637-1-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index 114d02ebf128a..cc97cbb67e5f4 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -41,6 +41,7 @@
+ #define PCI_DEVICE_ID_INTEL_TGPH              0x43ee
+ #define PCI_DEVICE_ID_INTEL_JSP                       0x4dee
+ #define PCI_DEVICE_ID_INTEL_ADLS              0x7ae1
++#define PCI_DEVICE_ID_INTEL_RPLS              0x7a61
+ #define PCI_DEVICE_ID_INTEL_TGL                       0x9a15
+ #define PCI_INTEL_BXT_DSM_GUID                "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+@@ -388,6 +389,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS),
+         (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
+         (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+-- 
+2.39.2
+
diff --git a/queue-5.10/usb-dwc3-pci-id-for-tiger-lake-cpu.patch b/queue-5.10/usb-dwc3-pci-id-for-tiger-lake-cpu.patch
new file mode 100644 (file)
index 0000000..7642637
--- /dev/null
@@ -0,0 +1,57 @@
+From 1ca135a6080ac8408989feb5e07d261595eba264 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jan 2021 12:49:13 +0300
+Subject: usb: dwc3: pci: ID for Tiger Lake CPU
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit 73203bde3a95a48f27b2454dc6b955280c641afe ]
+
+Tiger Lake SOC (the versions of it that have integrated USB4
+controller) may have two DWC3 controllers. One is part of
+the PCH (Platform Controller Hub, i.e. the chipset) as
+usual, and the other is inside the actual CPU block.
+
+On all Intel platforms that have the two separate DWC3
+controllers, the one inside the CPU handles USB3 and only
+USB3 traffic, while the PCH version handles USB2 and USB2
+alone. The reason for splitting the two busses like this is
+to allow easy USB3 tunneling over USB4 connections. As USB2
+is not tunneled over USB4, it has dedicated USB controllers
+(both xHCI and DWC3).
+
+Acked-by: Felipe Balbi <balbi@kernel.org>
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://lore.kernel.org/r/20210115094914.88401-4-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index 70cdb59c04c81..114d02ebf128a 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -41,6 +41,7 @@
+ #define PCI_DEVICE_ID_INTEL_TGPH              0x43ee
+ #define PCI_DEVICE_ID_INTEL_JSP                       0x4dee
+ #define PCI_DEVICE_ID_INTEL_ADLS              0x7ae1
++#define PCI_DEVICE_ID_INTEL_TGL                       0x9a15
+ #define PCI_INTEL_BXT_DSM_GUID                "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+ #define PCI_INTEL_BXT_FUNC_PMU_PWR    4
+@@ -387,6 +388,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS),
+         (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++      { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
+       { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NL_USB),
+         (kernel_ulong_t) &dwc3_pci_amd_swnode, },
+       {  }    /* Terminating Entry */
+-- 
+2.39.2
+
diff --git a/queue-5.10/usb-dwc3-pci-register-a-software-node-for-the-dwc3-p.patch b/queue-5.10/usb-dwc3-pci-register-a-software-node-for-the-dwc3-p.patch
new file mode 100644 (file)
index 0000000..b803218
--- /dev/null
@@ -0,0 +1,173 @@
+From 7772a4337dc5115df15e37110e5c52e0db82c0da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jan 2021 12:49:12 +0300
+Subject: usb: dwc3: pci: Register a software node for the dwc3 platform device
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit e492ce9bcaa1c9661cd3dd6cff0eedf2fa640f31 ]
+
+By registering the software node directly instead of just
+the properties in it, the driver can take advantage of also
+the other features the software nodes have.
+
+Acked-by: Felipe Balbi <balbi@kernel.org>
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://lore.kernel.org/r/20210115094914.88401-3-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 61 ++++++++++++++++++++++---------------
+ 1 file changed, 37 insertions(+), 24 deletions(-)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index a5a8c5712bce4..70cdb59c04c81 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -145,6 +145,18 @@ static const struct property_entry dwc3_pci_amd_properties[] = {
+       {}
+ };
++static const struct software_node dwc3_pci_intel_swnode = {
++      .properties = dwc3_pci_intel_properties,
++};
++
++static const struct software_node dwc3_pci_intel_mrfld_swnode = {
++      .properties = dwc3_pci_mrfld_properties,
++};
++
++static const struct software_node dwc3_pci_amd_swnode = {
++      .properties = dwc3_pci_amd_properties,
++};
++
+ static int dwc3_pci_quirks(struct dwc3_pci *dwc)
+ {
+       struct pci_dev                  *pdev = dwc->pci;
+@@ -225,7 +237,6 @@ static void dwc3_pci_resume_work(struct work_struct *work)
+ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+ {
+-      struct property_entry *p = (struct property_entry *)id->driver_data;
+       struct dwc3_pci         *dwc;
+       struct resource         res[2];
+       int                     ret;
+@@ -268,7 +279,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+       dwc->dwc3->dev.parent = dev;
+       ACPI_COMPANION_SET(&dwc->dwc3->dev, ACPI_COMPANION(dev));
+-      ret = platform_device_add_properties(dwc->dwc3, p);
++      ret = device_add_software_node(&dwc->dwc3->dev, (void *)id->driver_data);
+       if (ret < 0)
+               goto err;
+@@ -291,6 +302,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+       return 0;
+ err:
++      device_remove_software_node(&dwc->dwc3->dev);
+       platform_device_put(dwc->dwc3);
+       return ret;
+ }
+@@ -307,75 +319,76 @@ static void dwc3_pci_remove(struct pci_dev *pci)
+ #endif
+       device_init_wakeup(&pci->dev, false);
+       pm_runtime_get(&pci->dev);
++      device_remove_software_node(&dwc->dwc3->dev);
+       platform_device_unregister(dwc->dwc3);
+ }
+ static const struct pci_device_id dwc3_pci_id_table[] = {
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BSW),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BYT),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD),
+-        (kernel_ulong_t) &dwc3_pci_mrfld_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_mrfld_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLLP),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLH),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SPTLP),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SPTH),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BXT),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BXT_M),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_APL),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_KBP),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_GLK),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPLP),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPH),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPV),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICLLP),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_EHLLP),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGPLP),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGPH),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_JSP),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS),
+-        (kernel_ulong_t) &dwc3_pci_intel_properties, },
++        (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+       { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NL_USB),
+-        (kernel_ulong_t) &dwc3_pci_amd_properties, },
++        (kernel_ulong_t) &dwc3_pci_amd_swnode, },
+       {  }    /* Terminating Entry */
+ };
+ MODULE_DEVICE_TABLE(pci, dwc3_pci_id_table);
+-- 
+2.39.2
+