--- /dev/null
+From e71e11098ba2822015d64028427de64e99a16ff0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 17:10:47 +0100
+Subject: arm64: efi: Make efi_rt_lock a raw_spinlock
+
+From: Pierre Gondois <pierre.gondois@arm.com>
+
+[ Upstream commit 0e68b5517d3767562889f1d83fdb828c26adb24f ]
+
+Running an rt-kernel based on 6.2.0-rc3-rt1 on an Ampere Altra outputs
+the following:
+ BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46
+ in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 9, name: kworker/u320:0
+ preempt_count: 2, expected: 0
+ RCU nest depth: 0, expected: 0
+ 3 locks held by kworker/u320:0/9:
+ #0: ffff3fff8c27d128 ((wq_completion)efi_rts_wq){+.+.}-{0:0}, at: process_one_work (./include/linux/atomic/atomic-long.h:41)
+ #1: ffff80000861bdd0 ((work_completion)(&efi_rts_work.work)){+.+.}-{0:0}, at: process_one_work (./include/linux/atomic/atomic-long.h:41)
+ #2: ffffdf7e1ed3e460 (efi_rt_lock){+.+.}-{3:3}, at: efi_call_rts (drivers/firmware/efi/runtime-wrappers.c:101)
+ Preemption disabled at:
+ efi_virtmap_load (./arch/arm64/include/asm/mmu_context.h:248)
+ CPU: 0 PID: 9 Comm: kworker/u320:0 Tainted: G W 6.2.0-rc3-rt1
+ Hardware name: WIWYNN Mt.Jade Server System B81.03001.0005/Mt.Jade Motherboard, BIOS 1.08.20220218 (SCP: 1.08.20220218) 2022/02/18
+ Workqueue: efi_rts_wq efi_call_rts
+ Call trace:
+ dump_backtrace (arch/arm64/kernel/stacktrace.c:158)
+ show_stack (arch/arm64/kernel/stacktrace.c:165)
+ dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4))
+ dump_stack (lib/dump_stack.c:114)
+ __might_resched (kernel/sched/core.c:10134)
+ rt_spin_lock (kernel/locking/rtmutex.c:1769 (discriminator 4))
+ efi_call_rts (drivers/firmware/efi/runtime-wrappers.c:101)
+ [...]
+
+This seems to come from commit ff7a167961d1 ("arm64: efi: Execute
+runtime services from a dedicated stack") which adds a spinlock. This
+spinlock is taken through:
+efi_call_rts()
+\-efi_call_virt()
+ \-efi_call_virt_pointer()
+ \-arch_efi_call_virt_setup()
+
+Make 'efi_rt_lock' a raw_spinlock so that taking it never sleeps and
+preemption stays disabled, even on PREEMPT_RT.
+
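+For illustration only (not part of the patch, hypothetical lock names):
+on PREEMPT_RT a spinlock_t becomes a sleeping rtmutex, while a
+raw_spinlock_t keeps the non-sleeping behaviour, so only the latter may
+be taken in a section that has already disabled preemption:
+
+	static DEFINE_SPINLOCK(lock);		/* hypothetical */
+	static DEFINE_RAW_SPINLOCK(raw_lock);	/* hypothetical */
+
+	preempt_disable();		/* as efi_virtmap_load() ends up doing */
+	spin_lock(&lock);		/* sleeps on RT -> the splat above */
+	raw_spin_lock(&raw_lock);	/* never sleeps, safe here */
+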
+[ardb: The EFI runtime services are called with a different set of
+ translation tables, and are permitted to use the SIMD registers.
+ The context switch code preserves/restores neither, and so EFI
+ calls must be made with preemption disabled, rather than only
+ disabling migration.]
+
+Fixes: ff7a167961d1 ("arm64: efi: Execute runtime services from a dedicated stack")
+Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
+Cc: <stable@vger.kernel.org> # v6.1+
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/efi.h | 6 +++---
+ arch/arm64/kernel/efi.c | 2 +-
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h
+index 16892f0d05ad6..538b6a1b198b9 100644
+--- a/arch/arm64/include/asm/efi.h
++++ b/arch/arm64/include/asm/efi.h
+@@ -25,7 +25,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+ ({ \
+ efi_virtmap_load(); \
+ __efi_fpsimd_begin(); \
+- spin_lock(&efi_rt_lock); \
++ raw_spin_lock(&efi_rt_lock); \
+ })
+
+ #define arch_efi_call_virt(p, f, args...) \
+@@ -37,12 +37,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md);
+
+ #define arch_efi_call_virt_teardown() \
+ ({ \
+- spin_unlock(&efi_rt_lock); \
++ raw_spin_unlock(&efi_rt_lock); \
+ __efi_fpsimd_end(); \
+ efi_virtmap_unload(); \
+ })
+
+-extern spinlock_t efi_rt_lock;
++extern raw_spinlock_t efi_rt_lock;
+ efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...);
+
+ #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
+diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
+index 72f432d23ec5c..3ee3b3daca47b 100644
+--- a/arch/arm64/kernel/efi.c
++++ b/arch/arm64/kernel/efi.c
+@@ -144,7 +144,7 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f)
+ return s;
+ }
+
+-DEFINE_SPINLOCK(efi_rt_lock);
++DEFINE_RAW_SPINLOCK(efi_rt_lock);
+
+ asmlinkage u64 *efi_rt_stack_top __ro_after_init;
+
+--
+2.39.2
+
--- /dev/null
+From d4d1fc0c2871c7e8f9ee89b3e532ad1d1688edcb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Feb 2023 10:11:56 +0100
+Subject: bgmac: fix *initial* chip reset to support BCM5358
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Rafał Miłecki <rafal@milecki.pl>
+
+[ Upstream commit f99e6d7c4ed3be2531bd576425a5bd07fb133bd7 ]
+
+While bringing the hardware up we should perform a full reset including
+the switch bit (BGMAC_BCMA_IOCTL_SW_RESET aka SICF_SWRST). That is what
+the specification says and what the reference driver does.
+
+This seems to be critical for the BCM5358. Without it, the hardware
+doesn't get initialized properly and doesn't seem to transmit or receive
+any packets.
+
+Originally bgmac called bgmac_chip_reset() before setting the
+"has_robosw" property, which resulted in the expected behaviour. That
+changed as a side effect of adding platform device support, which
+regressed BCM5358 support.
+
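+The idea, in sketch form (illustrative only): an init-phase flag makes
+the first reset unconditionally full, while later resets keep sparing
+the switch:
+
+	bgmac->in_init = true;		/* probe: full SW_RESET is safe */
+	/* ... bgmac_chip_reset() honours in_init || !has_robosw ... */
+	bgmac->in_init = false;		/* runtime resets respect robosw */
+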
+Fixes: f6a95a24957a ("net: ethernet: bgmac: Add platform device support")
+Cc: Jon Mason <jdmason@kudzu.us>
+Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Link: https://lore.kernel.org/r/20230227091156.19509-1-zajec5@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bgmac.c | 8 ++++++--
+ drivers/net/ethernet/broadcom/bgmac.h | 2 ++
+ 2 files changed, 8 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
+index 9960127f612ea..bb999e67d7736 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.c
++++ b/drivers/net/ethernet/broadcom/bgmac.c
+@@ -890,13 +890,13 @@ static void bgmac_chip_reset_idm_config(struct bgmac *bgmac)
+
+ if (iost & BGMAC_BCMA_IOST_ATTACHED) {
+ flags = BGMAC_BCMA_IOCTL_SW_CLKEN;
+- if (!bgmac->has_robosw)
++ if (bgmac->in_init || !bgmac->has_robosw)
+ flags |= BGMAC_BCMA_IOCTL_SW_RESET;
+ }
+ bgmac_clk_enable(bgmac, flags);
+ }
+
+- if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw)
++ if (iost & BGMAC_BCMA_IOST_ATTACHED && (bgmac->in_init || !bgmac->has_robosw))
+ bgmac_idm_write(bgmac, BCMA_IOCTL,
+ bgmac_idm_read(bgmac, BCMA_IOCTL) &
+ ~BGMAC_BCMA_IOCTL_SW_RESET);
+@@ -1490,6 +1490,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
+ struct net_device *net_dev = bgmac->net_dev;
+ int err;
+
++ bgmac->in_init = true;
++
+ bgmac_chip_intrs_off(bgmac);
+
+ net_dev->irq = bgmac->irq;
+@@ -1542,6 +1544,8 @@ int bgmac_enet_probe(struct bgmac *bgmac)
+ /* Omit FCS from max MTU size */
+ net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN;
+
++ bgmac->in_init = false;
++
+ err = register_netdev(bgmac->net_dev);
+ if (err) {
+ dev_err(bgmac->dev, "Cannot register net device\n");
+diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
+index 351c598a3ec6d..d1200b27af1ed 100644
+--- a/drivers/net/ethernet/broadcom/bgmac.h
++++ b/drivers/net/ethernet/broadcom/bgmac.h
+@@ -512,6 +512,8 @@ struct bgmac {
+ int irq;
+ u32 int_mask;
+
++ bool in_init;
++
+ /* Current MAC state */
+ int mac_speed;
+ int mac_duplex;
+--
+2.39.2
+
--- /dev/null
+From a11bbbffc1c785fdd6e539212e262c6a47fe0eb6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 18:43:57 -0800
+Subject: bnxt_en: Avoid order-5 memory allocation for TPA data
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit accd7e23693aaaa9aa0d3e9eca0ae77d1be80ab3 ]
+
+The driver needs to keep track of all the possible concurrent TPA (GRO/LRO)
+completions on the aggregation ring. On P5 chips, the maximum number
+of concurrent TPA instances is 256 and the allocation is order-5
+on systems using 4K pages. Memory allocation failure has been reported:
+
+NetworkManager: page allocation failure: order:5, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0-1
+CPU: 15 PID: 2995 Comm: NetworkManager Kdump: loaded Not tainted 5.10.156 #1
+Hardware name: Dell Inc. PowerEdge R660/0M1CC5, BIOS 0.2.25 08/12/2022
+Call Trace:
+ dump_stack+0x57/0x6e
+ warn_alloc.cold.120+0x7b/0xdd
+ ? _cond_resched+0x15/0x30
+ ? __alloc_pages_direct_compact+0x15f/0x170
+ __alloc_pages_slowpath.constprop.108+0xc58/0xc70
+ __alloc_pages_nodemask+0x2d0/0x300
+ kmalloc_order+0x24/0xe0
+ kmalloc_order_trace+0x19/0x80
+ bnxt_alloc_mem+0x1150/0x15c0 [bnxt_en]
+ ? bnxt_get_func_stat_ctxs+0x13/0x60 [bnxt_en]
+ __bnxt_open_nic+0x12e/0x780 [bnxt_en]
+ bnxt_open+0x10b/0x240 [bnxt_en]
+ __dev_open+0xe9/0x180
+ __dev_change_flags+0x1af/0x220
+ dev_change_flags+0x21/0x60
+ do_setlink+0x35c/0x1100
+
+Instead of allocating this big chunk of memory and dividing it up for the
+concurrent TPA instances, allocate each small chunk separately for each
+TPA instance. This reduces them to order-0 allocations.
+
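+In sketch form (illustrative, simplified from the patch below):
+
+	/* before: one huge chunk, order-5 with 4K pages */
+	agg = kcalloc(bp->max_tpa * MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL);
+
+	/* after: bp->max_tpa separate chunks, each order-0 */
+	for (j = 0; j < bp->max_tpa; j++)
+		rxr->rx_tpa[j].agg_arr = kcalloc(MAX_SKB_FRAGS, sizeof(*agg),
+						 GFP_KERNEL);
+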
+Fixes: 79632e9ba386 ("bnxt_en: Expand bnxt_tpa_info struct to support 57500 chips.")
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Reviewed-by: Damodharam Ammepalli <damodharam.ammepalli@broadcom.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++-----------
+ 1 file changed, 12 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index c4a768ce8c99d..6928c0b578abb 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -2854,7 +2854,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem)
+
+ static void bnxt_free_tpa_info(struct bnxt *bp)
+ {
+- int i;
++ int i, j;
+
+ for (i = 0; i < bp->rx_nr_rings; i++) {
+ struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i];
+@@ -2862,8 +2862,10 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
+ kfree(rxr->rx_tpa_idx_map);
+ rxr->rx_tpa_idx_map = NULL;
+ if (rxr->rx_tpa) {
+- kfree(rxr->rx_tpa[0].agg_arr);
+- rxr->rx_tpa[0].agg_arr = NULL;
++ for (j = 0; j < bp->max_tpa; j++) {
++ kfree(rxr->rx_tpa[j].agg_arr);
++ rxr->rx_tpa[j].agg_arr = NULL;
++ }
+ }
+ kfree(rxr->rx_tpa);
+ rxr->rx_tpa = NULL;
+@@ -2872,14 +2874,13 @@ static void bnxt_free_tpa_info(struct bnxt *bp)
+
+ static int bnxt_alloc_tpa_info(struct bnxt *bp)
+ {
+- int i, j, total_aggs = 0;
++ int i, j;
+
+ bp->max_tpa = MAX_TPA;
+ if (bp->flags & BNXT_FLAG_CHIP_P5) {
+ if (!bp->max_tpa_v2)
+ return 0;
+ bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5);
+- total_aggs = bp->max_tpa * MAX_SKB_FRAGS;
+ }
+
+ for (i = 0; i < bp->rx_nr_rings; i++) {
+@@ -2893,12 +2894,12 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp)
+
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5))
+ continue;
+- agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL);
+- rxr->rx_tpa[0].agg_arr = agg;
+- if (!agg)
+- return -ENOMEM;
+- for (j = 1; j < bp->max_tpa; j++)
+- rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS;
++ for (j = 0; j < bp->max_tpa; j++) {
++ agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL);
++ if (!agg)
++ return -ENOMEM;
++ rxr->rx_tpa[j].agg_arr = agg;
++ }
+ rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map),
+ GFP_KERNEL);
+ if (!rxr->rx_tpa_idx_map)
+--
+2.39.2
+
--- /dev/null
+From 01a893a6cbfda04f112334facb1d06d1465492ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Mar 2023 11:21:37 +0000
+Subject: btf: fix resolving BTF_KIND_VAR after ARRAY, STRUCT, UNION, PTR
+
+From: Lorenz Bauer <lorenz.bauer@isovalent.com>
+
+[ Upstream commit 9b459804ff9973e173fabafba2a1319f771e85fa ]
+
+btf_datasec_resolve contains a bug that causes the following BTF
+to fail loading:
+
+ [1] DATASEC a size=2 vlen=2
+ type_id=4 offset=0 size=1
+ type_id=7 offset=1 size=1
+ [2] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none)
+ [3] PTR (anon) type_id=2
+ [4] VAR a type_id=3 linkage=0
+ [5] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none)
+ [6] TYPEDEF td type_id=5
+ [7] VAR b type_id=6 linkage=0
+
+This error message is printed during btf_check_all_types:
+
+ [1] DATASEC a size=2 vlen=2
+ type_id=7 offset=1 size=1 Invalid type
+
+By tracing btf_*_resolve we can pinpoint the problem:
+
+ btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_TBD) = 0
+ btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_TBD) = 0
+ btf_ptr_resolve(depth: 3, type_id: 3, mode: RESOLVE_PTR) = 0
+ btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_PTR) = 0
+ btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_PTR) = -22
+
+The last invocation of btf_datasec_resolve should invoke btf_var_resolve
+by means of env_stack_push; instead it returns EINVAL. The reason is that
+env_stack_push is never executed for the second VAR.
+
+ if (!env_type_is_resolve_sink(env, var_type) &&
+ !env_type_is_resolved(env, var_type_id)) {
+ env_stack_set_next_member(env, i + 1);
+ return env_stack_push(env, var_type, var_type_id);
+ }
+
+env_type_is_resolve_sink() changes its behaviour based on resolve_mode.
+For RESOLVE_PTR, we can simplify the if condition to the following:
+
+  (btf_type_is_modifier() || btf_type_is_ptr()) && !env_type_is_resolved()
+
+Since we're dealing with a VAR, the clause evaluates to false. This is
+not sufficient to trigger the bug, however: the log output and EINVAL
+are only generated if btf_type_id_size() fails.
+
+ if (!btf_type_id_size(btf, &type_id, &type_size)) {
+ btf_verifier_log_vsi(env, v->t, vsi, "Invalid type");
+ return -EINVAL;
+ }
+
+Most types are sized, so for example a VAR referring to an INT is not a
+problem. The bug is only triggered if a VAR points at a modifier. Since
+we skipped btf_var_resolve, that modifier was also never resolved, which
+means that btf_resolved_type_id returns 0 aka VOID for the modifier.
+This in turn causes btf_type_id_size to return NULL, triggering EINVAL.
+
+To summarise, the following conditions are necessary:
+
+- VAR pointing at PTR, STRUCT, UNION or ARRAY
+- Followed by a VAR pointing at TYPEDEF, VOLATILE, CONST, RESTRICT or
+ TYPE_TAG
+
+The fix is to reset resolve_mode to RESOLVE_TBD before attempting to
+resolve a VAR from a DATASEC.
+
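+In sketch form, the one-line fix (illustrative):
+
+	/* Each DATASEC member must start resolution from a clean state;
+	 * otherwise RESOLVE_PTR left over from the previous member leaks
+	 * into the next member's btf_var_resolve() decision.
+	 */
+	env->resolve_mode = RESOLVE_TBD;
+	for_each_vsi_from(i, v->next_member, v->t, vsi) {
+		...
+	}
+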
+Fixes: 1dc92851849c ("bpf: kernel side support for BTF Var and DataSec")
+Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
+Link: https://lore.kernel.org/r/20230306112138.155352-2-lmb@isovalent.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/btf.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
+index 11b612e94e4e1..cb80d18a49b56 100644
+--- a/kernel/bpf/btf.c
++++ b/kernel/bpf/btf.c
+@@ -3541,6 +3541,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env,
+ struct btf *btf = env->btf;
+ u16 i;
+
++ env->resolve_mode = RESOLVE_TBD;
+ for_each_vsi_from(i, v->next_member, v->t, vsi) {
+ u32 var_type_id = vsi->type, type_id, type_size = 0;
+ const struct btf_type *var_type = btf_type_by_id(env->btf,
+--
+2.39.2
+
--- /dev/null
+From ba49194fa0fa953883ceba33e42bab3310521e19 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2023 05:09:56 +0300
+Subject: drm/msm/a5xx: fix context faults during ring switch
+
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+[ Upstream commit 32e7083429d46f29080626fe387ff90c086b1fbe ]
+
+The rptr_addr is set in preempt_init_ring(), which is called from
+a5xx_gpu_init(). It uses shadowptr() to set the address; however,
+shadow_iova is not yet initialized at that time. Move the rptr_addr
+setting to a5xx_preempt_hw_init(), which is called after shadow_iova is
+set, so the address gets the correct value.
+
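+The ordering problem, schematically (for illustration only):
+
+	a5xx_gpu_init()
+	  -> preempt_init_ring()	/* shadow_iova == 0: shadowptr()
+					 * yields a bogus rptr_addr */
+	...				/* shadow buffer mapped, shadow_iova set */
+	a5xx_preempt_hw_init()		/* shadowptr() now valid: set
+					 * rptr_addr here */
+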
+Fixes: 8907afb476ac ("drm/msm: Allow a5xx to mark the RPTR shadow as privileged")
+Suggested-by: Rob Clark <robdclark@gmail.com>
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Patchwork: https://patchwork.freedesktop.org/patch/522640/
+Link: https://lore.kernel.org/r/20230214020956.164473-5-dmitry.baryshkov@linaro.org
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+index 9da0aff0072d7..b8e71ad6f8d8a 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+@@ -210,6 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
+ a5xx_gpu->preempt[i]->wptr = 0;
+ a5xx_gpu->preempt[i]->rptr = 0;
+ a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova;
++ a5xx_gpu->preempt[i]->rptr_addr = shadowptr(a5xx_gpu, gpu->rb[i]);
+ }
+
+ /* Write a 0 to signal that we aren't switching pagetables */
+@@ -261,7 +262,6 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu,
+ ptr->data = 0;
+ ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE;
+
+- ptr->rptr_addr = shadowptr(a5xx_gpu, ring);
+ ptr->counter = counters_iova;
+
+ return 0;
+--
+2.39.2
+
--- /dev/null
+From 6bd9ad57e2a4d722982c28bea33c15721d1111fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2023 05:09:53 +0300
+Subject: drm/msm/a5xx: fix setting of the CP_PREEMPT_ENABLE_LOCAL register
+
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+[ Upstream commit a7a4c19c36de1e4b99b06e4060ccc8ab837725bc ]
+
+Rather than writing CP_PREEMPT_ENABLE_GLOBAL twice, follow the vendor
+kernel and set the CP_PREEMPT_ENABLE_LOCAL register instead. a5xx_submit()
+will override it during submission, but let's get the sequence correct.
+
+Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets")
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Patchwork: https://patchwork.freedesktop.org/patch/522638/
+Link: https://lore.kernel.org/r/20230214020956.164473-2-dmitry.baryshkov@linaro.org
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+index 0ca7e53db112a..64da65ae6d67e 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+@@ -144,8 +144,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
+ OUT_RING(ring, 1);
+
+ /* Enable local preemption for finegrain preemption */
+- OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
+- OUT_RING(ring, 0x02);
++ OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
++ OUT_RING(ring, 0x1);
+
+ /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
+ OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
+--
+2.39.2
+
--- /dev/null
+From 0eeb7ba42b85bfcad5a307f6b26dce6f8b22f535 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Feb 2023 05:09:55 +0300
+Subject: drm/msm/a5xx: fix the emptiness check in the preempt code
+
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+[ Upstream commit b4fb748f0b734ce1d2e7834998cc599fcbd25d67 ]
+
+Quoting Yassine: ring->memptrs->rptr is never updated and stays 0, so
+the comparison always evaluates to false and get_next_ring always
+returns ring 0 thinking it isn't empty.
+
+Fix this by calling get_rptr() instead of reading rptr directly.
+
+Reported-by: Yassine Oudjana <y.oudjana@protonmail.com>
+Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets")
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Patchwork: https://patchwork.freedesktop.org/patch/522642/
+Link: https://lore.kernel.org/r/20230214020956.164473-4-dmitry.baryshkov@linaro.org
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+index 183de1139eeb6..9da0aff0072d7 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+@@ -63,7 +63,7 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+ spin_lock_irqsave(&ring->preempt_lock, flags);
+- empty = (get_wptr(ring) == ring->memptrs->rptr);
++ empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring));
+ spin_unlock_irqrestore(&ring->preempt_lock, flags);
+
+ if (!empty)
+--
+2.39.2
+
--- /dev/null
+From 3c8d9d7d6bd4c73398e181bb2c3084cf13d15cc1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Oct 2020 09:51:15 -0700
+Subject: drm/msm: Document and rename preempt_lock
+
+From: Rob Clark <robdclark@chromium.org>
+
+[ Upstream commit 77c406038e830a4b6219b14a116cd2a6ac9f4908 ]
+
+Before adding another lock, give ring->lock a more descriptive name.
+
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Reviewed-by: Jordan Crouse <jcrouse@codeaurora.org>
+Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Stable-dep-of: b4fb748f0b73 ("drm/msm/a5xx: fix the emptiness check in the preempt code")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++--
+ drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 12 ++++++------
+ drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++--
+ drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +-
+ drivers/gpu/drm/msm/msm_ringbuffer.h | 7 ++++++-
+ 5 files changed, 17 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+index 64da65ae6d67e..6f84db97e20e8 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+@@ -36,7 +36,7 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+ OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
+ }
+
+- spin_lock_irqsave(&ring->lock, flags);
++ spin_lock_irqsave(&ring->preempt_lock, flags);
+
+ /* Copy the shadow to the actual register */
+ ring->cur = ring->next;
+@@ -44,7 +44,7 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+ /* Make sure to wrap wptr if we need to */
+ wptr = get_wptr(ring);
+
+- spin_unlock_irqrestore(&ring->lock, flags);
++ spin_unlock_irqrestore(&ring->preempt_lock, flags);
+
+ /* Make sure everything is posted before making a decision */
+ mb();
+diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+index 7e04509c4e1f0..183de1139eeb6 100644
+--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+@@ -45,9 +45,9 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+ if (!ring)
+ return;
+
+- spin_lock_irqsave(&ring->lock, flags);
++ spin_lock_irqsave(&ring->preempt_lock, flags);
+ wptr = get_wptr(ring);
+- spin_unlock_irqrestore(&ring->lock, flags);
++ spin_unlock_irqrestore(&ring->preempt_lock, flags);
+
+ gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
+ }
+@@ -62,9 +62,9 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
+ bool empty;
+ struct msm_ringbuffer *ring = gpu->rb[i];
+
+- spin_lock_irqsave(&ring->lock, flags);
++ spin_lock_irqsave(&ring->preempt_lock, flags);
+ empty = (get_wptr(ring) == ring->memptrs->rptr);
+- spin_unlock_irqrestore(&ring->lock, flags);
++ spin_unlock_irqrestore(&ring->preempt_lock, flags);
+
+ if (!empty)
+ return ring;
+@@ -132,9 +132,9 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
+ }
+
+ /* Make sure the wptr doesn't update while we're in motion */
+- spin_lock_irqsave(&ring->lock, flags);
++ spin_lock_irqsave(&ring->preempt_lock, flags);
+ a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring);
+- spin_unlock_irqrestore(&ring->lock, flags);
++ spin_unlock_irqrestore(&ring->preempt_lock, flags);
+
+ /* Set the address of the incoming preemption record */
+ gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
+diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+index dffc133b8b1cc..29b40acedb389 100644
+--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
++++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+@@ -65,7 +65,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+ OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
+ }
+
+- spin_lock_irqsave(&ring->lock, flags);
++ spin_lock_irqsave(&ring->preempt_lock, flags);
+
+ /* Copy the shadow to the actual register */
+ ring->cur = ring->next;
+@@ -73,7 +73,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+ /* Make sure to wrap wptr if we need to */
+ wptr = get_wptr(ring);
+
+- spin_unlock_irqrestore(&ring->lock, flags);
++ spin_unlock_irqrestore(&ring->preempt_lock, flags);
+
+ /* Make sure everything is posted before making a decision */
+ mb();
+diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
+index 935bf9b1d9418..1b6958e908dca 100644
+--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
++++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
+@@ -46,7 +46,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
+ ring->memptrs_iova = memptrs_iova;
+
+ INIT_LIST_HEAD(&ring->submits);
+- spin_lock_init(&ring->lock);
++ spin_lock_init(&ring->preempt_lock);
+
+ snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);
+
+diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
+index 0987d6bf848cf..4956d1bc5d0e1 100644
+--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
++++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
+@@ -46,7 +46,12 @@ struct msm_ringbuffer {
+ struct msm_rbmemptrs *memptrs;
+ uint64_t memptrs_iova;
+ struct msm_fence_context *fctx;
+- spinlock_t lock;
++
++ /*
++ * preempt_lock protects preemption and serializes wptr updates against
++	 * preemption. Can be acquired from irq context.
++ */
++ spinlock_t preempt_lock;
+ };
+
+ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
+--
+2.39.2
+
--- /dev/null
+From 75adf877270713e38c7156498a872511c6bffb9e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 15:50:48 -0800
+Subject: drm/msm: Fix potential invalid ptr free
+
+From: Rob Clark <robdclark@chromium.org>
+
+[ Upstream commit 8a86f213f4426f19511a16d886871805b35c3acf ]
+
+The error path cleanup expects that chain and syncobj are either NULL or
+valid pointers. But post_deps was not allocated with __GFP_ZERO.
+
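+In sketch form (illustrative):
+
+	/* before: array contents are uninitialized, so an early error
+	 * path may free garbage .chain/.syncobj pointers */
+	post_deps = kmalloc_array(nr_syncobjs, sizeof(*post_deps),
+				  GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+
+	/* after: zeroed contents; unfilled entries are NULL and safe
+	 * to pass to the cleanup helpers */
+	post_deps = kcalloc(nr_syncobjs, sizeof(*post_deps),
+			    GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+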
+Fixes: ab723b7a992a ("drm/msm: Add syncobj support.")
+Signed-off-by: Rob Clark <robdclark@chromium.org>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Reviewed-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
+Patchwork: https://patchwork.freedesktop.org/patch/523051/
+Link: https://lore.kernel.org/r/20230215235048.1166484-1-robdclark@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/msm/msm_gem_submit.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
+index aa5c60a7132d8..c4e5037512b9d 100644
+--- a/drivers/gpu/drm/msm/msm_gem_submit.c
++++ b/drivers/gpu/drm/msm/msm_gem_submit.c
+@@ -494,8 +494,8 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
+ int ret = 0;
+ uint32_t i, j;
+
+- post_deps = kmalloc_array(nr_syncobjs, sizeof(*post_deps),
+- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
++ post_deps = kcalloc(nr_syncobjs, sizeof(*post_deps),
++ GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+ if (!post_deps)
+ return ERR_PTR(-ENOMEM);
+
+@@ -510,7 +510,6 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev,
+ }
+
+ post_deps[i].point = syncobj_desc.point;
+- post_deps[i].chain = NULL;
+
+ if (syncobj_desc.flags) {
+ ret = -EINVAL;
+--
+2.39.2
+
--- /dev/null
+From ebecc8c1d6e71abb356922a23a6f45847343f593 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Oct 2022 12:42:29 +0100
+Subject: drm/nouveau/kms/nv50: fix nv50_wndw_new_ prototype
+
+From: Jiri Slaby (SUSE) <jirislaby@kernel.org>
+
+[ Upstream commit 3638a820c5c3b52f327cebb174fd4274bee08aa7 ]
+
+gcc-13 warns about mismatched enum types. That revealed swapped
+arguments in the declaration of nv50_wndw_new_():
+ drivers/gpu/drm/nouveau/dispnv50/wndw.c:696:1: error: conflicting types for 'nv50_wndw_new_' due to enum/integer mismatch; have 'int(const struct nv50_wndw_func *, struct drm_device *, enum drm_plane_type, const char *, int, const u32 *, u32, enum nv50_disp_interlock_type, u32, struct nv50_wndw **)'
+ drivers/gpu/drm/nouveau/dispnv50/wndw.h:36:5: note: previous declaration of 'nv50_wndw_new_' with type 'int(const struct nv50_wndw_func *, struct drm_device *, enum drm_plane_type, const char *, int, const u32 *, enum nv50_disp_interlock_type, u32, u32, struct nv50_wndw **)'
+
+The difference is easy to miss: in the middle of the parameter list,
+the declaration has:
+ enum nv50_disp_interlock_type,
+ u32 interlock_data,
+ u32 heads,
+
+While the definition has:
+ u32 heads,
+ enum nv50_disp_interlock_type interlock_type,
+ u32 interlock_data,
+
+Unify/fix the declaration to match the definition.
+
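+A reduced example of the class of bug gcc-13 now catches (illustrative
+only, hypothetical names):
+
+	enum kind { K0, K1 };
+	int f(enum kind k, u32 a, u32 b);	/* declaration */
+	int f(u32 b, enum kind k, u32 a)	/* definition: order differs */
+	{ return 0; }
+
+Older compilers accepted the mismatch silently because enums and
+integers convert implicitly; gcc-13 compares the enum types themselves
+and reports the conflicting prototypes.
+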
+Fixes: 53e0a3e70de6 ("drm/nouveau/kms/nv50-: simplify tracking of channel interlocks")
+Cc: Martin Liska <mliska@suse.cz>
+Cc: Ben Skeggs <bskeggs@redhat.com>
+Cc: Karol Herbst <kherbst@redhat.com>
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: David Airlie <airlied@gmail.com>
+Cc: Daniel Vetter <daniel@ffwll.ch>
+Cc: dri-devel@lists.freedesktop.org
+Cc: nouveau@lists.freedesktop.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
+Signed-off-by: Karol Herbst <kherbst@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221031114229.10289-1-jirislaby@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/dispnv50/wndw.h | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+index 8bed195ae098a..77bf124319fbd 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h
++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+@@ -38,8 +38,9 @@ struct nv50_wndw {
+
+ int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *,
+ enum drm_plane_type, const char *name, int index,
+- const u32 *format, enum nv50_disp_interlock_type,
+- u32 interlock_data, u32 heads, struct nv50_wndw **);
++ const u32 *format, u32 heads,
++ enum nv50_disp_interlock_type, u32 interlock_data,
++ struct nv50_wndw **);
+ void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock,
+ struct nv50_wndw_atom *);
+ void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush,
+--
+2.39.2
+
--- /dev/null
+From 08dfe9bad732d3e0ae2dc55714376d34aa89b0bd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Jun 2022 20:46:06 +1000
+Subject: drm/nouveau/kms/nv50-: remove unused functions
+
+From: Ben Skeggs <bskeggs@redhat.com>
+
+[ Upstream commit 89ed996b888faaf11c69bb4cbc19f21475c9050e ]
+
+Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
+Reviewed-by: Dave Airlie <airlied@redhat.com>
+Signed-off-by: Dave Airlie <airlied@redhat.com>
+Stable-dep-of: 3638a820c5c3 ("drm/nouveau/kms/nv50: fix nv50_wndw_new_ prototype")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/dispnv50/disp.c | 16 ----------------
+ drivers/gpu/drm/nouveau/dispnv50/wndw.c | 12 ------------
+ drivers/gpu/drm/nouveau/dispnv50/wndw.h | 2 --
+ 3 files changed, 30 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
+index c2d34c91e840c..804ea035fa46b 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
++++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
+@@ -2555,14 +2555,6 @@ nv50_display_fini(struct drm_device *dev, bool runtime, bool suspend)
+ {
+ struct nouveau_drm *drm = nouveau_drm(dev);
+ struct drm_encoder *encoder;
+- struct drm_plane *plane;
+-
+- drm_for_each_plane(plane, dev) {
+- struct nv50_wndw *wndw = nv50_wndw(plane);
+- if (plane->funcs != &nv50_wndw)
+- continue;
+- nv50_wndw_fini(wndw);
+- }
+
+ list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+ if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST)
+@@ -2578,7 +2570,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime)
+ {
+ struct nv50_core *core = nv50_disp(dev)->core;
+ struct drm_encoder *encoder;
+- struct drm_plane *plane;
+
+ if (resume || runtime)
+ core->func->init(core);
+@@ -2591,13 +2582,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime)
+ }
+ }
+
+- drm_for_each_plane(plane, dev) {
+- struct nv50_wndw *wndw = nv50_wndw(plane);
+- if (plane->funcs != &nv50_wndw)
+- continue;
+- nv50_wndw_init(wndw);
+- }
+-
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+index f07916ffe42cb..831125b4453df 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+@@ -690,18 +690,6 @@ nv50_wndw_notify(struct nvif_notify *notify)
+ return NVIF_NOTIFY_KEEP;
+ }
+
+-void
+-nv50_wndw_fini(struct nv50_wndw *wndw)
+-{
+- nvif_notify_put(&wndw->notify);
+-}
+-
+-void
+-nv50_wndw_init(struct nv50_wndw *wndw)
+-{
+- nvif_notify_get(&wndw->notify);
+-}
+-
+ static const u64 nv50_cursor_format_modifiers[] = {
+ DRM_FORMAT_MOD_LINEAR,
+ DRM_FORMAT_MOD_INVALID,
+diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+index 3278e28800343..8bed195ae098a 100644
+--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h
++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+@@ -40,8 +40,6 @@ int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *,
+ enum drm_plane_type, const char *name, int index,
+ const u32 *format, enum nv50_disp_interlock_type,
+ u32 interlock_data, u32 heads, struct nv50_wndw **);
+-void nv50_wndw_init(struct nv50_wndw *);
+-void nv50_wndw_fini(struct nv50_wndw *);
+ void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock,
+ struct nv50_wndw_atom *);
+ void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush,
+--
+2.39.2
+
--- /dev/null
+From 6e92a7ec121c7e23afadefc601975ace938df6db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 9 Dec 2022 00:12:16 +0200
+Subject: efi/earlycon: Replace open coded strnchrnul()
+
+From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+[ Upstream commit b7a1cd243839cc1459fbc83a7a62e3b57f29f497 ]
+
+strnchrnul() can be called in the early boot stages. Replace the
+open-coded variant in the EFI early console driver.
+
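+For reference (illustrative): strnchrnul(str, num, '\n') returns a
+pointer to the first '\n' within the first num bytes, or to the end of
+the examined range (or a terminating NUL) if none is found, so the
+length of the current line becomes a single expression:
+
+	count = strnchrnul(str, num, '\n') - str;
+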
+Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Stable-dep-of: 0e68b5517d37 ("arm64: efi: Make efi_rt_lock a raw_spinlock")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/earlycon.c | 13 ++++---------
+ 1 file changed, 4 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c
+index a52236e11e5f7..fc233b6f27cb2 100644
+--- a/drivers/firmware/efi/earlycon.c
++++ b/drivers/firmware/efi/earlycon.c
+@@ -10,6 +10,7 @@
+ #include <linux/kernel.h>
+ #include <linux/serial_core.h>
+ #include <linux/screen_info.h>
++#include <linux/string.h>
+
+ #include <asm/early_ioremap.h>
+
+@@ -143,16 +144,10 @@ efi_earlycon_write(struct console *con, const char *str, unsigned int num)
+ len = si->lfb_linelength;
+
+ while (num) {
+- unsigned int linemax;
+- unsigned int h, count = 0;
++ unsigned int linemax = (si->lfb_width - efi_x) / font->width;
++ unsigned int h, count;
+
+- for (s = str; *s && *s != '\n'; s++) {
+- if (count == num)
+- break;
+- count++;
+- }
+-
+- linemax = (si->lfb_width - efi_x) / font->width;
++ count = strnchrnul(str, num, '\n') - str;
+ if (count > linemax)
+ count = linemax;
+
+--
+2.39.2
+
--- /dev/null
+From f3fd8dc76223f6f1f523e9da85d149c534d08103 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 Jan 2023 12:22:21 +0100
+Subject: ext4: Fix possible corruption when moving a directory
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 0813299c586b175d7edb25f56412c54b812d0379 ]
+
+When we are renaming a directory to a different directory, we need to
+update the '..' entry in the moved directory. However, nothing prevents
+the moved directory from being modified and even converted from the
+inline format to the normal format. When such a race happens, the rename
+code gets confused and we crash. Fix the problem by locking the moved
+directory.
+
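+Schematically (illustration of the fix, not new code):
+
+	inode_lock_nested(old.inode, I_MUTEX_NONDIR2);	/* pin dir format */
+	retval = ext4_rename_dir_prepare(handle, &old);	/* finds '..' */
+	/* ... update '..', finish the rename ... */
+	inode_unlock(old.inode);
+
+I_MUTEX_NONDIR2 is a separate lockdep subclass, needed because the
+rename code already holds the parent directories' locks under their own
+subclasses.
+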
+CC: stable@vger.kernel.org
+Fixes: 32f7f22c0b52 ("ext4: let ext4_rename handle inline dir")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20230126112221.11866-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/namei.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index 6f335d58183ee..17590bb769147 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -3923,9 +3923,16 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
+ if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
+ goto end_rename;
+ }
++ /*
++ * We need to protect against old.inode directory getting
++ * converted from inline directory format into a normal one.
++ */
++ inode_lock_nested(old.inode, I_MUTEX_NONDIR2);
+ retval = ext4_rename_dir_prepare(handle, &old);
+- if (retval)
++ if (retval) {
++ inode_unlock(old.inode);
+ goto end_rename;
++ }
+ }
+ /*
+ * If we're renaming a file within an inline_data dir and adding or
+@@ -4050,6 +4057,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
+ } else {
+ ext4_journal_stop(handle);
+ }
++ if (old.dir_bh)
++ inode_unlock(old.inode);
+ release_bh:
+ brelse(old.dir_bh);
+ brelse(old.bh);
+--
+2.39.2
+
--- /dev/null
+From 1339c021f60dc9b4a3e13f93010a5ec6da10eddb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Feb 2023 15:30:24 +0000
+Subject: ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping()
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 693aa2c0d9b6d5b1f2745d31b6e70d09dbbaf06e ]
+
+ila_xlat_nl_cmd_get_mapping() generates an empty skb,
+triggering a recent sanity check [1].
+
+Instead, return an error code, so that user space
+can see the failure.
+
+[1]
+skb_assert_len
+WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 skb_assert_len include/linux/skbuff.h:2527 [inline]
+WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
+Modules linked in:
+CPU: 0 PID: 5923 Comm: syz-executor269 Not tainted 6.2.0-syzkaller-18300-g2ebd1fbb946d #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023
+pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+pc : skb_assert_len include/linux/skbuff.h:2527 [inline]
+pc : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
+lr : skb_assert_len include/linux/skbuff.h:2527 [inline]
+lr : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
+sp : ffff80001e0d6c40
+x29: ffff80001e0d6e60 x28: dfff800000000000 x27: ffff0000c86328c0
+x26: dfff800000000000 x25: ffff0000c8632990 x24: ffff0000c8632a00
+x23: 0000000000000000 x22: 1fffe000190c6542 x21: ffff0000c8632a10
+x20: ffff0000c8632a00 x19: ffff80001856e000 x18: ffff80001e0d5fc0
+x17: 0000000000000000 x16: ffff80001235d16c x15: 0000000000000000
+x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001
+x11: ff80800008353a30 x10: 0000000000000000 x9 : 21567eaf25bfb600
+x8 : 21567eaf25bfb600 x7 : 0000000000000001 x6 : 0000000000000001
+x5 : ffff80001e0d6558 x4 : ffff800015c74760 x3 : ffff800008596744
+x2 : 0000000000000001 x1 : 0000000100000000 x0 : 000000000000000e
+Call trace:
+skb_assert_len include/linux/skbuff.h:2527 [inline]
+__dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156
+dev_queue_xmit include/linux/netdevice.h:3033 [inline]
+__netlink_deliver_tap_skb net/netlink/af_netlink.c:307 [inline]
+__netlink_deliver_tap+0x45c/0x6f8 net/netlink/af_netlink.c:325
+netlink_deliver_tap+0xf4/0x174 net/netlink/af_netlink.c:338
+__netlink_sendskb net/netlink/af_netlink.c:1283 [inline]
+netlink_sendskb+0x6c/0x154 net/netlink/af_netlink.c:1292
+netlink_unicast+0x334/0x8d4 net/netlink/af_netlink.c:1380
+nlmsg_unicast include/net/netlink.h:1099 [inline]
+genlmsg_unicast include/net/genetlink.h:433 [inline]
+genlmsg_reply include/net/genetlink.h:443 [inline]
+ila_xlat_nl_cmd_get_mapping+0x620/0x7d0 net/ipv6/ila/ila_xlat.c:493
+genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline]
+genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline]
+genl_rcv_msg+0x938/0xc1c net/netlink/genetlink.c:1065
+netlink_rcv_skb+0x214/0x3c4 net/netlink/af_netlink.c:2574
+genl_rcv+0x38/0x50 net/netlink/genetlink.c:1076
+netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline]
+netlink_unicast+0x660/0x8d4 net/netlink/af_netlink.c:1365
+netlink_sendmsg+0x800/0xae0 net/netlink/af_netlink.c:1942
+sock_sendmsg_nosec net/socket.c:714 [inline]
+sock_sendmsg net/socket.c:734 [inline]
+____sys_sendmsg+0x558/0x844 net/socket.c:2479
+___sys_sendmsg net/socket.c:2533 [inline]
+__sys_sendmsg+0x26c/0x33c net/socket.c:2562
+__do_sys_sendmsg net/socket.c:2571 [inline]
+__se_sys_sendmsg net/socket.c:2569 [inline]
+__arm64_sys_sendmsg+0x80/0x94 net/socket.c:2569
+__invoke_syscall arch/arm64/kernel/syscall.c:38 [inline]
+invoke_syscall+0x98/0x2c0 arch/arm64/kernel/syscall.c:52
+el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142
+do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:193
+el0_svc+0x58/0x168 arch/arm64/kernel/entry-common.c:637
+el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655
+el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591
+irq event stamp: 136484
+hardirqs last enabled at (136483): [<ffff800008350244>] __up_console_sem+0x60/0xb4 kernel/printk/printk.c:345
+hardirqs last disabled at (136484): [<ffff800012358d60>] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:405
+softirqs last enabled at (136418): [<ffff800008020ea8>] softirq_handle_end kernel/softirq.c:414 [inline]
+softirqs last enabled at (136418): [<ffff800008020ea8>] __do_softirq+0xd4c/0xfa4 kernel/softirq.c:600
+softirqs last disabled at (136371): [<ffff80000802b4a4>] ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80
+---[ end trace 0000000000000000 ]---
+skb len=0 headroom=0 headlen=0 tailroom=192
+mac=(0,0) net=(0,-1) trans=-1
+shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0))
+csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0)
+hash(0x0 sw=0 l4=0) proto=0x0010 pkttype=6 iif=0
+dev name=nlmon0 feat=0x0000000000005861
+
+Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility")
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ila/ila_xlat.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
+index a1ac0e3d8c60c..163668531a57f 100644
+--- a/net/ipv6/ila/ila_xlat.c
++++ b/net/ipv6/ila/ila_xlat.c
+@@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info)
+
+ rcu_read_lock();
+
++ ret = -ESRCH;
+ ila = ila_lookup_by_params(&xp, ilan);
+ if (ila) {
+ ret = ila_dump_info(ila,
+--
+2.39.2
+
--- /dev/null
+From 6f729f8ef0d0d13b35f56334c130830099a64bbe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 20 Mar 2021 10:09:16 +0800
+Subject: iommu/vt-d: Fix lockdep splat in intel_pasid_get_entry()
+
+From: Lu Baolu <baolu.lu@linux.intel.com>
+
+[ Upstream commit 803766cbf85fb8edbf896729bbefc2d38dcf1e0a ]
+
+The pasid_lock is used to prevent different threads from modifying the
+same pasid directory entry at the same time. It causes the lockdep splat
+below.
+
+[ 83.296538] ========================================================
+[ 83.296538] WARNING: possible irq lock inversion dependency detected
+[ 83.296539] 5.12.0-rc3+ #25 Tainted: G W
+[ 83.296539] --------------------------------------------------------
+[ 83.296540] bash/780 just changed the state of lock:
+[ 83.296540] ffffffff82b29c98 (device_domain_lock){..-.}-{2:2}, at:
+ iommu_flush_dev_iotlb.part.0+0x32/0x110
+[ 83.296547] but this lock took another, SOFTIRQ-unsafe lock in the past:
+[ 83.296547] (pasid_lock){+.+.}-{2:2}
+[ 83.296548]
+
+ and interrupts could create inverse lock ordering between them.
+
+[ 83.296549] other info that might help us debug this:
+[ 83.296549] Chain exists of:
+ device_domain_lock --> &iommu->lock --> pasid_lock
+[ 83.296551] Possible interrupt unsafe locking scenario:
+
+[ 83.296551] CPU0 CPU1
+[ 83.296552] ---- ----
+[ 83.296552] lock(pasid_lock);
+[ 83.296553] local_irq_disable();
+[ 83.296553] lock(device_domain_lock);
+[ 83.296554] lock(&iommu->lock);
+[ 83.296554] <Interrupt>
+[ 83.296554] lock(device_domain_lock);
+[ 83.296555]
+ *** DEADLOCK ***
+
+Fix it by replacing the pasid_lock with an atomic compare-and-exchange
+(cmpxchg64()) operation when installing the entry.
+
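+The resulting lock-free install pattern, in sketch form (illustrative,
+error handling elided):
+
+retry:
+	entries = get_pasid_table_from_pde(&dir[dir_index]);
+	if (!entries) {
+		entries = alloc_pgtable_page(info->iommu->node);
+		/* Publish only if the slot is still empty; if a concurrent
+		 * thread won the race, free our page and use theirs. */
+		if (cmpxchg64(&dir[dir_index].val, 0ULL,
+			      (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
+			free_pgtable_page(entries);
+			goto retry;
+		}
+	}
+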
+Reported-and-tested-by: Dave Jiang <dave.jiang@intel.com>
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Link: https://lore.kernel.org/r/20210320020916.640115-1-baolu.lu@linux.intel.com
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Stable-dep-of: 194b3348bdbb ("iommu/vt-d: Fix PASID directory pointer coherency")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/pasid.c | 21 +++++++++++++--------
+ 1 file changed, 13 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
+index 86fd49ae7f612..f821153390e53 100644
+--- a/drivers/iommu/intel/pasid.c
++++ b/drivers/iommu/intel/pasid.c
+@@ -24,7 +24,6 @@
+ /*
+ * Intel IOMMU system wide PASID name space:
+ */
+-static DEFINE_SPINLOCK(pasid_lock);
+ u32 intel_pasid_max_id = PASID_MAX;
+
+ int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
+@@ -259,19 +258,25 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
+ dir_index = pasid >> PASID_PDE_SHIFT;
+ index = pasid & PASID_PTE_MASK;
+
+- spin_lock(&pasid_lock);
++retry:
+ entries = get_pasid_table_from_pde(&dir[dir_index]);
+ if (!entries) {
+ entries = alloc_pgtable_page(info->iommu->node);
+- if (!entries) {
+- spin_unlock(&pasid_lock);
++ if (!entries)
+ return NULL;
+- }
+
+- WRITE_ONCE(dir[dir_index].val,
+- (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
++ /*
++ * The pasid directory table entry won't be freed after
++ * allocation. No worry about the race with free and
++ * clear. However, this entry might be populated by others
++ * while we are preparing it. Use theirs with a retry.
++ */
++ if (cmpxchg64(&dir[dir_index].val, 0ULL,
++ (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) {
++ free_pgtable_page(entries);
++ goto retry;
++ }
+ }
+- spin_unlock(&pasid_lock);
+
+ return &entries[index];
+ }
+--
+2.39.2
+
--- /dev/null
+From c5ea5be059771ebbdbacad4dbe4a9958b7cd028b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Feb 2023 21:08:15 +0800
+Subject: iommu/vt-d: Fix PASID directory pointer coherency
+
+From: Jacob Pan <jacob.jun.pan@linux.intel.com>
+
+[ Upstream commit 194b3348bdbb7db65375c72f3f774aee4cc6614e ]
+
+On platforms that do not support IOMMU Extended capability bit 0
+Page-walk Coherency, CPU caches are not snooped when the IOMMU is
+accessing any translation structures; IOMMU accesses go directly to
+memory. The Intel IOMMU code was missing a flush for the PASID
+directory, which resulted in the unrecoverable fault shown below.
+
+This patch adds clflush calls whenever allocating and updating
+a PASID table directory to ensure cache coherency.
+
+In the reverse direction, there's no need to clflush the PASID directory
+pointer when we deactivate a context entry, because the IOMMU hardware
+will not see the old PASID directory pointer after we clear the context
+entry.
+PASID directory entries are also never freed once allocated.
+
+ DMAR: DRHD: handling fault status reg 3
+ DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000
+ [fault reason 0x51] SM: Present bit in Directory Entry is clear
+ DMAR: Dump dmar1 table entries for IOVA 0x1026a4000
+ DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001
+ DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401
+ DMAR: pasid dir entry: 0x0000000101b4e001
+ DMAR: pasid table entry[0]: 0x0000000000000109
+ DMAR: pasid table entry[1]: 0x0000000000000001
+ DMAR: pasid table entry[2]: 0x0000000000000000
+ DMAR: pasid table entry[3]: 0x0000000000000000
+ DMAR: pasid table entry[4]: 0x0000000000000000
+ DMAR: pasid table entry[5]: 0x0000000000000000
+ DMAR: pasid table entry[6]: 0x0000000000000000
+ DMAR: pasid table entry[7]: 0x0000000000000000
+ DMAR: PTE not present at level 4
+
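+The rule being applied, in sketch form (illustrative): when ecap bit 0
+is clear, every CPU update of a translation structure must be flushed
+out of the cache before the IOMMU can be expected to see it:
+
+	WRITE_ONCE(dir[i].val, virt_to_phys(entries) | PASID_PTE_PRESENT);
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(&dir[i].val, sizeof(dir[i].val));
+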
+Cc: <stable@vger.kernel.org>
+Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables")
+Reviewed-by: Kevin Tian <kevin.tian@intel.com>
+Reported-by: Sukumar Ghorai <sukumar.ghorai@intel.com>
+Signed-off-by: Ashok Raj <ashok.raj@intel.com>
+Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
+Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.intel.com
+Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Joerg Roedel <jroedel@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/pasid.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
+index f821153390e53..80d6412e2c546 100644
+--- a/drivers/iommu/intel/pasid.c
++++ b/drivers/iommu/intel/pasid.c
+@@ -186,6 +186,9 @@ int intel_pasid_alloc_table(struct device *dev)
+ attach_out:
+ device_attach_pasid_table(info, pasid_table);
+
++ if (!ecap_coherent(info->iommu->ecap))
++ clflush_cache_range(pasid_table->table, size);
++
+ return 0;
+ }
+
+@@ -276,6 +279,10 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
+ free_pgtable_page(entries);
+ goto retry;
+ }
++ if (!ecap_coherent(info->iommu->ecap)) {
++ clflush_cache_range(entries, VTD_PAGE_SIZE);
++ clflush_cache_range(&dir[dir_index].val, sizeof(*dir));
++ }
+ }
+
+ return &entries[index];
+--
+2.39.2
+
--- /dev/null
+From aeb29bf469c12a96239daf3b5055dbb7cc916184 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 Mar 2021 04:21:30 +0100
+Subject: irq: Fix typos in comments
+
+From: Ingo Molnar <mingo@kernel.org>
+
+[ Upstream commit a359f757965aafd0f58570de95dc6bc06cf12a9c ]
+
+Fix ~36 single-word typos in the IRQ, irqchip and irqdomain code comments.
+
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Stable-dep-of: 6e6f75c9c98d ("irqdomain: Look for existing mapping only once")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/irqchip/irq-aspeed-vic.c | 4 ++--
+ drivers/irqchip/irq-bcm7120-l2.c | 2 +-
+ drivers/irqchip/irq-csky-apb-intc.c | 2 +-
+ drivers/irqchip/irq-gic-v2m.c | 2 +-
+ drivers/irqchip/irq-gic-v3-its.c | 10 +++++-----
+ drivers/irqchip/irq-gic-v3.c | 2 +-
+ drivers/irqchip/irq-loongson-pch-pic.c | 2 +-
+ drivers/irqchip/irq-meson-gpio.c | 2 +-
+ drivers/irqchip/irq-mtk-cirq.c | 2 +-
+ drivers/irqchip/irq-mxs.c | 4 ++--
+ drivers/irqchip/irq-sun4i.c | 2 +-
+ drivers/irqchip/irq-ti-sci-inta.c | 2 +-
+ drivers/irqchip/irq-vic.c | 4 ++--
+ drivers/irqchip/irq-xilinx-intc.c | 2 +-
+ include/linux/irq.h | 4 ++--
+ include/linux/irqdesc.h | 2 +-
+ kernel/irq/chip.c | 2 +-
+ kernel/irq/dummychip.c | 2 +-
+ kernel/irq/irqdesc.c | 2 +-
+ kernel/irq/irqdomain.c | 8 ++++----
+ kernel/irq/manage.c | 6 +++---
+ kernel/irq/msi.c | 2 +-
+ kernel/irq/timings.c | 2 +-
+ 23 files changed, 36 insertions(+), 36 deletions(-)
+
+diff --git a/drivers/irqchip/irq-aspeed-vic.c b/drivers/irqchip/irq-aspeed-vic.c
+index 6567ed782f82c..58717cd44f99f 100644
+--- a/drivers/irqchip/irq-aspeed-vic.c
++++ b/drivers/irqchip/irq-aspeed-vic.c
+@@ -71,7 +71,7 @@ static void vic_init_hw(struct aspeed_vic *vic)
+ writel(0, vic->base + AVIC_INT_SELECT);
+ writel(0, vic->base + AVIC_INT_SELECT + 4);
+
+- /* Some interrupts have a programable high/low level trigger
++ /* Some interrupts have a programmable high/low level trigger
+ * (4 GPIO direct inputs), for now we assume this was configured
+ * by firmware. We read which ones are edge now.
+ */
+@@ -203,7 +203,7 @@ static int __init avic_of_init(struct device_node *node,
+ }
+ vic->base = regs;
+
+- /* Initialize soures, all masked */
++ /* Initialize sources, all masked */
+ vic_init_hw(vic);
+
+ /* Ready to receive interrupts */
+diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c
+index 7d776c905b7d2..1c2c5bd5a9fc1 100644
+--- a/drivers/irqchip/irq-bcm7120-l2.c
++++ b/drivers/irqchip/irq-bcm7120-l2.c
+@@ -310,7 +310,7 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn,
+
+ if (data->can_wake) {
+ /* This IRQ chip can wake the system, set all
+- * relevant child interupts in wake_enabled mask
++ * relevant child interrupts in wake_enabled mask
+ */
+ gc->wake_enabled = 0xffffffff;
+ gc->wake_enabled &= ~gc->unused;
+diff --git a/drivers/irqchip/irq-csky-apb-intc.c b/drivers/irqchip/irq-csky-apb-intc.c
+index 5a2ec43b7ddd4..ab91afa867557 100644
+--- a/drivers/irqchip/irq-csky-apb-intc.c
++++ b/drivers/irqchip/irq-csky-apb-intc.c
+@@ -176,7 +176,7 @@ gx_intc_init(struct device_node *node, struct device_node *parent)
+ writel(0x0, reg_base + GX_INTC_NEN63_32);
+
+ /*
+- * Initial mask reg with all unmasked, because we only use enalbe reg
++ * Initial mask reg with all unmasked, because we only use enable reg
+ */
+ writel(0x0, reg_base + GX_INTC_NMASK31_00);
+ writel(0x0, reg_base + GX_INTC_NMASK63_32);
+diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c
+index fbec07d634ad2..4116b48e60aff 100644
+--- a/drivers/irqchip/irq-gic-v2m.c
++++ b/drivers/irqchip/irq-gic-v2m.c
+@@ -371,7 +371,7 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode,
+ * the MSI data is the absolute value within the range from
+ * spi_start to (spi_start + num_spis).
+ *
+- * Broadom NS2 GICv2m implementation has an erratum where the MSI data
++ * Broadcom NS2 GICv2m implementation has an erratum where the MSI data
+ * is 'spi_number - 32'
+ *
+ * Reading that register fails on the Graviton implementation
+diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
+index d8cb5bcd6b10e..5ec091c64d47f 100644
+--- a/drivers/irqchip/irq-gic-v3-its.c
++++ b/drivers/irqchip/irq-gic-v3-its.c
+@@ -1492,7 +1492,7 @@ static void its_vlpi_set_doorbell(struct irq_data *d, bool enable)
+ *
+ * Ideally, we'd issue a VMAPTI to set the doorbell to its LPI
+ * value or to 1023, depending on the enable bit. But that
+- * would be issueing a mapping for an /existing/ DevID+EventID
++ * would be issuing a mapping for an /existing/ DevID+EventID
+ * pair, which is UNPREDICTABLE. Instead, let's issue a VMOVI
+ * to the /same/ vPE, using this opportunity to adjust the
+ * doorbell. Mouahahahaha. We loves it, Precious.
+@@ -3122,7 +3122,7 @@ static void its_cpu_init_lpis(void)
+
+ /*
+ * It's possible for CPU to receive VLPIs before it is
+- * sheduled as a vPE, especially for the first CPU, and the
++ * scheduled as a vPE, especially for the first CPU, and the
+ * VLPI with INTID larger than 2^(IDbits+1) will be considered
+ * as out of range and dropped by GIC.
+ * So we initialize IDbits to known value to avoid VLPI drop.
+@@ -3613,7 +3613,7 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq,
+
+ /*
+ * If all interrupts have been freed, start mopping the
+- * floor. This is conditionned on the device not being shared.
++ * floor. This is conditioned on the device not being shared.
+ */
+ if (!its_dev->shared &&
+ bitmap_empty(its_dev->event_map.lpi_map,
+@@ -4187,7 +4187,7 @@ static int its_sgi_set_affinity(struct irq_data *d,
+ {
+ /*
+ * There is no notion of affinity for virtual SGIs, at least
+- * not on the host (since they can only be targetting a vPE).
++ * not on the host (since they can only be targeting a vPE).
+ * Tell the kernel we've done whatever it asked for.
+ */
+ irq_data_update_effective_affinity(d, mask_val);
+@@ -4232,7 +4232,7 @@ static int its_sgi_get_irqchip_state(struct irq_data *d,
+ /*
+ * Locking galore! We can race against two different events:
+ *
+- * - Concurent vPE affinity change: we must make sure it cannot
++ * - Concurrent vPE affinity change: we must make sure it cannot
+ * happen, or we'll talk to the wrong redistributor. This is
+ * identical to what happens with vLPIs.
+ *
+diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
+index 4c8f18f0cecf8..2805969e4f15a 100644
+--- a/drivers/irqchip/irq-gic-v3.c
++++ b/drivers/irqchip/irq-gic-v3.c
+@@ -1456,7 +1456,7 @@ static int gic_irq_domain_translate(struct irq_domain *d,
+
+ /*
+ * Make it clear that broken DTs are... broken.
+- * Partitionned PPIs are an unfortunate exception.
++ * Partitioned PPIs are an unfortunate exception.
+ */
+ WARN_ON(*type == IRQ_TYPE_NONE &&
+ fwspec->param[0] != GIC_IRQ_TYPE_PARTITION);
+diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c
+index 90e1ad6e36120..a4eb8a2181c7f 100644
+--- a/drivers/irqchip/irq-loongson-pch-pic.c
++++ b/drivers/irqchip/irq-loongson-pch-pic.c
+@@ -180,7 +180,7 @@ static void pch_pic_reset(struct pch_pic *priv)
+ int i;
+
+ for (i = 0; i < PIC_COUNT; i++) {
+- /* Write vectore ID */
++ /* Write vectored ID */
+ writeb(priv->ht_vec_base + i, priv->base + PCH_INT_HTVEC(i));
+ /* Hardcode route to HT0 Lo */
+ writeb(1, priv->base + PCH_INT_ROUTE(i));
+diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c
+index bc7aebcc96e9c..e50676ce2ec84 100644
+--- a/drivers/irqchip/irq-meson-gpio.c
++++ b/drivers/irqchip/irq-meson-gpio.c
+@@ -227,7 +227,7 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl,
+
+ /*
+ * Get the hwirq number assigned to this channel through
+- * a pointer the channel_irq table. The added benifit of this
++ * a pointer the channel_irq table. The added benefit of this
+ * method is that we can also retrieve the channel index with
+ * it, using the table base.
+ */
+diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c
+index 69ba8ce3c1785..9bca0918078e8 100644
+--- a/drivers/irqchip/irq-mtk-cirq.c
++++ b/drivers/irqchip/irq-mtk-cirq.c
+@@ -217,7 +217,7 @@ static void mtk_cirq_resume(void)
+ {
+ u32 value;
+
+- /* flush recored interrupts, will send signals to parent controller */
++ /* flush recorded interrupts, will send signals to parent controller */
+ value = readl_relaxed(cirq_data->base + CIRQ_CONTROL);
+ writel_relaxed(value | CIRQ_FLUSH, cirq_data->base + CIRQ_CONTROL);
+
+diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c
+index a671938fd97f6..d1f5740cd5755 100644
+--- a/drivers/irqchip/irq-mxs.c
++++ b/drivers/irqchip/irq-mxs.c
+@@ -58,7 +58,7 @@ struct icoll_priv {
+ static struct icoll_priv icoll_priv;
+ static struct irq_domain *icoll_domain;
+
+-/* calculate bit offset depending on number of intterupt per register */
++/* calculate bit offset depending on number of interrupt per register */
+ static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit)
+ {
+ /*
+@@ -68,7 +68,7 @@ static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit)
+ return bit << ((d->hwirq & 3) << 3);
+ }
+
+-/* calculate mem offset depending on number of intterupt per register */
++/* calculate mem offset depending on number of interrupt per register */
+ static void __iomem *icoll_intr_reg(struct irq_data *d)
+ {
+ /* offset = hwirq / intr_per_reg * 0x10 */
+diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c
+index fb78d6623556c..9ea94456b178c 100644
+--- a/drivers/irqchip/irq-sun4i.c
++++ b/drivers/irqchip/irq-sun4i.c
+@@ -189,7 +189,7 @@ static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs)
+ * 3) spurious irq
+ * So if we immediately get a reading of 0, check the irq-pending reg
+ * to differentiate between 2 and 3. We only do this once to avoid
+- * the extra check in the common case of 1 hapening after having
++ * the extra check in the common case of 1 happening after having
+ * read the vector-reg once.
+ */
+ hwirq = readl(irq_ic_data->irq_base + SUN4I_IRQ_VECTOR_REG) >> 2;
+diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c
+index 532d0ae172d9f..ca1f593f4d13a 100644
+--- a/drivers/irqchip/irq-ti-sci-inta.c
++++ b/drivers/irqchip/irq-ti-sci-inta.c
+@@ -78,7 +78,7 @@ struct ti_sci_inta_vint_desc {
+ * struct ti_sci_inta_irq_domain - Structure representing a TISCI based
+ * Interrupt Aggregator IRQ domain.
+ * @sci: Pointer to TISCI handle
+- * @vint: TISCI resource pointer representing IA inerrupts.
++ * @vint: TISCI resource pointer representing IA interrupts.
+ * @global_event: TISCI resource pointer representing global events.
+ * @vint_list: List of the vints active in the system
+ * @vint_mutex: Mutex to protect vint_list
+diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c
+index e460363742272..62f3d29f90420 100644
+--- a/drivers/irqchip/irq-vic.c
++++ b/drivers/irqchip/irq-vic.c
+@@ -163,7 +163,7 @@ static struct syscore_ops vic_syscore_ops = {
+ };
+
+ /**
+- * vic_pm_init - initicall to register VIC pm
++ * vic_pm_init - initcall to register VIC pm
+ *
+ * This is called via late_initcall() to register
+ * the resources for the VICs due to the early
+@@ -397,7 +397,7 @@ static void __init vic_clear_interrupts(void __iomem *base)
+ /*
+ * The PL190 cell from ARM has been modified by ST to handle 64 interrupts.
+ * The original cell has 32 interrupts, while the modified one has 64,
+- * replocating two blocks 0x00..0x1f in 0x20..0x3f. In that case
++ * replicating two blocks 0x00..0x1f in 0x20..0x3f. In that case
+ * the probe function is called twice, with base set to offset 000
+ * and 020 within the page. We call this "second block".
+ */
+diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c
+index 1d3d273309bd3..8cd1bfc730572 100644
+--- a/drivers/irqchip/irq-xilinx-intc.c
++++ b/drivers/irqchip/irq-xilinx-intc.c
+@@ -210,7 +210,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc,
+
+ /*
+ * Disable all external interrupts until they are
+- * explicity requested.
++ * explicitly requested.
+ */
+ xintc_write(irqc, IER, 0);
+
+diff --git a/include/linux/irq.h b/include/linux/irq.h
+index 607bee9271bd7..b89a8ac83d1bc 100644
+--- a/include/linux/irq.h
++++ b/include/linux/irq.h
+@@ -116,7 +116,7 @@ enum {
+ * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity
+ * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to
+ * support stacked irqchips, which indicates skipping
+- * all descendent irqchips.
++ * all descendant irqchips.
+ */
+ enum {
+ IRQ_SET_MASK_OK = 0,
+@@ -302,7 +302,7 @@ static inline bool irqd_is_level_type(struct irq_data *d)
+
+ /*
+ * Must only be called of irqchip.irq_set_affinity() or low level
+- * hieararchy domain allocation functions.
++ * hierarchy domain allocation functions.
+ */
+ static inline void irqd_set_single_target(struct irq_data *d)
+ {
+diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
+index 5745491303e03..fdb22e0f9a91e 100644
+--- a/include/linux/irqdesc.h
++++ b/include/linux/irqdesc.h
+@@ -32,7 +32,7 @@ struct pt_regs;
+ * @last_unhandled: aging timer for unhandled count
+ * @irqs_unhandled: stats field for spurious unhandled interrupts
+ * @threads_handled: stats field for deferred spurious detection of threaded handlers
+- * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers
++ * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers
+ * @lock: locking for SMP
+ * @affinity_hint: hint to user space for preferred irq affinity
+ * @affinity_notify: context for notification of affinity changes
+diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c
+index 621d8dd157bc1..e7d284261d450 100644
+--- a/kernel/irq/chip.c
++++ b/kernel/irq/chip.c
+@@ -811,7 +811,7 @@ void handle_edge_irq(struct irq_desc *desc)
+ /*
+ * When another irq arrived while we were handling
+ * one, we could have masked the irq.
+- * Renable it, if it was not disabled in meantime.
++ * Reenable it, if it was not disabled in meantime.
+ */
+ if (unlikely(desc->istate & IRQS_PENDING)) {
+ if (!irqd_irq_disabled(&desc->irq_data) &&
+diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c
+index 0b0cdf206dc44..7fe6cffe7d0df 100644
+--- a/kernel/irq/dummychip.c
++++ b/kernel/irq/dummychip.c
+@@ -13,7 +13,7 @@
+
+ /*
+ * What should we do if we get a hw irq event on an illegal vector?
+- * Each architecture has to answer this themself.
++ * Each architecture has to answer this themselves.
+ */
+ static void ack_bad(struct irq_data *data)
+ {
+diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
+index 9b0914a063f90..6c009a033c73f 100644
+--- a/kernel/irq/irqdesc.c
++++ b/kernel/irq/irqdesc.c
+@@ -31,7 +31,7 @@ static int __init irq_affinity_setup(char *str)
+ cpulist_parse(str, irq_default_affinity);
+ /*
+ * Set at least the boot cpu. We don't want to end up with
+- * bugreports caused by random comandline masks
++ * bugreports caused by random commandline masks
+ */
+ cpumask_set_cpu(smp_processor_id(), irq_default_affinity);
+ return 1;
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index 1720998933f8d..fe07888a7d96a 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops);
+ * @name: Optional user provided domain name
+ * @pa: Optional user-provided physical address
+ *
+- * Allocate a struct irqchip_fwid, and return a poiner to the embedded
++ * Allocate a struct irqchip_fwid, and return a pointer to the embedded
+ * fwnode_handle (or NULL on failure).
+ *
+ * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are
+@@ -657,7 +657,7 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+
+ pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
+
+- /* Look for default domain if nececssary */
++ /* Look for default domain if necessary */
+ if (domain == NULL)
+ domain = irq_default_domain;
+ if (domain == NULL) {
+@@ -893,7 +893,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain,
+ {
+ struct irq_data *data;
+
+- /* Look for default domain if nececssary */
++ /* Look for default domain if necessary */
+ if (domain == NULL)
+ domain = irq_default_domain;
+ if (domain == NULL)
+@@ -1423,7 +1423,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
+ * The whole process to setup an IRQ has been split into two steps.
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+ * descriptor and required hardware resources. The second step,
+- * irq_domain_activate_irq(), is to program hardwares with preallocated
++ * irq_domain_activate_irq(), is to program the hardware with preallocated
+ * resources. In this way, it's easier to rollback when failing to
+ * allocate resources.
+ */
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index 437b073dc487e..0159925054faa 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -341,7 +341,7 @@ static bool irq_set_affinity_deactivated(struct irq_data *data,
+ * If the interrupt is not yet activated, just store the affinity
+ * mask and do not call the chip driver at all. On activation the
+ * driver has to make sure anyway that the interrupt is in a
+- * useable state so startup works.
++ * usable state so startup works.
+ */
+ if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) ||
+ irqd_is_activated(data) || !irqd_affinity_on_activate(data))
+@@ -999,7 +999,7 @@ static void irq_finalize_oneshot(struct irq_desc *desc,
+ * to IRQS_INPROGRESS and the irq line is masked forever.
+ *
+ * This also serializes the state of shared oneshot handlers
+- * versus "desc->threads_onehsot |= action->thread_mask;" in
++ * versus "desc->threads_oneshot |= action->thread_mask;" in
+ * irq_wake_thread(). See the comment there which explains the
+ * serialization.
+ */
+@@ -1877,7 +1877,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id)
+ /* Last action releases resources */
+ if (!desc->action) {
+ /*
+- * Reaquire bus lock as irq_release_resources() might
++ * Reacquire bus lock as irq_release_resources() might
+ * require it to deallocate resources over the slow bus.
+ */
+ chip_bus_lock(desc);
+diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
+index b47d95b68ac1a..4457f3e966d0e 100644
+--- a/kernel/irq/msi.c
++++ b/kernel/irq/msi.c
+@@ -5,7 +5,7 @@
+ *
+ * This file is licensed under GPLv2.
+ *
+- * This file contains common code to support Message Signalled Interrupt for
++ * This file contains common code to support Message Signaled Interrupts for
+ * PCI compatible and non PCI compatible devices.
+ */
+ #include <linux/types.h>
+diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
+index 1f981162648a3..00d45b6bd8f89 100644
+--- a/kernel/irq/timings.c
++++ b/kernel/irq/timings.c
+@@ -490,7 +490,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
+
+ /*
+ * The interrupt triggered more than one second apart, that
+- * ends the sequence as predictible for our purpose. In this
++ * ends the sequence as predictable for our purpose. In this
+ * case, assume we have the beginning of a sequence and the
+ * timestamp is the first value. As it is impossible to
+ * predict anything at this point, return.
+--
+2.39.2
+
--- /dev/null
+From bfa01f225d22fffdf704f267bcb38e4f9735562b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Sep 2021 10:52:03 +0800
+Subject: irqdomain: Change the type of 'size' in __irq_domain_add() to be
+ consistent
+
+From: Bixuan Cui <cuibixuan@huawei.com>
+
+[ Upstream commit 20c36ce2164f1774b487d443ece99b754bc6ad43 ]
+
+'size' is passed to struct_size(domain, revmap, size), whose count
+parameter has type 'size_t' (an unsigned type).
+Change 'size' from 'int' to 'unsigned int' to keep the types consistent.
+
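+As a rough, self-contained userspace illustration (demo names only;
+the real kernel macro also guards against overflow), struct_size()
+computes the size of a structure with a trailing flexible array, and
+its count argument is consumed as a size_t:
+
+  #include <stddef.h>
+  #include <stdio.h>
+
+  struct demo_domain {
+          int hwirq_max;
+          unsigned int revmap[];  /* flexible array member */
+  };
+
+  /* simplified stand-in for the kernel's struct_size() */
+  #define demo_struct_size(p, member, count) \
+          (sizeof(*(p)) + (size_t)(count) * sizeof((p)->member[0]))
+
+  int main(void)
+  {
+          struct demo_domain *d = NULL;
+          unsigned int size = 32; /* matches the new parameter type */
+
+          printf("%zu bytes\n", demo_struct_size(d, revmap, size));
+          return 0;
+  }
+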
+Signed-off-by: Bixuan Cui <cuibixuan@huawei.com>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20210916025203.44841-1-cuibixuan@huawei.com
+Stable-dep-of: 8932c32c3053 ("irqdomain: Fix domain registration race")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/irqdomain.h | 2 +-
+ kernel/irq/irqdomain.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h
+index ea5a337e0f8b8..9b9743f7538c4 100644
+--- a/include/linux/irqdomain.h
++++ b/include/linux/irqdomain.h
+@@ -256,7 +256,7 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa)
+ }
+
+ void irq_domain_free_fwnode(struct fwnode_handle *fwnode);
+-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data);
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index 245e317c72908..426242c8903d4 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -130,7 +130,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
+ * Allocates and initializes an irq_domain structure.
+ * Returns pointer to IRQ domain, or NULL on failure.
+ */
+-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+ irq_hw_number_t hwirq_max, int direct_max,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+--
+2.39.2
+
--- /dev/null
+From 4ec2537dd6d7bd9da61dca871a019630766b8f9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:42:49 +0100
+Subject: irqdomain: Fix domain registration race
+
+From: Marc Zyngier <maz@kernel.org>
+
+[ Upstream commit 8932c32c3053accd50702b36e944ac2016cd103c ]
+
+Hierarchical domains created using irq_domain_create_hierarchy() are
+currently added to the domain list before having been fully initialised.
+
+This specifically means that a racing allocation request might fail to
+allocate irq data for the inner domains of a hierarchy in case the
+parent domain pointer has not yet been set up.
+
+Note that this is not really an issue for irqchip drivers that are
+registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE())
+but could potentially cause trouble with drivers that are registered
+later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(),
+gpiochip drivers, etc.).
+
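+The fix below splits creation from publication: a domain only appears
+on the global list once every field is set, so a concurrent lookup can
+never observe a half-initialised entry. A self-contained userspace
+sketch of that pattern (demo names, a pthread mutex standing in for
+irq_domain_mutex):
+
+  #include <pthread.h>
+  #include <stdlib.h>
+
+  struct demo_domain {
+          struct demo_domain *parent;
+          struct demo_domain *next;       /* link on the global list */
+  };
+
+  static struct demo_domain *demo_list;
+  static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
+
+  static struct demo_domain *demo_create(struct demo_domain *parent)
+  {
+          struct demo_domain *d = calloc(1, sizeof(*d));
+
+          if (d)
+                  d->parent = parent;     /* fully initialise first... */
+          return d;
+  }
+
+  static void demo_publish(struct demo_domain *d)
+  {
+          pthread_mutex_lock(&demo_lock);
+          d->next = demo_list;            /* ...then make it reachable */
+          demo_list = d;
+          pthread_mutex_unlock(&demo_lock);
+  }
+
+  int main(void)
+  {
+          struct demo_domain *d = demo_create(NULL);
+
+          if (d)
+                  demo_publish(d);
+          return 0;
+  }
+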
+Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()")
+Cc: stable@vger.kernel.org # 3.19
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+[ johan: add commit message ]
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/irqdomain.c | 62 +++++++++++++++++++++++++++++-------------
+ 1 file changed, 43 insertions(+), 19 deletions(-)
+
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index 426242c8903d4..fd3f7c16c299a 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -117,23 +117,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode)
+ }
+ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode);
+
+-/**
+- * __irq_domain_add() - Allocate a new irq_domain data structure
+- * @fwnode: firmware node for the interrupt controller
+- * @size: Size of linear map; 0 for radix mapping only
+- * @hwirq_max: Maximum number of interrupts supported by controller
+- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
+- * direct mapping
+- * @ops: domain callbacks
+- * @host_data: Controller private data pointer
+- *
+- * Allocates and initializes an irq_domain structure.
+- * Returns pointer to IRQ domain, or NULL on failure.
+- */
+-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
+- irq_hw_number_t hwirq_max, int direct_max,
+- const struct irq_domain_ops *ops,
+- void *host_data)
++static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode,
++ unsigned int size,
++ irq_hw_number_t hwirq_max,
++ int direct_max,
++ const struct irq_domain_ops *ops,
++ void *host_data)
+ {
+ struct irqchip_fwid *fwid;
+ struct irq_domain *domain;
+@@ -210,12 +199,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s
+ domain->revmap_direct_max_irq = direct_max;
+ irq_domain_check_hierarchy(domain);
+
++ return domain;
++}
++
++static void __irq_domain_publish(struct irq_domain *domain)
++{
+ mutex_lock(&irq_domain_mutex);
+ debugfs_add_domain_dir(domain);
+ list_add(&domain->link, &irq_domain_list);
+ mutex_unlock(&irq_domain_mutex);
+
+ pr_debug("Added domain %s\n", domain->name);
++}
++
++/**
++ * __irq_domain_add() - Allocate a new irq_domain data structure
++ * @fwnode: firmware node for the interrupt controller
++ * @size: Size of linear map; 0 for radix mapping only
++ * @hwirq_max: Maximum number of interrupts supported by controller
++ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no
++ * direct mapping
++ * @ops: domain callbacks
++ * @host_data: Controller private data pointer
++ *
++ * Allocates and initializes an irq_domain structure.
++ * Returns pointer to IRQ domain, or NULL on failure.
++ */
++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size,
++ irq_hw_number_t hwirq_max, int direct_max,
++ const struct irq_domain_ops *ops,
++ void *host_data)
++{
++ struct irq_domain *domain;
++
++ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max,
++ ops, host_data);
++ if (domain)
++ __irq_domain_publish(domain);
++
+ return domain;
+ }
+ EXPORT_SYMBOL_GPL(__irq_domain_add);
+@@ -1110,12 +1131,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent,
+ struct irq_domain *domain;
+
+ if (size)
+- domain = irq_domain_create_linear(fwnode, size, ops, host_data);
++ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data);
+ else
+- domain = irq_domain_create_tree(fwnode, ops, host_data);
++ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data);
++
+ if (domain) {
+ domain->parent = parent;
+ domain->flags |= flags;
++
++ __irq_domain_publish(domain);
+ }
+
+ return domain;
+--
+2.39.2
+
--- /dev/null
+From c1c69cf22e1bc56cc9387569f82fa6bd417f7172 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:42:48 +0100
+Subject: irqdomain: Fix mapping-creation race
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 601363cc08da25747feb87c55573dd54de91d66a ]
+
+Parallel probing of devices that share interrupts (e.g. when a driver
+uses asynchronous probing) can currently result in two mappings being
+created for the same hardware interrupt due to missing serialisation.
+
+Make sure to hold the irq_domain_mutex when creating mappings so that
+looking for an existing mapping before creating a new one is done
+atomically.
+
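+In other words, the lookup and the allocation must form one critical
+section. A self-contained userspace sketch of the find-or-create
+pattern the patch below adopts (demo names, a pthread mutex standing
+in for irq_domain_mutex):
+
+  #include <pthread.h>
+
+  #define DEMO_MAX_HWIRQ 64
+
+  static int demo_map[DEMO_MAX_HWIRQ];   /* hwirq -> virq, 0 = none */
+  static int demo_next_virq = 1;
+  static pthread_mutex_t demo_lock = PTHREAD_MUTEX_INITIALIZER;
+
+  static int demo_create_mapping(unsigned int hwirq)
+  {
+          int virq = 0;
+
+          if (hwirq >= DEMO_MAX_HWIRQ)
+                  return 0;
+
+          pthread_mutex_lock(&demo_lock);
+          virq = demo_map[hwirq];         /* look for an existing mapping */
+          if (!virq)                      /* ...and create one atomically */
+                  virq = demo_map[hwirq] = demo_next_virq++;
+          pthread_mutex_unlock(&demo_lock);
+
+          return virq;
+  }
+
+  int main(void)
+  {
+          /* both calls must yield the same virq, even if concurrent */
+          return demo_create_mapping(5) == demo_create_mapping(5) ? 0 : 1;
+  }
+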
+Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers")
+Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
+Link: https://lore.kernel.org/r/YuJXMHoT4ijUxnRb@hovoldconsulting.com
+Cc: stable@vger.kernel.org # 4.8
+Cc: Dmitry Torokhov <dtor@chromium.org>
+Cc: Jon Hunter <jonathanh@nvidia.com>
+Tested-by: Hsin-Yi Wang <hsinyi@chromium.org>
+Tested-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230213104302.17307-7-johan+linaro@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/irqdomain.c | 64 ++++++++++++++++++++++++++++++------------
+ 1 file changed, 46 insertions(+), 18 deletions(-)
+
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index a1e1433a07754..245e317c72908 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -25,6 +25,9 @@ static DEFINE_MUTEX(irq_domain_mutex);
+
+ static struct irq_domain *irq_default_domain;
+
++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
++ unsigned int nr_irqs, int node, void *arg,
++ bool realloc, const struct irq_affinity_desc *affinity);
+ static void irq_domain_check_hierarchy(struct irq_domain *domain);
+
+ struct irqchip_fwid {
+@@ -637,9 +640,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
+ }
+ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+
+-static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
+- irq_hw_number_t hwirq,
+- const struct irq_affinity_desc *affinity)
++static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain,
++ irq_hw_number_t hwirq,
++ const struct irq_affinity_desc *affinity)
+ {
+ struct device_node *of_node = irq_domain_get_of_node(domain);
+ int virq;
+@@ -654,7 +657,7 @@ static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
+ return 0;
+ }
+
+- if (irq_domain_associate(domain, virq, hwirq)) {
++ if (irq_domain_associate_locked(domain, virq, hwirq)) {
+ irq_free_desc(virq);
+ return 0;
+ }
+@@ -690,14 +693,20 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+ return 0;
+ }
+
++ mutex_lock(&irq_domain_mutex);
++
+ /* Check if mapping already exists */
+ virq = irq_find_mapping(domain, hwirq);
+ if (virq) {
+ pr_debug("existing mapping on virq %d\n", virq);
+- return virq;
++ goto out;
+ }
+
+- return __irq_create_mapping_affinity(domain, hwirq, affinity);
++ virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity);
++out:
++ mutex_unlock(&irq_domain_mutex);
++
++ return virq;
+ }
+ EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
+
+@@ -799,6 +808,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+ if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK))
+ type &= IRQ_TYPE_SENSE_MASK;
+
++ mutex_lock(&irq_domain_mutex);
++
+ /*
+ * If we've already configured this interrupt,
+ * don't do it again, or hell will break loose.
+@@ -811,7 +822,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+ * interrupt number.
+ */
+ if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq))
+- return virq;
++ goto out;
+
+ /*
+ * If the trigger type has not been set yet, then set
+@@ -819,35 +830,45 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+ */
+ if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) {
+ irq_data = irq_get_irq_data(virq);
+- if (!irq_data)
+- return 0;
++ if (!irq_data) {
++ virq = 0;
++ goto out;
++ }
+
+ irqd_set_trigger_type(irq_data, type);
+- return virq;
++ goto out;
+ }
+
+ pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n",
+ hwirq, of_node_full_name(to_of_node(fwspec->fwnode)));
+- return 0;
++ virq = 0;
++ goto out;
+ }
+
+ if (irq_domain_is_hierarchy(domain)) {
+- virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec);
+- if (virq <= 0)
+- return 0;
++ virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE,
++ fwspec, false, NULL);
++ if (virq <= 0) {
++ virq = 0;
++ goto out;
++ }
+ } else {
+ /* Create mapping */
+- virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
++ virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL);
+ if (!virq)
+- return virq;
++ goto out;
+ }
+
+ irq_data = irq_get_irq_data(virq);
+- if (WARN_ON(!irq_data))
+- return 0;
++ if (WARN_ON(!irq_data)) {
++ virq = 0;
++ goto out;
++ }
+
+ /* Store trigger type */
+ irqd_set_trigger_type(irq_data, type);
++out:
++ mutex_unlock(&irq_domain_mutex);
+
+ return virq;
+ }
+@@ -1856,6 +1877,13 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq,
+ irq_set_handler_data(virq, handler_data);
+ }
+
++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
++ unsigned int nr_irqs, int node, void *arg,
++ bool realloc, const struct irq_affinity_desc *affinity)
++{
++ return -EINVAL;
++}
++
+ static void irq_domain_check_hierarchy(struct irq_domain *domain)
+ {
+ }
+--
+2.39.2
+
--- /dev/null
+From 85884bb78e6116baa316676ba22c6ffc9cba7a43 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:42:46 +0100
+Subject: irqdomain: Look for existing mapping only once
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 6e6f75c9c98d2d246d90411ff2b6f0cd271f4cba ]
+
+Avoid looking for an existing mapping twice when creating a new mapping
+using irq_create_fwspec_mapping() by factoring out the actual allocation
+which is shared with irq_create_mapping_affinity().
+
+The new helper function will also be used to fix a shared-interrupt
+mapping race, hence the Fixes tag.
+
+Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
+Cc: stable@vger.kernel.org # 4.8
+Tested-by: Hsin-Yi Wang <hsinyi@chromium.org>
+Tested-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230213104302.17307-5-johan+linaro@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/irqdomain.c | 60 +++++++++++++++++++++++-------------------
+ 1 file changed, 33 insertions(+), 27 deletions(-)
+
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index fe07888a7d96a..d18c25a41673f 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -637,6 +637,34 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain)
+ }
+ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+
++static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain,
++ irq_hw_number_t hwirq,
++ const struct irq_affinity_desc *affinity)
++{
++ struct device_node *of_node = irq_domain_get_of_node(domain);
++ int virq;
++
++ pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
++
++ /* Allocate a virtual interrupt number */
++ virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
++ affinity);
++ if (virq <= 0) {
++ pr_debug("-> virq allocation failed\n");
++ return 0;
++ }
++
++ if (irq_domain_associate(domain, virq, hwirq)) {
++ irq_free_desc(virq);
++ return 0;
++ }
++
++ pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
++ hwirq, of_node_full_name(of_node), virq);
++
++ return virq;
++}
++
+ /**
+ * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space
+ * @domain: domain owning this hardware interrupt or NULL for default domain
+@@ -649,14 +677,11 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping);
+ * on the number returned from that call.
+ */
+ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+- irq_hw_number_t hwirq,
+- const struct irq_affinity_desc *affinity)
++ irq_hw_number_t hwirq,
++ const struct irq_affinity_desc *affinity)
+ {
+- struct device_node *of_node;
+ int virq;
+
+- pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq);
+-
+ /* Look for default domain if necessary */
+ if (domain == NULL)
+ domain = irq_default_domain;
+@@ -664,34 +689,15 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain,
+ WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq);
+ return 0;
+ }
+- pr_debug("-> using domain @%p\n", domain);
+-
+- of_node = irq_domain_get_of_node(domain);
+
+ /* Check if mapping already exists */
+ virq = irq_find_mapping(domain, hwirq);
+ if (virq) {
+- pr_debug("-> existing mapping on virq %d\n", virq);
++ pr_debug("existing mapping on virq %d\n", virq);
+ return virq;
+ }
+
+- /* Allocate a virtual interrupt number */
+- virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node),
+- affinity);
+- if (virq <= 0) {
+- pr_debug("-> virq allocation failed\n");
+- return 0;
+- }
+-
+- if (irq_domain_associate(domain, virq, hwirq)) {
+- irq_free_desc(virq);
+- return 0;
+- }
+-
+- pr_debug("irq %lu on domain %s mapped to virtual irq %u\n",
+- hwirq, of_node_full_name(of_node), virq);
+-
+- return virq;
++ return __irq_create_mapping_affinity(domain, hwirq, affinity);
+ }
+ EXPORT_SYMBOL_GPL(irq_create_mapping_affinity);
+
+@@ -831,7 +837,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec)
+ return 0;
+ } else {
+ /* Create mapping */
+- virq = irq_create_mapping(domain, hwirq);
++ virq = __irq_create_mapping_affinity(domain, hwirq, NULL);
+ if (!virq)
+ return virq;
+ }
+--
+2.39.2
+
--- /dev/null
+From af649d9740bab8166d36ad7c818c2e53c22945f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Feb 2023 11:42:47 +0100
+Subject: irqdomain: Refactor __irq_domain_alloc_irqs()
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit d55f7f4c58c07beb5050a834bf57ae2ede599c7e ]
+
+Refactor __irq_domain_alloc_irqs() so that it can be called internally
+while holding the irq_domain_mutex.
+
+This will be used to fix a shared-interrupt mapping race, hence the
+Fixes tag.
+
+Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings")
+Cc: stable@vger.kernel.org # 4.8
+Tested-by: Hsin-Yi Wang <hsinyi@chromium.org>
+Tested-by: Mark-PK Tsai <mark-pk.tsai@mediatek.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/irq/irqdomain.c | 88 +++++++++++++++++++++++-------------------
+ 1 file changed, 48 insertions(+), 40 deletions(-)
+
+diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
+index d18c25a41673f..a1e1433a07754 100644
+--- a/kernel/irq/irqdomain.c
++++ b/kernel/irq/irqdomain.c
+@@ -1411,40 +1411,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain,
+ return domain->ops->alloc(domain, irq_base, nr_irqs, arg);
+ }
+
+-/**
+- * __irq_domain_alloc_irqs - Allocate IRQs from domain
+- * @domain: domain to allocate from
+- * @irq_base: allocate specified IRQ number if irq_base >= 0
+- * @nr_irqs: number of IRQs to allocate
+- * @node: NUMA node id for memory allocation
+- * @arg: domain specific argument
+- * @realloc: IRQ descriptors have already been allocated if true
+- * @affinity: Optional irq affinity mask for multiqueue devices
+- *
+- * Allocate IRQ numbers and initialized all data structures to support
+- * hierarchy IRQ domains.
+- * Parameter @realloc is mainly to support legacy IRQs.
+- * Returns error code or allocated IRQ number
+- *
+- * The whole process to setup an IRQ has been split into two steps.
+- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
+- * descriptor and required hardware resources. The second step,
+- * irq_domain_activate_irq(), is to program the hardware with preallocated
+- * resources. In this way, it's easier to rollback when failing to
+- * allocate resources.
+- */
+-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+- unsigned int nr_irqs, int node, void *arg,
+- bool realloc, const struct irq_affinity_desc *affinity)
++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base,
++ unsigned int nr_irqs, int node, void *arg,
++ bool realloc, const struct irq_affinity_desc *affinity)
+ {
+ int i, ret, virq;
+
+- if (domain == NULL) {
+- domain = irq_default_domain;
+- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
+- return -EINVAL;
+- }
+-
+ if (realloc && irq_base >= 0) {
+ virq = irq_base;
+ } else {
+@@ -1463,24 +1435,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ goto out_free_desc;
+ }
+
+- mutex_lock(&irq_domain_mutex);
+ ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg);
+- if (ret < 0) {
+- mutex_unlock(&irq_domain_mutex);
++ if (ret < 0)
+ goto out_free_irq_data;
+- }
+
+ for (i = 0; i < nr_irqs; i++) {
+ ret = irq_domain_trim_hierarchy(virq + i);
+- if (ret) {
+- mutex_unlock(&irq_domain_mutex);
++ if (ret)
+ goto out_free_irq_data;
+- }
+ }
+-
++
+ for (i = 0; i < nr_irqs; i++)
+ irq_domain_insert_irq(virq + i);
+- mutex_unlock(&irq_domain_mutex);
+
+ return virq;
+
+@@ -1491,6 +1457,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
+ return ret;
+ }
+
++/**
++ * __irq_domain_alloc_irqs - Allocate IRQs from domain
++ * @domain: domain to allocate from
++ * @irq_base: allocate specified IRQ number if irq_base >= 0
++ * @nr_irqs: number of IRQs to allocate
++ * @node: NUMA node id for memory allocation
++ * @arg: domain specific argument
++ * @realloc: IRQ descriptors have already been allocated if true
++ * @affinity: Optional irq affinity mask for multiqueue devices
++ *
++ * Allocate IRQ numbers and initialized all data structures to support
++ * hierarchy IRQ domains.
++ * Parameter @realloc is mainly to support legacy IRQs.
++ * Returns error code or allocated IRQ number
++ *
++ * The whole process to setup an IRQ has been split into two steps.
++ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ
++ * descriptor and required hardware resources. The second step,
++ * irq_domain_activate_irq(), is to program the hardware with preallocated
++ * resources. In this way, it's easier to rollback when failing to
++ * allocate resources.
++ */
++int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base,
++ unsigned int nr_irqs, int node, void *arg,
++ bool realloc, const struct irq_affinity_desc *affinity)
++{
++ int ret;
++
++ if (domain == NULL) {
++ domain = irq_default_domain;
++ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n"))
++ return -EINVAL;
++ }
++
++ mutex_lock(&irq_domain_mutex);
++ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg,
++ realloc, affinity);
++ mutex_unlock(&irq_domain_mutex);
++
++ return ret;
++}
++
+ /* The irq_data was moved, fix the revmap to refer to the new location */
+ static void irq_domain_fix_revmap(struct irq_data *d)
+ {
+--
+2.39.2
+
--- /dev/null
+From ab07529d96375f4117929020ea88db6ed07d8abf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Apr 2021 17:41:11 +0200
+Subject: landlock: Add object management
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mickaël Salaün <mic@linux.microsoft.com>
+
+[ Upstream commit 90945448e9830aa1b39d7acaa4e0724a001e2ff8 ]
+
+A Landlock object makes it possible to identify a kernel object (e.g.
+an inode).
+A Landlock rule is a set of access rights allowed on an object. Rules
+are grouped in rulesets that may be tied to a set of processes (i.e.
+subjects) to enforce a scoped access-control (i.e. a domain).
+
+Because Landlock's goal is to empower processes (especially
+unprivileged ones) to sandbox themselves, we cannot rely on a
+system-wide object identification such as file extended attributes.
+Indeed, we need innocuous, composable and modular access controls.
+
+The main challenge with these constraints is to identify kernel objects
+only while this identification is useful (i.e. when a security policy
+makes use of the object), and this identification data should be freed
+once no policy is using it. This ephemeral tagging should not and may
+not be written to the filesystem. We then need to manage the lifetime
+of a rule according to the lifetime of its objects. To avoid a global
+lock, this implementation makes use of RCU and counters to safely
+reference objects.
+
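+As a rough userspace miniature of that lifetime rule (demo names; the
+kernel uses refcount_dec_and_lock() and defers the free with
+kfree_rcu()): the final reference drop must happen under the object
+lock, so a concurrent "get" cannot race with the teardown.
+
+  #include <pthread.h>
+  #include <stdatomic.h>
+  #include <stdbool.h>
+
+  /* drop one reference; returns true with the lock held if the
+   * caller must tear the object down */
+  static bool demo_dec_and_lock(atomic_int *usage, pthread_mutex_t *lock)
+  {
+          int old = atomic_load(usage);
+
+          /* fast path: provably not the last reference */
+          while (old > 1) {
+                  if (atomic_compare_exchange_weak(usage, &old, old - 1))
+                          return false;
+          }
+
+          /* slow path: take the lock before the final decrement */
+          pthread_mutex_lock(lock);
+          if (atomic_fetch_sub(usage, 1) != 1) {
+                  pthread_mutex_unlock(lock);
+                  return false;
+          }
+          return true;    /* usage hit zero; lock is held */
+  }
+
+  int main(void)
+  {
+          atomic_int usage = 2;
+          pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+
+          demo_dec_and_lock(&usage, &lock);       /* not last: no-op */
+          if (demo_dec_and_lock(&usage, &lock))   /* last reference */
+                  pthread_mutex_unlock(&lock);    /* ...release here */
+          return 0;
+  }
+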
+A following commit uses this generic object management for inodes.
+
+Cc: James Morris <jmorris@namei.org>
+Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
+Reviewed-by: Jann Horn <jannh@google.com>
+Acked-by: Serge Hallyn <serge@hallyn.com>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20210422154123.13086-2-mic@digikod.net
+Signed-off-by: James Morris <jamorris@linux.microsoft.com>
+Stable-dep-of: 366617a69e60 ("selftests/landlock: Skip overlayfs tests when not supported")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ MAINTAINERS | 10 +++++
+ security/Kconfig | 1 +
+ security/Makefile | 2 +
+ security/landlock/Kconfig | 21 +++++++++
+ security/landlock/Makefile | 3 ++
+ security/landlock/object.c | 67 ++++++++++++++++++++++++++++
+ security/landlock/object.h | 91 ++++++++++++++++++++++++++++++++++++++
+ 7 files changed, 195 insertions(+)
+ create mode 100644 security/landlock/Kconfig
+ create mode 100644 security/landlock/Makefile
+ create mode 100644 security/landlock/object.c
+ create mode 100644 security/landlock/object.h
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 6c5efc4013ab5..72815c1a325eb 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9836,6 +9836,16 @@ F: net/core/sock_map.c
+ F: net/ipv4/tcp_bpf.c
+ F: net/ipv4/udp_bpf.c
+
++LANDLOCK SECURITY MODULE
++M: Mickaël Salaün <mic@digikod.net>
++L: linux-security-module@vger.kernel.org
++S: Supported
++W: https://landlock.io
++T: git https://github.com/landlock-lsm/linux.git
++F: security/landlock/
++K: landlock
++K: LANDLOCK
++
+ LANTIQ / INTEL Ethernet drivers
+ M: Hauke Mehrtens <hauke@hauke-m.de>
+ L: netdev@vger.kernel.org
+diff --git a/security/Kconfig b/security/Kconfig
+index 9893c316da897..7cb5476306676 100644
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -230,6 +230,7 @@ source "security/loadpin/Kconfig"
+ source "security/yama/Kconfig"
+ source "security/safesetid/Kconfig"
+ source "security/lockdown/Kconfig"
++source "security/landlock/Kconfig"
+
+ source "security/integrity/Kconfig"
+
+diff --git a/security/Makefile b/security/Makefile
+index 3baf435de5411..47e432900e242 100644
+--- a/security/Makefile
++++ b/security/Makefile
+@@ -13,6 +13,7 @@ subdir-$(CONFIG_SECURITY_LOADPIN) += loadpin
+ subdir-$(CONFIG_SECURITY_SAFESETID) += safesetid
+ subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown
+ subdir-$(CONFIG_BPF_LSM) += bpf
++subdir-$(CONFIG_SECURITY_LANDLOCK) += landlock
+
+ # always enable default capabilities
+ obj-y += commoncap.o
+@@ -32,6 +33,7 @@ obj-$(CONFIG_SECURITY_SAFESETID) += safesetid/
+ obj-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown/
+ obj-$(CONFIG_CGROUPS) += device_cgroup.o
+ obj-$(CONFIG_BPF_LSM) += bpf/
++obj-$(CONFIG_SECURITY_LANDLOCK) += landlock/
+
+ # Object integrity file lists
+ subdir-$(CONFIG_INTEGRITY) += integrity
+diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig
+new file mode 100644
+index 0000000000000..c1e862a384107
+--- /dev/null
++++ b/security/landlock/Kconfig
+@@ -0,0 +1,21 @@
++# SPDX-License-Identifier: GPL-2.0-only
++
++config SECURITY_LANDLOCK
++ bool "Landlock support"
++ depends on SECURITY
++ select SECURITY_PATH
++ help
++ Landlock is a sandboxing mechanism that enables processes to restrict
++ themselves (and their future children) by gradually enforcing
++ tailored access control policies. A Landlock security policy is a
++ set of access rights (e.g. open a file in read-only, make a
++ directory, etc.) tied to a file hierarchy. Such policy can be
++ configured and enforced by any processes for themselves using the
++ dedicated system calls: landlock_create_ruleset(),
++ landlock_add_rule(), and landlock_restrict_self().
++
++ See Documentation/userspace-api/landlock.rst for further information.
++
++ If you are unsure how to answer this question, answer N. Otherwise,
++ you should also prepend "landlock," to the content of CONFIG_LSM to
++ enable Landlock at boot time.
+diff --git a/security/landlock/Makefile b/security/landlock/Makefile
+new file mode 100644
+index 0000000000000..cb6deefbf4c09
+--- /dev/null
++++ b/security/landlock/Makefile
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
++
++landlock-y := object.o
+diff --git a/security/landlock/object.c b/security/landlock/object.c
+new file mode 100644
+index 0000000000000..d674fdf9ff04f
+--- /dev/null
++++ b/security/landlock/object.c
+@@ -0,0 +1,67 @@
++// SPDX-License-Identifier: GPL-2.0-only
++/*
++ * Landlock LSM - Object management
++ *
++ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright © 2018-2020 ANSSI
++ */
++
++#include <linux/bug.h>
++#include <linux/compiler_types.h>
++#include <linux/err.h>
++#include <linux/kernel.h>
++#include <linux/rcupdate.h>
++#include <linux/refcount.h>
++#include <linux/slab.h>
++#include <linux/spinlock.h>
++
++#include "object.h"
++
++struct landlock_object *landlock_create_object(
++ const struct landlock_object_underops *const underops,
++ void *const underobj)
++{
++ struct landlock_object *new_object;
++
++ if (WARN_ON_ONCE(!underops || !underobj))
++ return ERR_PTR(-ENOENT);
++ new_object = kzalloc(sizeof(*new_object), GFP_KERNEL_ACCOUNT);
++ if (!new_object)
++ return ERR_PTR(-ENOMEM);
++ refcount_set(&new_object->usage, 1);
++ spin_lock_init(&new_object->lock);
++ new_object->underops = underops;
++ new_object->underobj = underobj;
++ return new_object;
++}
++
++/*
++ * The caller must own the object (i.e. thanks to object->usage) to safely put
++ * it.
++ */
++void landlock_put_object(struct landlock_object *const object)
++{
++ /*
++ * The call to @object->underops->release(object) might sleep, e.g.
++ * because of iput().
++ */
++ might_sleep();
++ if (!object)
++ return;
++
++ /*
++ * If the @object's refcount cannot drop to zero, we can just decrement
++ * the refcount without holding a lock. Otherwise, the decrement must
++ * happen under @object->lock for synchronization with things like
++ * get_inode_object().
++ */
++ if (refcount_dec_and_lock(&object->usage, &object->lock)) {
++ __acquire(&object->lock);
++ /*
++ * With @object->lock initially held, remove the reference from
++ * @object->underobj to @object (if it still exists).
++ */
++ object->underops->release(object);
++ kfree_rcu(object, rcu_free);
++ }
++}
+diff --git a/security/landlock/object.h b/security/landlock/object.h
+new file mode 100644
+index 0000000000000..3f80674c6c8d3
+--- /dev/null
++++ b/security/landlock/object.h
+@@ -0,0 +1,91 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++/*
++ * Landlock LSM - Object management
++ *
++ * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright © 2018-2020 ANSSI
++ */
++
++#ifndef _SECURITY_LANDLOCK_OBJECT_H
++#define _SECURITY_LANDLOCK_OBJECT_H
++
++#include <linux/compiler_types.h>
++#include <linux/refcount.h>
++#include <linux/spinlock.h>
++
++struct landlock_object;
++
++/**
++ * struct landlock_object_underops - Operations on an underlying object
++ */
++struct landlock_object_underops {
++ /**
++ * @release: Releases the underlying object (e.g. iput() for an inode).
++ */
++ void (*release)(struct landlock_object *const object)
++ __releases(object->lock);
++};
++
++/**
++ * struct landlock_object - Security blob tied to a kernel object
++ *
++ * The goal of this structure is to enable to tie a set of ephemeral access
++ * rights (pertaining to different domains) to a kernel object (e.g an inode)
++ * in a safe way. This implies to handle concurrent use and modification.
++ *
++ * The lifetime of a &struct landlock_object depends on the rules referring to
++ * it.
++ */
++struct landlock_object {
++ /**
++ * @usage: This counter is used to tie an object to the rules matching
++ * it or to keep it alive while adding a new rule. If this counter
++ * reaches zero, this struct must not be modified, but this counter can
++ * still be read from within an RCU read-side critical section. When
++ * adding a new rule to an object with a usage counter of zero, we must
++ * wait until the pointer to this object is set to NULL (or recycled).
++ */
++ refcount_t usage;
++ /**
++ * @lock: Protects against concurrent modifications. This lock must be
++ * held from the time @usage drops to zero until any weak references
++ * from @underobj to this object have been cleaned up.
++ *
++ * Lock ordering: inode->i_lock nests inside this.
++ */
++ spinlock_t lock;
++ /**
++ * @underobj: Used when cleaning up an object and to mark an object as
++ * tied to its underlying kernel structure. This pointer is protected
++ * by @lock. Cf. landlock_release_inodes() and release_inode().
++ */
++ void *underobj;
++ union {
++ /**
++ * @rcu_free: Enables lockless use of @usage, @lock and
++ * @underobj from within an RCU read-side critical section.
++ * @rcu_free and @underops are only used by
++ * landlock_put_object().
++ */
++ struct rcu_head rcu_free;
++ /**
++ * @underops: Enables landlock_put_object() to release the
++ * underlying object (e.g. inode).
++ */
++ const struct landlock_object_underops *underops;
++ };
++};
++
++struct landlock_object *landlock_create_object(
++ const struct landlock_object_underops *const underops,
++ void *const underobj);
++
++void landlock_put_object(struct landlock_object *const object);
++
++static inline void landlock_get_object(struct landlock_object *const object)
++{
++ if (object)
++ refcount_inc(&object->usage);
++}
++
++#endif /* _SECURITY_LANDLOCK_OBJECT_H */
+--
+2.39.2
+
--- /dev/null
+From 9a4a568b921a12aa558adab24b2201df4e5b2419 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Mar 2023 01:39:13 +0900
+Subject: net: caif: Fix use-after-free in cfusbl_device_notify()
+
+From: Shigeru Yoshida <syoshida@redhat.com>
+
+[ Upstream commit 9781e98a97110f5e76999058368b4be76a788484 ]
+
+syzbot reported use-after-free in cfusbl_device_notify() [1]. This
+causes a stack trace like below:
+
+BUG: KASAN: use-after-free in cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138
+Read of size 8 at addr ffff88807ac4e6f0 by task kworker/u4:6/1214
+
+CPU: 0 PID: 1214 Comm: kworker/u4:6 Not tainted 5.19.0-rc3-syzkaller-00146-g92f20ff72066 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Workqueue: netns cleanup_net
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+ print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313
+ print_report mm/kasan/report.c:429 [inline]
+ kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491
+ cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138
+ notifier_call_chain+0xb5/0x200 kernel/notifier.c:87
+ call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945
+ call_netdevice_notifiers_extack net/core/dev.c:1983 [inline]
+ call_netdevice_notifiers net/core/dev.c:1997 [inline]
+ netdev_wait_allrefs_any net/core/dev.c:10227 [inline]
+ netdev_run_todo+0xbc0/0x10f0 net/core/dev.c:10341
+ default_device_exit_batch+0x44e/0x590 net/core/dev.c:11334
+ ops_exit_list+0x125/0x170 net/core/net_namespace.c:167
+ cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594
+ process_one_work+0x996/0x1610 kernel/workqueue.c:2289
+ worker_thread+0x665/0x1080 kernel/workqueue.c:2436
+ kthread+0x2e9/0x3a0 kernel/kthread.c:376
+ ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302
+ </TASK>
+
+When unregistering a net device, unregister_netdevice_many_notify()
+sets the device's reg_state to NETREG_UNREGISTERING, calls notifiers
+with NETDEV_UNREGISTER, and adds the device to the todo list.
+
+Later on, devices in the todo list are processed by netdev_run_todo().
+netdev_run_todo() waits for each device's reference count to become 1
+while rebroadcasting the NETDEV_UNREGISTER notification.
+
+When cfusbl_device_notify() is called with NETDEV_UNREGISTER multiple
+times, the parent device might be freed, causing a use-after-free.
+Processing NETDEV_UNREGISTER multiple times also causes an imbalance
+of the module's reference count.
+
+This patch fixes the issue by accepting only the first
+NETDEV_UNREGISTER notification.
+
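+A compact sketch of that guard (demo names; the actual check is in the
+diff below): the notifier handles only the first unregister event and
+ignores later rebroadcasts, for which the core has already advanced
+the device's registration state.
+
+  #include <stdio.h>
+
+  enum demo_reg_state { DEMO_REGISTERED, DEMO_UNREGISTERING,
+                        DEMO_UNREGISTERED };
+
+  struct demo_dev {
+          enum demo_reg_state reg_state;
+  };
+
+  static int demo_notify_unregister(struct demo_dev *dev)
+  {
+          /* rebroadcasts arrive with reg_state >= UNREGISTERED */
+          if (dev->reg_state >= DEMO_UNREGISTERED)
+                  return 0;
+
+          printf("tearing down\n");       /* runs exactly once */
+          return 0;
+  }
+
+  int main(void)
+  {
+          struct demo_dev dev = { .reg_state = DEMO_UNREGISTERING };
+
+          demo_notify_unregister(&dev);           /* handled */
+          dev.reg_state = DEMO_UNREGISTERED;      /* core finishes */
+          demo_notify_unregister(&dev);           /* ignored */
+          return 0;
+  }
+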
+Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface")
+CC: sjur.brandeland@stericsson.com <sjur.brandeland@stericsson.com>
+Reported-by: syzbot+b563d33852b893653a9e@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?id=c3bfd8e2450adab3bffe4d80821fbbced600407f [1]
+Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
+Link: https://lore.kernel.org/r/20230301163913.391304-1-syoshida@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/caif/caif_usb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c
+index b02e1292f7f19..24488a4e2d26e 100644
+--- a/net/caif/caif_usb.c
++++ b/net/caif/caif_usb.c
+@@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what,
+ struct usb_device *usbdev;
+ int res;
+
++ if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED)
++ return 0;
++
+ /* Check whether we have a NCM device, and find its VID/PID. */
+ if (!(dev->dev.parent && dev->dev.parent->driver &&
+ strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0))
+--
+2.39.2
+
--- /dev/null
+From 087a540b3deaabe61acf91b3ef7446bd9a2519ee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Mar 2023 13:43:20 +0000
+Subject: net: ethernet: mtk_eth_soc: fix RX data corruption issue
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Daniel Golle <daniel@makrotopia.org>
+
+[ Upstream commit 193250ace270fecd586dd2d0dfbd9cbd2ade977f ]
+
+Fix a data corruption issue with SerDes-connected PHYs operating at
+1.25 Gbps, where we could previously observe about 30% packet loss
+while the bad-packet counter was increasing.
+
+As almost all boards with MediaTek MT7622 or MT7986 use either the MT7531
+switch IC operating at 3.125Gbps SerDes rate or single-port PHYs using
+rate-adaptation to 2500Base-X mode, this issue only got exposed now when
+we started trying to use SFP modules operating at 1.25 Gbps with the
+BananaPi R3 board.
+
+The fix is to set bit 12 which disables the RX FIFO clear function when
+setting up MAC MCR, MediaTek SDK did the same change stating:
+"If without this patch, kernel might receive invalid packets that are
+corrupted by GMAC."[1]
+
+[1]: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/d8a2975939a12686c4a95c40db21efdc3f821f63
+
+Fixes: 42c03844e93d ("net-next: mediatek: add support for MediaTek MT7622 SoC")
+Tested-by: Bjørn Mork <bjorn@mork.no>
+Signed-off-by: Daniel Golle <daniel@makrotopia.org>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Link: https://lore.kernel.org/r/138da2735f92c8b6f8578ec2e5a794ee515b665f.1677937317.git.daniel@makrotopia.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++-
+ drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 +
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 217dc67c48fa2..a8319295f1ab2 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -354,7 +354,8 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode,
+ mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id));
+ mcr_new = mcr_cur;
+ mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE |
+- MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK;
++ MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK |
++ MAC_MCR_RX_FIFO_CLR_DIS;
+
+ /* Only update control register when needed! */
+ if (mcr_new != mcr_cur)
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+index 54a7cd93cc0fe..0ca3223ad5457 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+@@ -339,6 +339,7 @@
+ #define MAC_MCR_FORCE_MODE BIT(15)
+ #define MAC_MCR_TX_EN BIT(14)
+ #define MAC_MCR_RX_EN BIT(13)
++#define MAC_MCR_RX_FIFO_CLR_DIS BIT(12)
+ #define MAC_MCR_BACKOFF_EN BIT(9)
+ #define MAC_MCR_BACKPR_EN BIT(8)
+ #define MAC_MCR_FORCE_RX_FC BIT(5)
+--
+2.39.2
+
--- /dev/null
+From 3ca18a6f22982efdd0b4321d6431dacd3483658b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Mar 2023 08:43:07 -0700
+Subject: net: lan78xx: fix accessing the LAN7800's internal phy specific
+ registers from the MAC driver
+
+From: Yuiko Oshino <yuiko.oshino@microchip.com>
+
+[ Upstream commit e57cf3639c323eeed05d3725fd82f91b349adca8 ]
+
+Move the LAN7800 internal phy (phy ID 0x0007c132) specific register
+accesses to the phy driver (microchip.c).
+
+Fix the error reported by Enguerrand de Ribaucourt in December 2022,
+"Some operations during the cable switch workaround modify the register
+LAN88XX_INT_MASK of the PHY. However, this register is specific to the
+LAN8835 PHY. For instance, if a DP8322I PHY is connected to the LAN7801,
+that register (0x19), corresponds to the LED and MAC address
+configuration, resulting in unapropriate behavior."
+
+I did not test with the DP8322I PHY, but I tested with an EVB-LAN7800
+with the internal PHY.
+
+Fixes: 14437e3fa284 ("lan78xx: workaround of forced 100 Full/Half duplex mode error")
+Signed-off-by: Yuiko Oshino <yuiko.oshino@microchip.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20230301154307.30438-1-yuiko.oshino@microchip.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/microchip.c | 32 ++++++++++++++++++++++++++++++++
+ drivers/net/usb/lan78xx.c | 27 +--------------------------
+ 2 files changed, 33 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c
+index a644e8e5071c3..375bbd60b38af 100644
+--- a/drivers/net/phy/microchip.c
++++ b/drivers/net/phy/microchip.c
+@@ -326,6 +326,37 @@ static int lan88xx_config_aneg(struct phy_device *phydev)
+ return genphy_config_aneg(phydev);
+ }
+
++static void lan88xx_link_change_notify(struct phy_device *phydev)
++{
++ int temp;
++
++ /* At forced 100 F/H mode, chip may fail to set mode correctly
++ * when cable is switched between long(~50+m) and short one.
++ * As workaround, set to 10 before setting to 100
++ * at forced 100 F/H mode.
++ */
++ if (!phydev->autoneg && phydev->speed == 100) {
++ /* disable phy interrupt */
++ temp = phy_read(phydev, LAN88XX_INT_MASK);
++ temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
++ phy_write(phydev, LAN88XX_INT_MASK, temp);
++
++ temp = phy_read(phydev, MII_BMCR);
++ temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
++ phy_write(phydev, MII_BMCR, temp); /* set to 10 first */
++ temp |= BMCR_SPEED100;
++ phy_write(phydev, MII_BMCR, temp); /* set to 100 later */
++
++ /* clear pending interrupt generated while workaround */
++ temp = phy_read(phydev, LAN88XX_INT_STS);
++
++ /* enable phy interrupt back */
++ temp = phy_read(phydev, LAN88XX_INT_MASK);
++ temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
++ phy_write(phydev, LAN88XX_INT_MASK, temp);
++ }
++}
++
+ static struct phy_driver microchip_phy_driver[] = {
+ {
+ .phy_id = 0x0007c130,
+@@ -339,6 +370,7 @@ static struct phy_driver microchip_phy_driver[] = {
+
+ .config_init = lan88xx_config_init,
+ .config_aneg = lan88xx_config_aneg,
++ .link_change_notify = lan88xx_link_change_notify,
+
+ .ack_interrupt = lan88xx_phy_ack_interrupt,
+ .config_intr = lan88xx_phy_config_intr,
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index 0b5b4f9c7c5b9..667984efeb3be 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -1843,33 +1843,8 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev)
+ static void lan78xx_link_status_change(struct net_device *net)
+ {
+ struct phy_device *phydev = net->phydev;
+- int temp;
+-
+- /* At forced 100 F/H mode, chip may fail to set mode correctly
+- * when cable is switched between long(~50+m) and short one.
+- * As workaround, set to 10 before setting to 100
+- * at forced 100 F/H mode.
+- */
+- if (!phydev->autoneg && (phydev->speed == 100)) {
+- /* disable phy interrupt */
+- temp = phy_read(phydev, LAN88XX_INT_MASK);
+- temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
+- phy_write(phydev, LAN88XX_INT_MASK, temp);
+
+- temp = phy_read(phydev, MII_BMCR);
+- temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
+- phy_write(phydev, MII_BMCR, temp); /* set to 10 first */
+- temp |= BMCR_SPEED100;
+- phy_write(phydev, MII_BMCR, temp); /* set to 100 later */
+-
+- /* clear pending interrupt generated while workaround */
+- temp = phy_read(phydev, LAN88XX_INT_STS);
+-
+- /* enable phy interrupt back */
+- temp = phy_read(phydev, LAN88XX_INT_MASK);
+- temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
+- phy_write(phydev, LAN88XX_INT_MASK, temp);
+- }
++ phy_print_status(phydev);
+ }
+
+ static int irq_map(struct irq_domain *d, unsigned int irq,
+--
+2.39.2
+
--- /dev/null
+From d6acee209596f0b12703c102f93bb74f3a892cdd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 16:37:54 +0000
+Subject: net: phylib: get rid of unnecessary locking
+
+From: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+
+[ Upstream commit f4b47a2e9463950df3e7c8b70e017877c1d4eb11 ]
+
+The locking in phy_probe() and phy_remove() does very little to prevent
+any races with e.g. phy_attach_direct(), but instead causes lockdep ABBA
+warnings. Remove it.
+
+======================================================
+WARNING: possible circular locking dependency detected
+6.2.0-dirty #1108 Tainted: G W E
+------------------------------------------------------
+ip/415 is trying to acquire lock:
+ffff5c268f81ef50 (&dev->lock){+.+.}-{3:3}, at: phy_attach_direct+0x17c/0x3a0 [libphy]
+
+but task is already holding lock:
+ffffaef6496cb518 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x154/0x560
+
+which lock already depends on the new lock.
+
+the existing dependency chain (in reverse order) is:
+
+-> #1 (rtnl_mutex){+.+.}-{3:3}:
+ __lock_acquire+0x35c/0x6c0
+ lock_acquire.part.0+0xcc/0x220
+ lock_acquire+0x68/0x84
+ __mutex_lock+0x8c/0x414
+ mutex_lock_nested+0x34/0x40
+ rtnl_lock+0x24/0x30
+ sfp_bus_add_upstream+0x34/0x150
+ phy_sfp_probe+0x4c/0x94 [libphy]
+ mv3310_probe+0x148/0x184 [marvell10g]
+ phy_probe+0x8c/0x200 [libphy]
+ call_driver_probe+0xbc/0x15c
+ really_probe+0xc0/0x320
+ __driver_probe_device+0x84/0x120
+ driver_probe_device+0x44/0x120
+ __device_attach_driver+0xc4/0x160
+ bus_for_each_drv+0x80/0xe0
+ __device_attach+0xb0/0x1f0
+ device_initial_probe+0x1c/0x2c
+ bus_probe_device+0xa4/0xb0
+ device_add+0x360/0x53c
+ phy_device_register+0x60/0xa4 [libphy]
+ fwnode_mdiobus_phy_device_register+0xc0/0x190 [fwnode_mdio]
+ fwnode_mdiobus_register_phy+0x160/0xd80 [fwnode_mdio]
+ of_mdiobus_register+0x140/0x340 [of_mdio]
+ orion_mdio_probe+0x298/0x3c0 [mvmdio]
+ platform_probe+0x70/0xe0
+ call_driver_probe+0x34/0x15c
+ really_probe+0xc0/0x320
+ __driver_probe_device+0x84/0x120
+ driver_probe_device+0x44/0x120
+ __driver_attach+0x104/0x210
+ bus_for_each_dev+0x78/0xdc
+ driver_attach+0x2c/0x3c
+ bus_add_driver+0x184/0x240
+ driver_register+0x80/0x13c
+ __platform_driver_register+0x30/0x3c
+ xt_compat_calc_jump+0x28/0xa4 [x_tables]
+ do_one_initcall+0x50/0x1b0
+ do_init_module+0x50/0x1fc
+ load_module+0x684/0x744
+ __do_sys_finit_module+0xc4/0x140
+ __arm64_sys_finit_module+0x28/0x34
+ invoke_syscall+0x50/0x120
+ el0_svc_common.constprop.0+0x6c/0x1b0
+ do_el0_svc+0x34/0x44
+ el0_svc+0x48/0xf0
+ el0t_64_sync_handler+0xb8/0xc0
+ el0t_64_sync+0x1a0/0x1a4
+
+-> #0 (&dev->lock){+.+.}-{3:3}:
+ check_prev_add+0xb4/0xc80
+ validate_chain+0x414/0x47c
+ __lock_acquire+0x35c/0x6c0
+ lock_acquire.part.0+0xcc/0x220
+ lock_acquire+0x68/0x84
+ __mutex_lock+0x8c/0x414
+ mutex_lock_nested+0x34/0x40
+ phy_attach_direct+0x17c/0x3a0 [libphy]
+ phylink_fwnode_phy_connect.part.0+0x70/0xe4 [phylink]
+ phylink_fwnode_phy_connect+0x48/0x60 [phylink]
+ mvpp2_open+0xec/0x2e0 [mvpp2]
+ __dev_open+0x104/0x214
+ __dev_change_flags+0x1d4/0x254
+ dev_change_flags+0x2c/0x7c
+ do_setlink+0x254/0xa50
+ __rtnl_newlink+0x430/0x514
+ rtnl_newlink+0x58/0x8c
+ rtnetlink_rcv_msg+0x17c/0x560
+ netlink_rcv_skb+0x64/0x150
+ rtnetlink_rcv+0x20/0x30
+ netlink_unicast+0x1d4/0x2b4
+ netlink_sendmsg+0x1a4/0x400
+ ____sys_sendmsg+0x228/0x290
+ ___sys_sendmsg+0x88/0xec
+ __sys_sendmsg+0x70/0xd0
+ __arm64_sys_sendmsg+0x2c/0x40
+ invoke_syscall+0x50/0x120
+ el0_svc_common.constprop.0+0x6c/0x1b0
+ do_el0_svc+0x34/0x44
+ el0_svc+0x48/0xf0
+ el0t_64_sync_handler+0xb8/0xc0
+ el0t_64_sync+0x1a0/0x1a4
+
+other info that might help us debug this:
+
+ Possible unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ lock(rtnl_mutex);
+ lock(&dev->lock);
+ lock(rtnl_mutex);
+ lock(&dev->lock);
+
+ *** DEADLOCK ***
+
+Fixes: 298e54fa810e ("net: phy: add core phylib sfp support")
+Reported-by: Marc Zyngier <maz@kernel.org>
+Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_device.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 3ef5aa6b72a7e..e771e0e8a9bc6 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -2833,8 +2833,6 @@ static int phy_probe(struct device *dev)
+ if (phydrv->flags & PHY_IS_INTERNAL)
+ phydev->is_internal = true;
+
+- mutex_lock(&phydev->lock);
+-
+ /* Deassert the reset signal */
+ phy_device_reset(phydev, 0);
+
+@@ -2903,12 +2901,10 @@ static int phy_probe(struct device *dev)
+ phydev->state = PHY_READY;
+
+ out:
+- /* Assert the reset signal */
++ /* Re-assert the reset signal on error */
+ if (err)
+ phy_device_reset(phydev, 1);
+
+- mutex_unlock(&phydev->lock);
+-
+ return err;
+ }
+
+@@ -2918,9 +2914,7 @@ static int phy_remove(struct device *dev)
+
+ cancel_delayed_work_sync(&phydev->state_queue);
+
+- mutex_lock(&phydev->lock);
+ phydev->state = PHY_DOWN;
+- mutex_unlock(&phydev->lock);
+
+ sfp_bus_del_upstream(phydev->sfp_bus);
+ phydev->sfp_bus = NULL;
+--
+2.39.2
+
--- /dev/null
+From 15737a362a18c35944df6ef005f6b797cc011146 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Mar 2023 11:23:46 +0800
+Subject: net/smc: fix fallback failed while sendmsg with fastopen
+
+From: D. Wythe <alibuda@linux.alibaba.com>
+
+[ Upstream commit ce7ca794712f186da99719e8b4e97bd5ddbb04c3 ]
+
+Before determining whether the msg has unsupported options, sendmsg()
+is prematurely terminated by a socket state check that runs too early.
+
+For an application, the typical usage of MSG_FASTOPEN looks like:
+
+fd = socket(...)
+/* rather than connect */
+sendto(fd, data, len, MSG_FASTOPEN)
+
+Hence, we need to check the flag before the state check, because the
+sock state here is always SMC_INIT when an application tries
+MSG_FASTOPEN. Once we find unsupported options, fall back to TCP.
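+
+As a self-contained illustration (a hypothetical userspace sketch, not
+part of this patch; the loopback address and port are made up), the
+path above can be exercised like:
+
+  #include <arpa/inet.h>
+  #include <netinet/in.h>
+  #include <sys/socket.h>
+
+  int main(void)
+  {
+          struct sockaddr_in dst = {
+                  .sin_family = AF_INET,
+                  .sin_port   = htons(12345),
+                  .sin_addr   = { htonl(INADDR_LOOPBACK) },
+          };
+          char data[] = "hello";
+          int fd = socket(AF_INET, SOCK_STREAM, 0);
+
+          /* no connect(): the data rides on the SYN, so the socket is
+           * still in its initial state when sendmsg() runs */
+          sendto(fd, data, sizeof(data), MSG_FASTOPEN,
+                 (struct sockaddr *)&dst, sizeof(dst));
+          return 0;
+  }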
+
+Fixes: ee9dfbef02d1 ("net/smc: handle sockopts forcing fallback")
+Signed-off-by: D. Wythe <alibuda@linux.alibaba.com>
+Signed-off-by: Simon Horman <simon.horman@corigine.com>
+
+v2 -> v1: Optimize code style
+Reviewed-by: Tony Lu <tonylu@linux.alibaba.com>
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/smc/af_smc.c | 13 ++++++++-----
+ 1 file changed, 8 insertions(+), 5 deletions(-)
+
+diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
+index 41cbc7c89c9d2..8ab84926816f6 100644
+--- a/net/smc/af_smc.c
++++ b/net/smc/af_smc.c
+@@ -1988,16 +1988,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+ {
+ struct sock *sk = sock->sk;
+ struct smc_sock *smc;
+- int rc = -EPIPE;
++ int rc;
+
+ smc = smc_sk(sk);
+ lock_sock(sk);
+- if ((sk->sk_state != SMC_ACTIVE) &&
+- (sk->sk_state != SMC_APPCLOSEWAIT1) &&
+- (sk->sk_state != SMC_INIT))
+- goto out;
+
++ /* SMC does not support connect with fastopen */
+ if (msg->msg_flags & MSG_FASTOPEN) {
++ /* not connected yet, fallback */
+ if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) {
+ smc_switch_to_fallback(smc);
+ smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP;
+@@ -2005,6 +2003,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
+ rc = -EINVAL;
+ goto out;
+ }
++ } else if ((sk->sk_state != SMC_ACTIVE) &&
++ (sk->sk_state != SMC_APPCLOSEWAIT1) &&
++ (sk->sk_state != SMC_INIT)) {
++ rc = -EPIPE;
++ goto out;
+ }
+
+ if (smc->use_fallback)
+--
+2.39.2
+
--- /dev/null
+From ff5576cb0372afdabad429f5c9fc87d6ffb29972 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Mar 2023 14:21:43 +0800
+Subject: net: stmmac: add to set device wake up flag when stmmac init phy
+
+From: Rongguang Wei <weirongguang@kylinos.cn>
+
+[ Upstream commit a9334b702a03b693f54ebd3b98f67bf722b74870 ]
+
+When the MAC does not support PMT, the driver checks the PHY's WoL
+capability and sets the device wakeup capability in stmmac_init_phy().
+When WoL is then enabled through ethtool, the driver sets the device
+wakeup flag, and device_may_wakeup() returns true.
+
+But if the PHY's WoL capability was enabled directly, e.g. from the
+BIOS, the driver does not know about it and does not set the device
+wakeup flag. phy_suspend() may then fail like this:
+
+[ 32.409063] PM: dpm_run_callback(): mdio_bus_phy_suspend+0x0/0x50 returns -16
+[ 32.409065] PM: Device stmmac-1:00 failed to suspend: error -16
+[ 32.409067] PM: Some devices failed to suspend, or early wake event detected
+
+Also set the device wakeup enable flag according to the result of the
+PHY's get_wol function; this fixes the error in this scenario.
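+
+A minimal sketch of the resulting init-time flow (paraphrased from the
+driver around the hunk below, not verbatim):
+
+  phylink_ethtool_get_wol(priv->phylink, &wol);
+  /* capable: the PHY supports at least one WoL mode */
+  device_set_wakeup_capable(priv->device, !!wol.supported);
+  /* enabled: a WoL mode is already armed, e.g. by the BIOS */
+  device_set_wakeup_enable(priv->device, !!wol.wolopts);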
+
+v2: add a Fixes tag.
+
+Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy")
+Signed-off-by: Rongguang Wei <weirongguang@kylinos.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+index 1ec000d4c7705..04c59102a2863 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+@@ -1145,6 +1145,7 @@ static int stmmac_init_phy(struct net_device *dev)
+
+ phylink_ethtool_get_wol(priv->phylink, &wol);
+ device_set_wakeup_capable(priv->device, !!wol.supported);
++ device_set_wakeup_enable(priv->device, !!wol.wolopts);
+ }
+
+ return ret;
+--
+2.39.2
+
--- /dev/null
+From 5bd38ea4f8a413bd78444f4001fd1ca2b70e9193 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Nov 2020 11:45:06 +0000
+Subject: net: usb: lan78xx: Remove lots of set but unused 'ret' variables
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Lee Jones <lee.jones@linaro.org>
+
+[ Upstream commit 06cd7c46b3ab3f2252c61bf85b191236cf0254e1 ]
+
+Fixes the following W=1 kernel build warning(s):
+
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_read_raw_otp’:
+ drivers/net/usb/lan78xx.c:825:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_write_raw_otp’:
+ drivers/net/usb/lan78xx.c:879:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_deferred_multicast_write’:
+ drivers/net/usb/lan78xx.c:1041:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_update_flowcontrol’:
+ drivers/net/usb/lan78xx.c:1127:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_init_mac_address’:
+ drivers/net/usb/lan78xx.c:1666:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_link_status_change’:
+ drivers/net/usb/lan78xx.c:1841:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_irq_bus_sync_unlock’:
+ drivers/net/usb/lan78xx.c:1920:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan8835_fixup’:
+ drivers/net/usb/lan78xx.c:1994:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_rx_max_frame_length’:
+ drivers/net/usb/lan78xx.c:2192:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_change_mtu’:
+ drivers/net/usb/lan78xx.c:2270:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_mac_addr’:
+ drivers/net/usb/lan78xx.c:2299:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_features’:
+ drivers/net/usb/lan78xx.c:2333:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+ drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_suspend’:
+ drivers/net/usb/lan78xx.c:3807:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable]
+
+Signed-off-by: Lee Jones <lee.jones@linaro.org>
+Link: https://lore.kernel.org/r/20201102114512.1062724-25-lee.jones@linaro.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: e57cf3639c32 ("net: lan78xx: fix accessing the LAN7800's internal phy specific registers from the MAC driver")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/lan78xx.c | 168 ++++++++++++++++++--------------------
+ 1 file changed, 78 insertions(+), 90 deletions(-)
+
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index 6f7b70522d926..0b5b4f9c7c5b9 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -824,20 +824,19 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
+ u32 length, u8 *data)
+ {
+ int i;
+- int ret;
+ u32 buf;
+ unsigned long timeout;
+
+- ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
++ lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+
+ if (buf & OTP_PWR_DN_PWRDN_N_) {
+ /* clear it and wait to be cleared */
+- ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0);
++ lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+
+ timeout = jiffies + HZ;
+ do {
+ usleep_range(1, 10);
+- ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
++ lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ if (time_after(jiffies, timeout)) {
+ netdev_warn(dev->net,
+ "timeout on OTP_PWR_DN");
+@@ -847,18 +846,18 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
+ }
+
+ for (i = 0; i < length; i++) {
+- ret = lan78xx_write_reg(dev, OTP_ADDR1,
++ lan78xx_write_reg(dev, OTP_ADDR1,
+ ((offset + i) >> 8) & OTP_ADDR1_15_11);
+- ret = lan78xx_write_reg(dev, OTP_ADDR2,
++ lan78xx_write_reg(dev, OTP_ADDR2,
+ ((offset + i) & OTP_ADDR2_10_3));
+
+- ret = lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
+- ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
++ lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_);
++ lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+
+ timeout = jiffies + HZ;
+ do {
+ udelay(1);
+- ret = lan78xx_read_reg(dev, OTP_STATUS, &buf);
++ lan78xx_read_reg(dev, OTP_STATUS, &buf);
+ if (time_after(jiffies, timeout)) {
+ netdev_warn(dev->net,
+ "timeout on OTP_STATUS");
+@@ -866,7 +865,7 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset,
+ }
+ } while (buf & OTP_STATUS_BUSY_);
+
+- ret = lan78xx_read_reg(dev, OTP_RD_DATA, &buf);
++ lan78xx_read_reg(dev, OTP_RD_DATA, &buf);
+
+ data[i] = (u8)(buf & 0xFF);
+ }
+@@ -878,20 +877,19 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset,
+ u32 length, u8 *data)
+ {
+ int i;
+- int ret;
+ u32 buf;
+ unsigned long timeout;
+
+- ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
++ lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+
+ if (buf & OTP_PWR_DN_PWRDN_N_) {
+ /* clear it and wait to be cleared */
+- ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0);
++ lan78xx_write_reg(dev, OTP_PWR_DN, 0);
+
+ timeout = jiffies + HZ;
+ do {
+ udelay(1);
+- ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
++ lan78xx_read_reg(dev, OTP_PWR_DN, &buf);
+ if (time_after(jiffies, timeout)) {
+ netdev_warn(dev->net,
+ "timeout on OTP_PWR_DN completion");
+@@ -901,21 +899,21 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset,
+ }
+
+ /* set to BYTE program mode */
+- ret = lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
++ lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_);
+
+ for (i = 0; i < length; i++) {
+- ret = lan78xx_write_reg(dev, OTP_ADDR1,
++ lan78xx_write_reg(dev, OTP_ADDR1,
+ ((offset + i) >> 8) & OTP_ADDR1_15_11);
+- ret = lan78xx_write_reg(dev, OTP_ADDR2,
++ lan78xx_write_reg(dev, OTP_ADDR2,
+ ((offset + i) & OTP_ADDR2_10_3));
+- ret = lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
+- ret = lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
+- ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
++ lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]);
++ lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_);
++ lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_);
+
+ timeout = jiffies + HZ;
+ do {
+ udelay(1);
+- ret = lan78xx_read_reg(dev, OTP_STATUS, &buf);
++ lan78xx_read_reg(dev, OTP_STATUS, &buf);
+ if (time_after(jiffies, timeout)) {
+ netdev_warn(dev->net,
+ "Timeout on OTP_STATUS completion");
+@@ -1040,7 +1038,6 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
+ container_of(param, struct lan78xx_priv, set_multicast);
+ struct lan78xx_net *dev = pdata->dev;
+ int i;
+- int ret;
+
+ netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n",
+ pdata->rfe_ctl);
+@@ -1049,14 +1046,14 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param)
+ DP_SEL_VHF_HASH_LEN, pdata->mchash_table);
+
+ for (i = 1; i < NUM_OF_MAF; i++) {
+- ret = lan78xx_write_reg(dev, MAF_HI(i), 0);
+- ret = lan78xx_write_reg(dev, MAF_LO(i),
++ lan78xx_write_reg(dev, MAF_HI(i), 0);
++ lan78xx_write_reg(dev, MAF_LO(i),
+ pdata->pfilter_table[i][1]);
+- ret = lan78xx_write_reg(dev, MAF_HI(i),
++ lan78xx_write_reg(dev, MAF_HI(i),
+ pdata->pfilter_table[i][0]);
+ }
+
+- ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
++ lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+ }
+
+ static void lan78xx_set_multicast(struct net_device *netdev)
+@@ -1126,7 +1123,6 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
+ u16 lcladv, u16 rmtadv)
+ {
+ u32 flow = 0, fct_flow = 0;
+- int ret;
+ u8 cap;
+
+ if (dev->fc_autoneg)
+@@ -1149,10 +1145,10 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex,
+ (cap & FLOW_CTRL_RX ? "enabled" : "disabled"),
+ (cap & FLOW_CTRL_TX ? "enabled" : "disabled"));
+
+- ret = lan78xx_write_reg(dev, FCT_FLOW, fct_flow);
++ lan78xx_write_reg(dev, FCT_FLOW, fct_flow);
+
+ /* threshold value should be set before enabling flow */
+- ret = lan78xx_write_reg(dev, FLOW, flow);
++ lan78xx_write_reg(dev, FLOW, flow);
+
+ return 0;
+ }
+@@ -1673,11 +1669,10 @@ static const struct ethtool_ops lan78xx_ethtool_ops = {
+ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
+ {
+ u32 addr_lo, addr_hi;
+- int ret;
+ u8 addr[6];
+
+- ret = lan78xx_read_reg(dev, RX_ADDRL, &addr_lo);
+- ret = lan78xx_read_reg(dev, RX_ADDRH, &addr_hi);
++ lan78xx_read_reg(dev, RX_ADDRL, &addr_lo);
++ lan78xx_read_reg(dev, RX_ADDRH, &addr_hi);
+
+ addr[0] = addr_lo & 0xFF;
+ addr[1] = (addr_lo >> 8) & 0xFF;
+@@ -1710,12 +1705,12 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev)
+ (addr[2] << 16) | (addr[3] << 24);
+ addr_hi = addr[4] | (addr[5] << 8);
+
+- ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+- ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
++ lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
++ lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+ }
+
+- ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
+- ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
++ lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
++ lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
+
+ ether_addr_copy(dev->net->dev_addr, addr);
+ }
+@@ -1848,7 +1843,7 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev)
+ static void lan78xx_link_status_change(struct net_device *net)
+ {
+ struct phy_device *phydev = net->phydev;
+- int ret, temp;
++ int temp;
+
+ /* At forced 100 F/H mode, chip may fail to set mode correctly
+ * when cable is switched between long(~50+m) and short one.
+@@ -1859,7 +1854,7 @@ static void lan78xx_link_status_change(struct net_device *net)
+ /* disable phy interrupt */
+ temp = phy_read(phydev, LAN88XX_INT_MASK);
+ temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_;
+- ret = phy_write(phydev, LAN88XX_INT_MASK, temp);
++ phy_write(phydev, LAN88XX_INT_MASK, temp);
+
+ temp = phy_read(phydev, MII_BMCR);
+ temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000);
+@@ -1873,7 +1868,7 @@ static void lan78xx_link_status_change(struct net_device *net)
+ /* enable phy interrupt back */
+ temp = phy_read(phydev, LAN88XX_INT_MASK);
+ temp |= LAN88XX_INT_MASK_MDINTPIN_EN_;
+- ret = phy_write(phydev, LAN88XX_INT_MASK, temp);
++ phy_write(phydev, LAN88XX_INT_MASK, temp);
+ }
+ }
+
+@@ -1927,14 +1922,13 @@ static void lan78xx_irq_bus_sync_unlock(struct irq_data *irqd)
+ struct lan78xx_net *dev =
+ container_of(data, struct lan78xx_net, domain_data);
+ u32 buf;
+- int ret;
+
+ /* call register access here because irq_bus_lock & irq_bus_sync_unlock
+ * are only two callbacks executed in non-atomic contex.
+ */
+- ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf);
++ lan78xx_read_reg(dev, INT_EP_CTL, &buf);
+ if (buf != data->irqenable)
+- ret = lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable);
++ lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable);
+
+ mutex_unlock(&data->irq_lock);
+ }
+@@ -2001,7 +1995,6 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev)
+ static int lan8835_fixup(struct phy_device *phydev)
+ {
+ int buf;
+- int ret;
+ struct lan78xx_net *dev = netdev_priv(phydev->attached_dev);
+
+ /* LED2/PME_N/IRQ_N/RGMII_ID pin to IRQ_N mode */
+@@ -2011,11 +2004,11 @@ static int lan8835_fixup(struct phy_device *phydev)
+ phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8010, buf);
+
+ /* RGMII MAC TXC Delay Enable */
+- ret = lan78xx_write_reg(dev, MAC_RGMII_ID,
++ lan78xx_write_reg(dev, MAC_RGMII_ID,
+ MAC_RGMII_ID_TXC_DELAY_EN_);
+
+ /* RGMII TX DLL Tune Adjust */
+- ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
++ lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00);
+
+ dev->interface = PHY_INTERFACE_MODE_RGMII_TXID;
+
+@@ -2199,28 +2192,27 @@ static int lan78xx_phy_init(struct lan78xx_net *dev)
+
+ static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size)
+ {
+- int ret = 0;
+ u32 buf;
+ bool rxenabled;
+
+- ret = lan78xx_read_reg(dev, MAC_RX, &buf);
++ lan78xx_read_reg(dev, MAC_RX, &buf);
+
+ rxenabled = ((buf & MAC_RX_RXEN_) != 0);
+
+ if (rxenabled) {
+ buf &= ~MAC_RX_RXEN_;
+- ret = lan78xx_write_reg(dev, MAC_RX, buf);
++ lan78xx_write_reg(dev, MAC_RX, buf);
+ }
+
+ /* add 4 to size for FCS */
+ buf &= ~MAC_RX_MAX_SIZE_MASK_;
+ buf |= (((size + 4) << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_);
+
+- ret = lan78xx_write_reg(dev, MAC_RX, buf);
++ lan78xx_write_reg(dev, MAC_RX, buf);
+
+ if (rxenabled) {
+ buf |= MAC_RX_RXEN_;
+- ret = lan78xx_write_reg(dev, MAC_RX, buf);
++ lan78xx_write_reg(dev, MAC_RX, buf);
+ }
+
+ return 0;
+@@ -2277,13 +2269,12 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu)
+ int ll_mtu = new_mtu + netdev->hard_header_len;
+ int old_hard_mtu = dev->hard_mtu;
+ int old_rx_urb_size = dev->rx_urb_size;
+- int ret;
+
+ /* no second zero-length packet read wanted after mtu-sized packets */
+ if ((ll_mtu % dev->maxpacket) == 0)
+ return -EDOM;
+
+- ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
++ lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN);
+
+ netdev->mtu = new_mtu;
+
+@@ -2306,7 +2297,6 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
+ struct lan78xx_net *dev = netdev_priv(netdev);
+ struct sockaddr *addr = p;
+ u32 addr_lo, addr_hi;
+- int ret;
+
+ if (netif_running(netdev))
+ return -EBUSY;
+@@ -2323,12 +2313,12 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p)
+ addr_hi = netdev->dev_addr[4] |
+ netdev->dev_addr[5] << 8;
+
+- ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
+- ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
++ lan78xx_write_reg(dev, RX_ADDRL, addr_lo);
++ lan78xx_write_reg(dev, RX_ADDRH, addr_hi);
+
+ /* Added to support MAC address changes */
+- ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
+- ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
++ lan78xx_write_reg(dev, MAF_LO(0), addr_lo);
++ lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_);
+
+ return 0;
+ }
+@@ -2340,7 +2330,6 @@ static int lan78xx_set_features(struct net_device *netdev,
+ struct lan78xx_net *dev = netdev_priv(netdev);
+ struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]);
+ unsigned long flags;
+- int ret;
+
+ spin_lock_irqsave(&pdata->rfe_ctl_lock, flags);
+
+@@ -2364,7 +2353,7 @@ static int lan78xx_set_features(struct net_device *netdev,
+
+ spin_unlock_irqrestore(&pdata->rfe_ctl_lock, flags);
+
+- ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
++ lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl);
+
+ return 0;
+ }
+@@ -3820,7 +3809,6 @@ static u16 lan78xx_wakeframe_crc16(const u8 *buf, int len)
+ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+ {
+ u32 buf;
+- int ret;
+ int mask_index;
+ u16 crc;
+ u32 temp_wucsr;
+@@ -3829,26 +3817,26 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+ const u8 ipv6_multicast[3] = { 0x33, 0x33 };
+ const u8 arp_type[2] = { 0x08, 0x06 };
+
+- ret = lan78xx_read_reg(dev, MAC_TX, &buf);
++ lan78xx_read_reg(dev, MAC_TX, &buf);
+ buf &= ~MAC_TX_TXEN_;
+- ret = lan78xx_write_reg(dev, MAC_TX, buf);
+- ret = lan78xx_read_reg(dev, MAC_RX, &buf);
++ lan78xx_write_reg(dev, MAC_TX, buf);
++ lan78xx_read_reg(dev, MAC_RX, &buf);
+ buf &= ~MAC_RX_RXEN_;
+- ret = lan78xx_write_reg(dev, MAC_RX, buf);
++ lan78xx_write_reg(dev, MAC_RX, buf);
+
+- ret = lan78xx_write_reg(dev, WUCSR, 0);
+- ret = lan78xx_write_reg(dev, WUCSR2, 0);
+- ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
++ lan78xx_write_reg(dev, WUCSR, 0);
++ lan78xx_write_reg(dev, WUCSR2, 0);
++ lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL);
+
+ temp_wucsr = 0;
+
+ temp_pmt_ctl = 0;
+- ret = lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
++ lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl);
+ temp_pmt_ctl &= ~PMT_CTL_RES_CLR_WKP_EN_;
+ temp_pmt_ctl |= PMT_CTL_RES_CLR_WKP_STS_;
+
+ for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++)
+- ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_CFG(mask_index), 0);
+
+ mask_index = 0;
+ if (wol & WAKE_PHY) {
+@@ -3877,30 +3865,30 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+
+ /* set WUF_CFG & WUF_MASK for IPv4 Multicast */
+ crc = lan78xx_wakeframe_crc16(ipv4_multicast, 3);
+- ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
++ lan78xx_write_reg(dev, WUF_CFG(mask_index),
+ WUF_CFGX_EN_ |
+ WUF_CFGX_TYPE_MCAST_ |
+ (0 << WUF_CFGX_OFFSET_SHIFT_) |
+ (crc & WUF_CFGX_CRC16_MASK_));
+
+- ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
+- ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+- ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+- ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7);
++ lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+ mask_index++;
+
+ /* for IPv6 Multicast */
+ crc = lan78xx_wakeframe_crc16(ipv6_multicast, 2);
+- ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
++ lan78xx_write_reg(dev, WUF_CFG(mask_index),
+ WUF_CFGX_EN_ |
+ WUF_CFGX_TYPE_MCAST_ |
+ (0 << WUF_CFGX_OFFSET_SHIFT_) |
+ (crc & WUF_CFGX_CRC16_MASK_));
+
+- ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
+- ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+- ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+- ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3);
++ lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+ mask_index++;
+
+ temp_pmt_ctl |= PMT_CTL_WOL_EN_;
+@@ -3921,16 +3909,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+ * for packettype (offset 12,13) = ARP (0x0806)
+ */
+ crc = lan78xx_wakeframe_crc16(arp_type, 2);
+- ret = lan78xx_write_reg(dev, WUF_CFG(mask_index),
++ lan78xx_write_reg(dev, WUF_CFG(mask_index),
+ WUF_CFGX_EN_ |
+ WUF_CFGX_TYPE_ALL_ |
+ (0 << WUF_CFGX_OFFSET_SHIFT_) |
+ (crc & WUF_CFGX_CRC16_MASK_));
+
+- ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
+- ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
+- ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
+- ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000);
++ lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0);
++ lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0);
+ mask_index++;
+
+ temp_pmt_ctl |= PMT_CTL_WOL_EN_;
+@@ -3938,7 +3926,7 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+ temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
+ }
+
+- ret = lan78xx_write_reg(dev, WUCSR, temp_wucsr);
++ lan78xx_write_reg(dev, WUCSR, temp_wucsr);
+
+ /* when multiple WOL bits are set */
+ if (hweight_long((unsigned long)wol) > 1) {
+@@ -3946,16 +3934,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol)
+ temp_pmt_ctl &= ~PMT_CTL_SUS_MODE_MASK_;
+ temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_;
+ }
+- ret = lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
++ lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl);
+
+ /* clear WUPS */
+- ret = lan78xx_read_reg(dev, PMT_CTL, &buf);
++ lan78xx_read_reg(dev, PMT_CTL, &buf);
+ buf |= PMT_CTL_WUPS_MASK_;
+- ret = lan78xx_write_reg(dev, PMT_CTL, buf);
++ lan78xx_write_reg(dev, PMT_CTL, buf);
+
+- ret = lan78xx_read_reg(dev, MAC_RX, &buf);
++ lan78xx_read_reg(dev, MAC_RX, &buf);
+ buf |= MAC_RX_RXEN_;
+- ret = lan78xx_write_reg(dev, MAC_RX, buf);
++ lan78xx_write_reg(dev, MAC_RX, buf);
+
+ return 0;
+ }
+--
+2.39.2
+
--- /dev/null
+From 6cb1137d72bb5310fa2ef6f663fc7b7adf26ee20 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Mar 2023 17:48:31 -0800
+Subject: netfilter: ctnetlink: revert to dumping mark regardless of event type
+
+From: Ivan Delalande <colona@arista.com>
+
+[ Upstream commit 9f7dd42f0db1dc6915a52d4a8a96ca18dd8cc34e ]
+
+It seems that change was unintentional; we have userspace code that
+needs the mark while listening for events like REPLY, DESTROY, etc.
+Also include 0-marks in requested dumps, as they were before that fix.
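+
+The new convention, sketched from the hunks below: dump/query paths
+pass true so even a zero mark is emitted, while the event path passes
+the IPCT_MARK bit so a zero mark is only forced out when the mark
+itself changed:
+
+  /* dump/query paths: always include the (possibly zero) mark */
+  if (ctnetlink_dump_mark(skb, ct, true) < 0)
+          goto nla_put_failure;
+
+  /* event path: non-zero marks for any event, zero marks only
+   * for mark-change (IPCT_MARK) events */
+  if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
+          goto nla_put_failure;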
+
+Fixes: 1feeae071507 ("netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark")
+Signed-off-by: Ivan Delalande <colona@arista.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_conntrack_netlink.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
+index f8ba3bc25cf34..c9ca857f1068d 100644
+--- a/net/netfilter/nf_conntrack_netlink.c
++++ b/net/netfilter/nf_conntrack_netlink.c
+@@ -317,11 +317,12 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)
+ }
+
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+-static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
++static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct,
++ bool dump)
+ {
+ u32 mark = READ_ONCE(ct->mark);
+
+- if (!mark)
++ if (!mark && !dump)
+ return 0;
+
+ if (nla_put_be32(skb, CTA_MARK, htonl(mark)))
+@@ -332,7 +333,7 @@ static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
+ return -1;
+ }
+ #else
+-#define ctnetlink_dump_mark(a, b) (0)
++#define ctnetlink_dump_mark(a, b, c) (0)
+ #endif
+
+ #ifdef CONFIG_NF_CONNTRACK_SECMARK
+@@ -537,7 +538,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb,
+ static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
+ {
+ if (ctnetlink_dump_status(skb, ct) < 0 ||
+- ctnetlink_dump_mark(skb, ct) < 0 ||
++ ctnetlink_dump_mark(skb, ct, true) < 0 ||
+ ctnetlink_dump_secctx(skb, ct) < 0 ||
+ ctnetlink_dump_id(skb, ct) < 0 ||
+ ctnetlink_dump_use(skb, ct) < 0 ||
+@@ -816,8 +817,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
+ }
+
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+- if (events & (1 << IPCT_MARK) &&
+- ctnetlink_dump_mark(skb, ct) < 0)
++ if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK)))
+ goto nla_put_failure;
+ #endif
+ nlmsg_end(skb, nlh);
+@@ -2734,7 +2734,7 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct)
+ goto nla_put_failure;
+
+ #ifdef CONFIG_NF_CONNTRACK_MARK
+- if (ctnetlink_dump_mark(skb, ct) < 0)
++ if (ctnetlink_dump_mark(skb, ct, true) < 0)
+ goto nla_put_failure;
+ #endif
+ if (ctnetlink_dump_labels(skb, ct) < 0)
+--
+2.39.2
+
--- /dev/null
+From 6381d75ba1ecd932217d7f7d64942ff0f5b7445f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 10:58:56 +0100
+Subject: netfilter: tproxy: fix deadlock due to missing BH disable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 4a02426787bf024dafdb79b362285ee325de3f5e ]
+
+The xtables packet traverser performs an unconditional local_bh_disable(),
+but the nf_tables evaluation loop does not.
+
+Functions that are called from either xtables or nftables must assume
+that they can be called in process context.
+
+inet_twsk_deschedule_put() assumes that no softirq interrupt can occur.
+If tproxy is used from nf_tables, it's possible that we'll deadlock
+trying to acquire a lock already held in process context.
+
+Add a small helper that takes care of this and use it.
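+
+In essence, the helper (added below) just brackets the call in a
+BH-disabled section, so the timewait locks are never taken in process
+context with softirqs still enabled:
+
+  local_bh_disable();
+  inet_twsk_deschedule_put(tw);   /* takes locks also used in softirq */
+  local_bh_enable();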
+
+Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/
+Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support")
+Reported-and-tested-by: Major Dávid <major.david@balasys.hu>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/netfilter/nf_tproxy.h | 7 +++++++
+ net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 +-
+ net/ipv6/netfilter/nf_tproxy_ipv6.c | 2 +-
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h
+index 82d0e41b76f22..faa108b1ba675 100644
+--- a/include/net/netfilter/nf_tproxy.h
++++ b/include/net/netfilter/nf_tproxy.h
+@@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk)
+ return false;
+ }
+
++static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw)
++{
++ local_bh_disable();
++ inet_twsk_deschedule_put(tw);
++ local_bh_enable();
++}
++
+ /* assign a socket to the skb -- consumes sk */
+ static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk)
+ {
+diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c
+index b2bae0b0e42a1..61cb2341f50fe 100644
+--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c
++++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c
+@@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb,
+ hp->source, lport ? lport : hp->dest,
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+ if (sk2) {
+- inet_twsk_deschedule_put(inet_twsk(sk));
++ nf_tproxy_twsk_deschedule_put(inet_twsk(sk));
+ sk = sk2;
+ }
+ }
+diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c
+index 6bac68fb27a39..3fe4f15e01dc8 100644
+--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c
++++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c
+@@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
+ lport ? lport : hp->dest,
+ skb->dev, NF_TPROXY_LOOKUP_LISTENER);
+ if (sk2) {
+- inet_twsk_deschedule_put(inet_twsk(sk));
++ nf_tproxy_twsk_deschedule_put(inet_twsk(sk));
+ sk = sk2;
+ }
+ }
+--
+2.39.2
+
--- /dev/null
+From ad95ef4d6e4c52faf8606c661e9cd450d5edca1d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Feb 2023 17:30:37 +0800
+Subject: nfc: fdp: add null check of devm_kmalloc_array in
+ fdp_nci_i2c_read_device_properties
+
+From: Kang Chen <void0red@gmail.com>
+
+[ Upstream commit 11f180a5d62a51b484e9648f9b310e1bd50b1a57 ]
+
+devm_kmalloc_array() may fail; *fw_vsc_cfg might then be NULL and cause
+an out-of-bounds write in device_property_read_u8_array() later.
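+
+The defensive pattern, as added below (sketch): check the devm
+allocation before handing the buffer to the property read:
+
+  *fw_vsc_cfg = devm_kmalloc_array(dev, len, sizeof(**fw_vsc_cfg),
+                                   GFP_KERNEL);
+  if (!*fw_vsc_cfg)
+          goto alloc_err;   /* skip the read, keep *fw_vsc_cfg NULL */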
+
+Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver")
+Signed-off-by: Kang Chen <void0red@gmail.com>
+Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/20230227093037.907654-1-void0red@gmail.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nfc/fdp/i2c.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c
+index 5e300788be525..808d73050afd0 100644
+--- a/drivers/nfc/fdp/i2c.c
++++ b/drivers/nfc/fdp/i2c.c
+@@ -249,6 +249,9 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev,
+ len, sizeof(**fw_vsc_cfg),
+ GFP_KERNEL);
+
++ if (!*fw_vsc_cfg)
++ goto alloc_err;
++
+ r = device_property_read_u8_array(dev, FDP_DP_FW_VSC_CFG_NAME,
+ *fw_vsc_cfg, len);
+
+@@ -262,6 +265,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev,
+ *fw_vsc_cfg = NULL;
+ }
+
++alloc_err:
+ dev_dbg(dev, "Clock type: %d, clock frequency: %d, VSC: %s",
+ *clock_type, *clock_freq, *fw_vsc_cfg != NULL ? "yes" : "no");
+ }
+--
+2.39.2
+
--- /dev/null
+From 695d62e869960ab3f07236095ed9f3f2bb757344 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 25 Feb 2023 21:39:51 -0800
+Subject: platform: x86: MLX_PLATFORM: select REGMAP instead of depending on it
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+[ Upstream commit 7e7e1541c91615e9950d0b96bcd1806d297e970e ]
+
+REGMAP is a hidden (not user visible) symbol. Users cannot set it
+directly thru "make *config", so drivers should select it instead of
+depending on it if they need it.
+
+Consistently using "select" or "depends on" can also help reduce
+Kconfig circular dependency issues.
+
+Therefore, change the use of "depends on REGMAP" to "select REGMAP".
+
+Fixes: ef0f62264b2a ("platform/x86: mlx-platform: Add physical bus number auto detection")
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Cc: Vadim Pasternak <vadimp@mellanox.com>
+Cc: Darren Hart <dvhart@infradead.org>
+Cc: Hans de Goede <hdegoede@redhat.com>
+Cc: Mark Gross <markgross@kernel.org>
+Cc: platform-driver-x86@vger.kernel.org
+Link: https://lore.kernel.org/r/20230226053953.4681-7-rdunlap@infradead.org
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/Kconfig | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
+index a1858689d6e10..84c5b922f245e 100644
+--- a/drivers/platform/x86/Kconfig
++++ b/drivers/platform/x86/Kconfig
+@@ -1195,7 +1195,8 @@ config I2C_MULTI_INSTANTIATE
+
+ config MLX_PLATFORM
+ tristate "Mellanox Technologies platform support"
+- depends on I2C && REGMAP
++ depends on I2C
++ select REGMAP
+ help
+ This option enables system support for the Mellanox Technologies
+ platform. The Mellanox systems provide data center networking
+--
+2.39.2
+
--- /dev/null
+From 66894d71b0beadb5b792a7b62763eab3a7798844 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Feb 2023 17:59:39 +0200
+Subject: powerpc: dts: t1040rdb: fix compatible string for Rev A boards
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit ae44f1c9d1fc54aeceb335fedb1e73b2c3ee4561 ]
+
+It looks like U-Boot fails to start the kernel properly when the
+compatible string of the board isn't fsl,T1040RDB, so stop overriding it
+from the rev-a.dts.
+
+Fixes: 5ebb74749202 ("powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch")
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
+index 73f8c998c64df..d4f5f159d6f23 100644
+--- a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
++++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts
+@@ -10,7 +10,6 @@
+
+ / {
+ model = "fsl,T1040RDB-REV-A";
+- compatible = "fsl,T1040RDB-REV-A";
+ };
+
+ &seville_port0 {
+--
+2.39.2
+
--- /dev/null
+From f0d5977c98155a0bed9f1a9093ca9a7e9f2d0f0e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Sep 2022 13:00:37 -0700
+Subject: RISC-V: Avoid dereferening NULL regs in die()
+
+From: Palmer Dabbelt <palmer@rivosinc.com>
+
+[ Upstream commit f2913d006fcdb61719635e093d1b5dd0dafecac7 ]
+
+I don't think we can actually die() without a regs pointer, but the
+compiler was warning about a NULL check after a dereference. It seems
+prudent to just avoid the possibly-NULL dereference, given that when
+die()ing the system is already toast so who knows how we got there.
+
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20220920200037.6727-1-palmer@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Stable-dep-of: 130aee3fd998 ("riscv: Avoid enabling interrupts in die()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/traps.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
+index 23fe03ca7ec7b..bc6b30f3add83 100644
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -31,6 +31,7 @@ void die(struct pt_regs *regs, const char *str)
+ {
+ static int die_counter;
+ int ret;
++ long cause;
+
+ oops_enter();
+
+@@ -40,11 +41,13 @@ void die(struct pt_regs *regs, const char *str)
+
+ pr_emerg("%s [#%d]\n", str, ++die_counter);
+ print_modules();
+- show_regs(regs);
++ if (regs)
++ show_regs(regs);
+
+- ret = notify_die(DIE_OOPS, str, regs, 0, regs->cause, SIGSEGV);
++ cause = regs ? regs->cause : -1;
++ ret = notify_die(DIE_OOPS, str, regs, 0, cause, SIGSEGV);
+
+- if (regs && kexec_should_crash(current))
++ if (kexec_should_crash(current))
+ crash_kexec(regs);
+
+ bust_spinlocks(0);
+--
+2.39.2
+
--- /dev/null
+From 08ee349fb46fb31600fbf34244e65b4378ea2d65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 14:37:55 +0000
+Subject: RISC-V: Don't check text_mutex during stop_machine
+
+From: Conor Dooley <conor.dooley@microchip.com>
+
+[ Upstream commit 2a8db5ec4a28a0fce822d10224db9471a44b6925 ]
+
+We're currently using stop_machine() to update ftrace & kprobes, which
+means that the thread that takes text_mutex during setup may not be the
+same as the thread that eventually patches the code. This isn't actually a
+race because the lock is still held (preventing any other concurrent
+accesses) and there is only one thread running during stop_machine(),
+but it does trigger a lockdep failure.
+
+This patch just elides the lockdep check during stop_machine.
+
+Fixes: c15ac4fd60d5 ("riscv/ftrace: Add dynamic function tracer support")
+Suggested-by: Steven Rostedt <rostedt@goodmis.org>
+Reported-by: Changbin Du <changbin.du@gmail.com>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20230303143754.4005217-1-conor.dooley@microchip.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/ftrace.h | 2 +-
+ arch/riscv/include/asm/patch.h | 2 ++
+ arch/riscv/kernel/ftrace.c | 14 ++++++++++++--
+ arch/riscv/kernel/patch.c | 28 +++++++++++++++++++++++++---
+ 4 files changed, 40 insertions(+), 6 deletions(-)
+
+diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
+index 9e73922e1e2e5..d47d87c2d7e3d 100644
+--- a/arch/riscv/include/asm/ftrace.h
++++ b/arch/riscv/include/asm/ftrace.h
+@@ -109,6 +109,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+ #define ftrace_init_nop ftrace_init_nop
+ #endif
+
+-#endif
++#endif /* CONFIG_DYNAMIC_FTRACE */
+
+ #endif /* _ASM_RISCV_FTRACE_H */
+diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h
+index 9a7d7346001ee..98d9de07cba17 100644
+--- a/arch/riscv/include/asm/patch.h
++++ b/arch/riscv/include/asm/patch.h
+@@ -9,4 +9,6 @@
+ int patch_text_nosync(void *addr, const void *insns, size_t len);
+ int patch_text(void *addr, u32 insn);
+
++extern int riscv_patch_in_stop_machine;
++
+ #endif /* _ASM_RISCV_PATCH_H */
+diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
+index 47b43d8ee9a6c..1bf92cfa6764e 100644
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -15,11 +15,21 @@
+ int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex)
+ {
+ mutex_lock(&text_mutex);
++
++ /*
++ * The code sequences we use for ftrace can't be patched while the
++ * kernel is running, so we need to use stop_machine() to modify them
++ * for now. This doesn't play nice with text_mutex, we use this flag
++ * to elide the check.
++ */
++ riscv_patch_in_stop_machine = true;
++
+ return 0;
+ }
+
+ int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex)
+ {
++ riscv_patch_in_stop_machine = false;
+ mutex_unlock(&text_mutex);
+ return 0;
+ }
+@@ -109,9 +119,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+ {
+ int out;
+
+- ftrace_arch_code_modify_prepare();
++ mutex_lock(&text_mutex);
+ out = ftrace_make_nop(mod, rec, MCOUNT_ADDR);
+- ftrace_arch_code_modify_post_process();
++ mutex_unlock(&text_mutex);
+
+ return out;
+ }
+diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c
+index 1612e11f7bf6d..c3fced410e742 100644
+--- a/arch/riscv/kernel/patch.c
++++ b/arch/riscv/kernel/patch.c
+@@ -11,6 +11,7 @@
+ #include <asm/kprobes.h>
+ #include <asm/cacheflush.h>
+ #include <asm/fixmap.h>
++#include <asm/ftrace.h>
+ #include <asm/patch.h>
+
+ struct patch_insn {
+@@ -19,6 +20,8 @@ struct patch_insn {
+ atomic_t cpu_count;
+ };
+
++int riscv_patch_in_stop_machine = false;
++
+ #ifdef CONFIG_MMU
+ static void *patch_map(void *addr, int fixmap)
+ {
+@@ -55,8 +58,15 @@ static int patch_insn_write(void *addr, const void *insn, size_t len)
+ * Before reaching here, it was expected to lock the text_mutex
+ * already, so we don't need to give another lock here and could
+ * ensure that it was safe between each cores.
++ *
++ * We're currently using stop_machine() for ftrace & kprobes, and while
++ * that ensures text_mutex is held before installing the mappings it
++ * does not ensure text_mutex is held by the calling thread. That's
++ * safe but triggers a lockdep failure, so just elide it for that
++ * specific case.
+ */
+- lockdep_assert_held(&text_mutex);
++ if (!riscv_patch_in_stop_machine)
++ lockdep_assert_held(&text_mutex);
+
+ if (across_pages)
+ patch_map(addr + len, FIX_TEXT_POKE1);
+@@ -117,13 +127,25 @@ NOKPROBE_SYMBOL(patch_text_cb);
+
+ int patch_text(void *addr, u32 insn)
+ {
++ int ret;
+ struct patch_insn patch = {
+ .addr = addr,
+ .insn = insn,
+ .cpu_count = ATOMIC_INIT(0),
+ };
+
+- return stop_machine_cpuslocked(patch_text_cb,
+- &patch, cpu_online_mask);
++ /*
++ * kprobes takes text_mutex, before calling patch_text(), but as we call
++ * stop_machine(), the lockdep assertion in patch_insn_write()
++ * gets confused by the context in which the lock is taken.
++ * Instead, ensure the lock is held before calling stop_machine(), and
++ * set riscv_patch_in_stop_machine to skip the check in
++ * patch_insn_write().
++ */
++ lockdep_assert_held(&text_mutex);
++ riscv_patch_in_stop_machine = true;
++ ret = stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask);
++ riscv_patch_in_stop_machine = false;
++ return ret;
+ }
+ NOKPROBE_SYMBOL(patch_text);
+--
+2.39.2
+
--- /dev/null
+From 02ddbaf7ea1c9a770160c69a31629798b39c46c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 29 Jan 2023 17:42:42 +0800
+Subject: riscv: Add header include guards to insn.h
+
+From: Liao Chang <liaochang1@huawei.com>
+
+[ Upstream commit 8ac6e619d9d51b3eb5bae817db8aa94e780a0db4 ]
+
+Add header include guards to insn.h to prevent repeated declarations of
+the identifiers in insn.h.
+
+Fixes: edde5584c7ab ("riscv: Add SW single-step support for KDB")
+Signed-off-by: Liao Chang <liaochang1@huawei.com>
+Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
+Fixes: c9c1af3f186a ("RISC-V: rename parse_asm.h to insn.h")
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20230129094242.282620-1-liaochang1@huawei.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/parse_asm.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/arch/riscv/include/asm/parse_asm.h b/arch/riscv/include/asm/parse_asm.h
+index 7fee806805c1b..ad254da85e615 100644
+--- a/arch/riscv/include/asm/parse_asm.h
++++ b/arch/riscv/include/asm/parse_asm.h
+@@ -3,6 +3,9 @@
+ * Copyright (C) 2020 SiFive
+ */
+
++#ifndef _ASM_RISCV_INSN_H
++#define _ASM_RISCV_INSN_H
++
+ #include <linux/bits.h>
+
+ /* The bit field of immediate value in I-type instruction */
+@@ -217,3 +220,5 @@ static inline bool is_ ## INSN_NAME ## _insn(long insn) \
+ (RVC_X(x_, RVC_B_IMM_5_OPOFF, RVC_B_IMM_5_MASK) << RVC_B_IMM_5_OFF) | \
+ (RVC_X(x_, RVC_B_IMM_7_6_OPOFF, RVC_B_IMM_7_6_MASK) << RVC_B_IMM_7_6_OFF) | \
+ (RVC_IMM_SIGN(x_) << RVC_B_IMM_SIGN_OFF); })
++
++#endif /* _ASM_RISCV_INSN_H */
+--
+2.39.2
+
--- /dev/null
+From a3ec8c84210a077e35f797833016d5e0e8b27e62 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 14:48:28 +0000
+Subject: riscv: Avoid enabling interrupts in die()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mattias Nissler <mnissler@rivosinc.com>
+
+[ Upstream commit 130aee3fd9981297ff9354e5d5609cd59aafbbea ]
+
+While working on something else, I noticed that the kernel would start
+accepting interrupts again after crashing in an interrupt handler. Since
+the kernel is already in inconsistent state, enabling interrupts is
+dangerous and opens up risk of kernel state deteriorating further.
+Interrupts do get enabled via what looks like an unintended side effect of
+spin_unlock_irq, so switch to the more cautious
+spin_lock_irqsave/spin_unlock_irqrestore instead.
+
+Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code")
+Signed-off-by: Mattias Nissler <mnissler@rivosinc.com>
+Reviewed-by: Björn Töpel <bjorn@kernel.org>
+Link: https://lore.kernel.org/r/20230215144828.3370316-1-mnissler@rivosinc.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/traps.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
+index bc6b30f3add83..227253fde33c4 100644
+--- a/arch/riscv/kernel/traps.c
++++ b/arch/riscv/kernel/traps.c
+@@ -32,10 +32,11 @@ void die(struct pt_regs *regs, const char *str)
+ static int die_counter;
+ int ret;
+ long cause;
++ unsigned long flags;
+
+ oops_enter();
+
+- spin_lock_irq(&die_lock);
++ spin_lock_irqsave(&die_lock, flags);
+ console_verbose();
+ bust_spinlocks(1);
+
+@@ -52,7 +53,7 @@ void die(struct pt_regs *regs, const char *str)
+
+ bust_spinlocks(0);
+ add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
+- spin_unlock_irq(&die_lock);
++ spin_unlock_irqrestore(&die_lock, flags);
+ oops_exit();
+
+ if (in_interrupt())
+--
+2.39.2
+
--- /dev/null
+From 584f80a2b4caa34c1809e9356be1b8300aa2923e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Jan 2023 04:05:59 -0500
+Subject: riscv: ftrace: Reduce the detour code size to half
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit 6724a76cff85ee271bbbff42ac527e4643b2ec52 ]
+
+Use a temporary register to reduce the size of detour code from 16 bytes to
+8 bytes. The previous implementation is from 'commit afc76b8b8011 ("riscv:
+Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")'.
+
+Before the patch:
+<func_prolog>:
+ 0: REG_S ra, -SZREG(sp)
+ 4: auipc ra, ?
+ 8: jalr ?(ra)
+12: REG_L ra, -SZREG(sp)
+ (func_body)
+
+After the patch:
+<func_prolog>:
+ 0: auipc t0, ?
+ 4: jalr t0, ?(t0)
+ (func_body)
+
+This patch not only reduces the size of the detour code, but also fixes
+an important issue:
+
+An Ftrace callback registered with FTRACE_OPS_FL_IPMODIFY flag can
+actually change the instruction pointer, e.g. to "replace" the given
+kernel function with a new one, which is needed for livepatching, etc.
+
+In this case, the trampoline (ftrace_regs_caller) would not return to
+<func_prolog+12> but would rather jump to the new function. So, "REG_L
+ra, -SZREG(sp)" would not run and the original return address would not
+be restored. The kernel is likely to hang or crash as a result.
+
+This can be easily demonstrated if one tries to "replace", say,
+cmdline_proc_show() with a new function with the same signature using
+instruction_pointer_set(&fregs->regs, new_func_addr) in the Ftrace
+callback.
+
+Link: https://lore.kernel.org/linux-riscv/20221122075440.1165172-1-suagrfillet@gmail.com/
+Link: https://lore.kernel.org/linux-riscv/d7d5730b-ebef-68e5-5046-e763e1ee6164@yadro.com/
+Co-developed-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Song Shuai <suagrfillet@gmail.com>
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Cc: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Reviewed-by: Evgenii Shatokhin <e.shatokhin@yadro.com>
+Link: https://lore.kernel.org/r/20230112090603.1295340-4-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Fixes: 10626c32e382 ("riscv/ftrace: Add basic support")
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/Makefile | 4 +-
+ arch/riscv/include/asm/ftrace.h | 50 +++++++++++++++++++------
+ arch/riscv/kernel/ftrace.c | 65 ++++++++++-----------------------
+ arch/riscv/kernel/mcount-dyn.S | 42 ++++++++-------------
+ 4 files changed, 75 insertions(+), 86 deletions(-)
+
+diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
+index 3e3467dbbf73f..8ceb667e4f38c 100644
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -14,9 +14,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+ LDFLAGS_vmlinux := --no-relax
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+ ifeq ($(CONFIG_RISCV_ISA_C),y)
+- CC_FLAGS_FTRACE := -fpatchable-function-entry=8
+-else
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++else
++ CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+ endif
+ endif
+
+diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
+index 04dad33800418..9e73922e1e2e5 100644
+--- a/arch/riscv/include/asm/ftrace.h
++++ b/arch/riscv/include/asm/ftrace.h
+@@ -42,6 +42,14 @@ struct dyn_arch_ftrace {
+ * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to
+ * return address (original pc + 4)
+ *
++ *<ftrace enable>:
++ * 0: auipc t0/ra, 0x?
++ * 4: jalr t0/ra, ?(t0/ra)
++ *
++ *<ftrace disable>:
++ * 0: nop
++ * 4: nop
++ *
+ * Dynamic ftrace generates probes to call sites, so we must deal with
+ * both auipc and jalr at the same time.
+ */
+@@ -52,25 +60,43 @@ struct dyn_arch_ftrace {
+ #define AUIPC_OFFSET_MASK (0xfffff000)
+ #define AUIPC_PAD (0x00001000)
+ #define JALR_SHIFT 20
+-#define JALR_BASIC (0x000080e7)
+-#define AUIPC_BASIC (0x00000097)
++#define JALR_RA (0x000080e7)
++#define AUIPC_RA (0x00000097)
++#define JALR_T0 (0x000282e7)
++#define AUIPC_T0 (0x00000297)
+ #define NOP4 (0x00000013)
+
+-#define make_call(caller, callee, call) \
++#define to_jalr_t0(offset) \
++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0)
++
++#define to_auipc_t0(offset) \
++ ((offset & JALR_SIGN_MASK) ? \
++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_T0) : \
++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_T0))
++
++#define make_call_t0(caller, callee, call) \
+ do { \
+- call[0] = to_auipc_insn((unsigned int)((unsigned long)callee - \
+- (unsigned long)caller)); \
+- call[1] = to_jalr_insn((unsigned int)((unsigned long)callee - \
+- (unsigned long)caller)); \
++ unsigned int offset = \
++ (unsigned long) callee - (unsigned long) caller; \
++ call[0] = to_auipc_t0(offset); \
++ call[1] = to_jalr_t0(offset); \
+ } while (0)
+
+-#define to_jalr_insn(offset) \
+- (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC)
++#define to_jalr_ra(offset) \
++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA)
+
+-#define to_auipc_insn(offset) \
++#define to_auipc_ra(offset) \
+ ((offset & JALR_SIGN_MASK) ? \
+- (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) : \
+- ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC))
++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \
++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA))
++
++#define make_call_ra(caller, callee, call) \
++do { \
++ unsigned int offset = \
++ (unsigned long) callee - (unsigned long) caller; \
++ call[0] = to_auipc_ra(offset); \
++ call[1] = to_jalr_ra(offset); \
++} while (0)
+
+ /*
+ * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here.
+diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
+index 7f1e5203de886..47b43d8ee9a6c 100644
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -57,12 +57,15 @@ static int ftrace_check_current_call(unsigned long hook_pos,
+ }
+
+ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+- bool enable)
++ bool enable, bool ra)
+ {
+ unsigned int call[2];
+ unsigned int nops[2] = {NOP4, NOP4};
+
+- make_call(hook_pos, target, call);
++ if (ra)
++ make_call_ra(hook_pos, target, call);
++ else
++ make_call_t0(hook_pos, target, call);
+
+ /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */
+ if (patch_text_nosync
+@@ -72,42 +75,13 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+ return 0;
+ }
+
+-/*
+- * Put 5 instructions with 16 bytes at the front of function within
+- * patchable function entry nops' area.
+- *
+- * 0: REG_S ra, -SZREG(sp)
+- * 1: auipc ra, 0x?
+- * 2: jalr -?(ra)
+- * 3: REG_L ra, -SZREG(sp)
+- *
+- * So the opcodes is:
+- * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
+- * 1: 0x???????? -> auipc
+- * 2: 0x???????? -> jalr
+- * 3: 0xff813083 (ld)/0xffc12083 (lw)
+- */
+-#if __riscv_xlen == 64
+-#define INSN0 0xfe113c23
+-#define INSN3 0xff813083
+-#elif __riscv_xlen == 32
+-#define INSN0 0xfe112e23
+-#define INSN3 0xffc12083
+-#endif
+-
+-#define FUNC_ENTRY_SIZE 16
+-#define FUNC_ENTRY_JMP 4
+-
+ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ {
+- unsigned int call[4] = {INSN0, 0, 0, INSN3};
+- unsigned long target = addr;
+- unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++ unsigned int call[2];
+
+- call[1] = to_auipc_insn((unsigned int)(target - caller));
+- call[2] = to_jalr_insn((unsigned int)(target - caller));
++ make_call_t0(rec->ip, addr, call);
+
+- if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
++ if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ return 0;
+@@ -116,15 +90,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+ {
+- unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
++ unsigned int nops[2] = {NOP4, NOP4};
+
+- if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
++ if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE))
+ return -EPERM;
+
+ return 0;
+ }
+
+-
+ /*
+ * This is called early on, and isn't wrapped by
+ * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold
+@@ -146,10 +119,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+ int ftrace_update_ftrace_func(ftrace_func_t func)
+ {
+ int ret = __ftrace_modify_call((unsigned long)&ftrace_call,
+- (unsigned long)func, true);
++ (unsigned long)func, true, true);
+ if (!ret) {
+ ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call,
+- (unsigned long)func, true);
++ (unsigned long)func, true, true);
+ }
+
+ return ret;
+@@ -166,16 +139,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+ {
+ unsigned int call[2];
+- unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
++ unsigned long caller = rec->ip;
+ int ret;
+
+- make_call(caller, old_addr, call);
++ make_call_t0(caller, old_addr, call);
+ ret = ftrace_check_current_call(caller, call);
+
+ if (ret)
+ return ret;
+
+- return __ftrace_modify_call(caller, addr, true);
++ return __ftrace_modify_call(caller, addr, true, false);
+ }
+ #endif
+
+@@ -210,12 +183,12 @@ int ftrace_enable_ftrace_graph_caller(void)
+ int ret;
+
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+- (unsigned long)&prepare_ftrace_return, true);
++ (unsigned long)&prepare_ftrace_return, true, true);
+ if (ret)
+ return ret;
+
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+- (unsigned long)&prepare_ftrace_return, true);
++ (unsigned long)&prepare_ftrace_return, true, true);
+ }
+
+ int ftrace_disable_ftrace_graph_caller(void)
+@@ -223,12 +196,12 @@ int ftrace_disable_ftrace_graph_caller(void)
+ int ret;
+
+ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+- (unsigned long)&prepare_ftrace_return, false);
++ (unsigned long)&prepare_ftrace_return, false, true);
+ if (ret)
+ return ret;
+
+ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+- (unsigned long)&prepare_ftrace_return, false);
++ (unsigned long)&prepare_ftrace_return, false, true);
+ }
+ #endif /* CONFIG_DYNAMIC_FTRACE */
+ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
+index d171eca623b6f..125de818d1bab 100644
+--- a/arch/riscv/kernel/mcount-dyn.S
++++ b/arch/riscv/kernel/mcount-dyn.S
+@@ -13,8 +13,8 @@
+
+ .text
+
+-#define FENTRY_RA_OFFSET 12
+-#define ABI_SIZE_ON_STACK 72
++#define FENTRY_RA_OFFSET 8
++#define ABI_SIZE_ON_STACK 80
+ #define ABI_A0 0
+ #define ABI_A1 8
+ #define ABI_A2 16
+@@ -23,10 +23,10 @@
+ #define ABI_A5 40
+ #define ABI_A6 48
+ #define ABI_A7 56
+-#define ABI_RA 64
++#define ABI_T0 64
++#define ABI_RA 72
+
+ .macro SAVE_ABI
+- addi sp, sp, -SZREG
+ addi sp, sp, -ABI_SIZE_ON_STACK
+
+ REG_S a0, ABI_A0(sp)
+@@ -37,6 +37,7 @@
+ REG_S a5, ABI_A5(sp)
+ REG_S a6, ABI_A6(sp)
+ REG_S a7, ABI_A7(sp)
++ REG_S t0, ABI_T0(sp)
+ REG_S ra, ABI_RA(sp)
+ .endm
+
+@@ -49,24 +50,18 @@
+ REG_L a5, ABI_A5(sp)
+ REG_L a6, ABI_A6(sp)
+ REG_L a7, ABI_A7(sp)
++ REG_L t0, ABI_T0(sp)
+ REG_L ra, ABI_RA(sp)
+
+ addi sp, sp, ABI_SIZE_ON_STACK
+- addi sp, sp, SZREG
+ .endm
+
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ .macro SAVE_ALL
+- addi sp, sp, -SZREG
+ addi sp, sp, -PT_SIZE_ON_STACK
+
+- REG_S x1, PT_EPC(sp)
+- addi sp, sp, PT_SIZE_ON_STACK
+- REG_L x1, (sp)
+- addi sp, sp, -PT_SIZE_ON_STACK
++ REG_S t0, PT_EPC(sp)
+ REG_S x1, PT_RA(sp)
+- REG_L x1, PT_EPC(sp)
+-
+ REG_S x2, PT_SP(sp)
+ REG_S x3, PT_GP(sp)
+ REG_S x4, PT_TP(sp)
+@@ -100,15 +95,11 @@
+ .endm
+
+ .macro RESTORE_ALL
++ REG_L t0, PT_EPC(sp)
+ REG_L x1, PT_RA(sp)
+- addi sp, sp, PT_SIZE_ON_STACK
+- REG_S x1, (sp)
+- addi sp, sp, -PT_SIZE_ON_STACK
+- REG_L x1, PT_EPC(sp)
+ REG_L x2, PT_SP(sp)
+ REG_L x3, PT_GP(sp)
+ REG_L x4, PT_TP(sp)
+- REG_L x5, PT_T0(sp)
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+@@ -137,17 +128,16 @@
+ REG_L x31, PT_T6(sp)
+
+ addi sp, sp, PT_SIZE_ON_STACK
+- addi sp, sp, SZREG
+ .endm
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+ ENTRY(ftrace_caller)
+ SAVE_ABI
+
+- addi a0, ra, -FENTRY_RA_OFFSET
++ addi a0, t0, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+- REG_L a1, ABI_SIZE_ON_STACK(sp)
++ mv a1, ra
+ mv a3, sp
+
+ ftrace_call:
+@@ -155,8 +145,8 @@ ftrace_call:
+ call ftrace_stub
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- addi a0, sp, ABI_SIZE_ON_STACK
+- REG_L a1, ABI_RA(sp)
++ addi a0, sp, ABI_RA
++ REG_L a1, ABI_T0(sp)
+ addi a1, a1, -FENTRY_RA_OFFSET
+ #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+ mv a2, s0
+@@ -166,17 +156,17 @@ ftrace_graph_call:
+ call ftrace_stub
+ #endif
+ RESTORE_ABI
+- ret
++ jr t0
+ ENDPROC(ftrace_caller)
+
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ ENTRY(ftrace_regs_caller)
+ SAVE_ALL
+
+- addi a0, ra, -FENTRY_RA_OFFSET
++ addi a0, t0, -FENTRY_RA_OFFSET
+ la a1, function_trace_op
+ REG_L a2, 0(a1)
+- REG_L a1, PT_SIZE_ON_STACK(sp)
++ mv a1, ra
+ mv a3, sp
+
+ ftrace_regs_call:
+@@ -196,6 +186,6 @@ ftrace_graph_regs_call:
+ #endif
+
+ RESTORE_ALL
+- ret
++ jr t0
+ ENDPROC(ftrace_regs_caller)
+ #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+--
+2.39.2
+
--- /dev/null
+From 008af635bed0d4f0d83564b2b3986ed14472119b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Jan 2023 04:05:58 -0500
+Subject: riscv: ftrace: Remove wasted nops for !RISCV_ISA_C
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit 409c8fb20c66df7150e592747412438c04aeb11f ]
+
+When CONFIG_RISCV_ISA_C=n, -fpatchable-function-entry=8 generates more
+nops than we expect, because the compiler treats the nop opcode as the
+4-byte 0x00000013 instead of the 2-byte 0x0001.
+
+Dump of assembler code for function dw_pcie_free_msi:
+ 0xffffffff806fce94 <+0>: sd ra,-8(sp)
+ 0xffffffff806fce98 <+4>: auipc ra,0xff90f
+ 0xffffffff806fce9c <+8>: jalr -684(ra) # 0xffffffff8000bbec <ftrace_caller>
+ 0xffffffff806fcea0 <+12>: ld ra,-8(sp)
+ 0xffffffff806fcea4 <+16>: nop /* wasted */
+ 0xffffffff806fcea8 <+20>: nop /* wasted */
+ 0xffffffff806fceac <+24>: nop /* wasted */
+ 0xffffffff806fceb0 <+28>: nop /* wasted */
+ 0xffffffff806fceb4 <+0>: addi sp,sp,-48
+ 0xffffffff806fceb8 <+4>: sd s0,32(sp)
+ 0xffffffff806fcebc <+8>: sd s1,24(sp)
+ 0xffffffff806fcec0 <+12>: sd s2,16(sp)
+ 0xffffffff806fcec4 <+16>: sd s3,8(sp)
+ 0xffffffff806fcec8 <+20>: sd ra,40(sp)
+ 0xffffffff806fcecc <+24>: addi s0,sp,48
+
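+The arithmetic behind the Makefile change below: the detour area must
+be 16 bytes at this point in the series. With CONFIG_RISCV_ISA_C=y the
+assembler presumably emits the 2-byte compressed nop (0x0001), so
+-fpatchable-function-entry=8 gives 8 * 2 = 16 bytes; without the C
+extension every nop is the 4-byte 0x00000013, so =4 already gives
+4 * 4 = 16 bytes, and =8 would pad out to 32 bytes, i.e. the four
+wasted nops (16 bytes) visible in the dump above.
+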
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20230112090603.1295340-3-guoren@kernel.org
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/Makefile | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
+index 6c1ef42d5a0df..3e3467dbbf73f 100644
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -13,7 +13,11 @@ LDFLAGS_vmlinux :=
+ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+ LDFLAGS_vmlinux := --no-relax
+ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
++ifeq ($(CONFIG_RISCV_ISA_C),y)
+ CC_FLAGS_FTRACE := -fpatchable-function-entry=8
++else
++ CC_FLAGS_FTRACE := -fpatchable-function-entry=4
++endif
+ endif
+
+ ifeq ($(CONFIG_CMODEL_MEDLOW),y)
+--
+2.39.2
+
--- /dev/null
+From 0675e74ed638f0e92597d9b136e83ee75cf74541 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 8 Mar 2023 10:16:39 +0100
+Subject: riscv: Use READ_ONCE_NOCHECK in imprecise unwinding stack mode
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+[ Upstream commit 76950340cf03b149412fe0d5f0810e52ac1df8cb ]
+
+When CONFIG_FRAME_POINTER is unset, the stack unwinding function
+walk_stackframe randomly reads the stack and then, when KASAN is enabled,
+it can lead to the following backtrace:
+
+[ 0.000000] ==================================================================
+[ 0.000000] BUG: KASAN: stack-out-of-bounds in walk_stackframe+0xa6/0x11a
+[ 0.000000] Read of size 8 at addr ffffffff81807c40 by task swapper/0
+[ 0.000000]
+[ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.2.0-12919-g24203e6db61f #43
+[ 0.000000] Hardware name: riscv-virtio,qemu (DT)
+[ 0.000000] Call Trace:
+[ 0.000000] [<ffffffff80007ba8>] walk_stackframe+0x0/0x11a
+[ 0.000000] [<ffffffff80099ecc>] init_param_lock+0x26/0x2a
+[ 0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a
+[ 0.000000] [<ffffffff80c49c80>] dump_stack_lvl+0x22/0x36
+[ 0.000000] [<ffffffff80c3783e>] print_report+0x198/0x4a8
+[ 0.000000] [<ffffffff80099ecc>] init_param_lock+0x26/0x2a
+[ 0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a
+[ 0.000000] [<ffffffff8015f68a>] kasan_report+0x9a/0xc8
+[ 0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a
+[ 0.000000] [<ffffffff80007c4a>] walk_stackframe+0xa2/0x11a
+[ 0.000000] [<ffffffff8006e99c>] desc_make_final+0x80/0x84
+[ 0.000000] [<ffffffff8009a04e>] stack_trace_save+0x88/0xa6
+[ 0.000000] [<ffffffff80099fc2>] filter_irq_stacks+0x72/0x76
+[ 0.000000] [<ffffffff8006b95e>] devkmsg_read+0x32a/0x32e
+[ 0.000000] [<ffffffff8015ec16>] kasan_save_stack+0x28/0x52
+[ 0.000000] [<ffffffff8006e998>] desc_make_final+0x7c/0x84
+[ 0.000000] [<ffffffff8009a04a>] stack_trace_save+0x84/0xa6
+[ 0.000000] [<ffffffff8015ec52>] kasan_set_track+0x12/0x20
+[ 0.000000] [<ffffffff8015f22e>] __kasan_slab_alloc+0x58/0x5e
+[ 0.000000] [<ffffffff8015e7ea>] __kmem_cache_create+0x21e/0x39a
+[ 0.000000] [<ffffffff80e133ac>] create_boot_cache+0x70/0x9c
+[ 0.000000] [<ffffffff80e17ab2>] kmem_cache_init+0x6c/0x11e
+[ 0.000000] [<ffffffff80e00fd6>] mm_init+0xd8/0xfe
+[ 0.000000] [<ffffffff80e011d8>] start_kernel+0x190/0x3ca
+[ 0.000000]
+[ 0.000000] The buggy address belongs to stack of task swapper/0
+[ 0.000000] and is located at offset 0 in frame:
+[ 0.000000] stack_trace_save+0x0/0xa6
+[ 0.000000]
+[ 0.000000] This frame has 1 object:
+[ 0.000000] [32, 56) 'c'
+[ 0.000000]
+[ 0.000000] The buggy address belongs to the physical page:
+[ 0.000000] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x81a07
+[ 0.000000] flags: 0x1000(reserved|zone=0)
+[ 0.000000] raw: 0000000000001000 ff600003f1e3d150 ff600003f1e3d150 0000000000000000
+[ 0.000000] raw: 0000000000000000 0000000000000000 00000001ffffffff
+[ 0.000000] page dumped because: kasan: bad access detected
+[ 0.000000]
+[ 0.000000] Memory state around the buggy address:
+[ 0.000000] ffffffff81807b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[ 0.000000] ffffffff81807b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[ 0.000000] >ffffffff81807c00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 f3
+[ 0.000000] ^
+[ 0.000000] ffffffff81807c80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
+[ 0.000000] ffffffff81807d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+[ 0.000000] ==================================================================
+
+Fix that by using READ_ONCE_NOCHECK when reading the stack in imprecise
+mode.
+
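+To make the mechanism concrete: imprecise unwinding scans every stack
+slot and keeps the values that look like return addresses, so it reads
+outside any single frame's KASAN-annotated bounds by design, and
+READ_ONCE_NOCHECK performs the load without KASAN instrumentation. A
+rough userspace analogue (looks_like_text() and its address range are
+made up; they stand in for __kernel_text_address()):
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  static int looks_like_text(uintptr_t pc)
+  {
+          return pc >= 0x400000 && pc < 0x800000;  /* fake text segment */
+  }
+
+  int main(void)
+  {
+          /* a fake stack: a few return addresses among random data */
+          uintptr_t stack[] = { 0, 0x401238, 7, 0x40567c, 0x3fffff, 0 };
+
+          for (unsigned int i = 0; i < sizeof(stack) / sizeof(stack[0]); i++) {
+                  uintptr_t pc = stack[i] - 0x4;  /* back up over the call */
+
+                  if (looks_like_text(pc))
+                          printf("frame? pc=%#lx\n", (unsigned long)pc);
+          }
+          return 0;
+  }
+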
+Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly")
+Reported-by: Chathura Rajapaksha <chathura.abeyrathne.lk@gmail.com>
+Link: https://lore.kernel.org/all/CAD7mqryDQCYyJ1gAmtMm8SASMWAQ4i103ptTb0f6Oda=tPY2=A@mail.gmail.com/
+Suggested-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Link: https://lore.kernel.org/r/20230308091639.602024-1-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/stacktrace.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
+index 1e53fbe5eb783..9c34735c1e771 100644
+--- a/arch/riscv/kernel/stacktrace.c
++++ b/arch/riscv/kernel/stacktrace.c
+@@ -96,7 +96,7 @@ void notrace walk_stackframe(struct task_struct *task,
+ while (!kstack_end(ksp)) {
+ if (__kernel_text_address(pc) && unlikely(fn(pc, arg)))
+ break;
+- pc = (*ksp++) - 0x4;
++ pc = READ_ONCE_NOCHECK(*ksp++) - 0x4;
+ }
+ }
+
+--
+2.39.2
+
--- /dev/null
+From abb0542c5e210048f5905f02935615dc315a5240 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Dec 2020 16:01:41 +0000
+Subject: riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit afc76b8b80112189b6f11e67e19cf58301944814 ]
+
+This patch changes the current detour mechanism of dynamic ftrace
+which has been discussed during LPC 2020 RISCV-MC [1].
+
+Before the patch, we used mcount for detour:
+<funca>:
+ addi sp,sp,-16
+ sd ra,8(sp)
+ sd s0,0(sp)
+ addi s0,sp,16
+ mv a5,ra
+ mv a0,a5
+ auipc ra,0x0 -> nop
+ jalr -296(ra) <_mcount@plt> ->nop
+ ...
+
+After the patch, we use nop call site area for detour:
+<funca>:
+ nop -> REG_S ra, -SZREG(sp)
+ nop -> auipc ra, 0x?
+ nop -> jalr ?(ra)
+ nop -> REG_L ra, -SZREG(sp)
+ ...
+
+The mcount mechanism is mixed in with the gcc function prologue, which
+is not very clear. Patchable function entry simply puts 16 bytes of
+nops in front of the function prologue, which can be filled in by a
+separate detour mechanism.
+
+[1] https://www.linuxplumbersconf.org/event/7/contributions/807/
+
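+As a sanity check on the INSN0/INSN3 opcodes introduced below: an
+S-type store can be unpacked by hand (field layout per the RISC-V base
+ISA: imm[11:5] rs2 rs1 funct3 imm[4:0] opcode). A small sketch
+decoding INSN0 for rv64:
+
+  #include <stdint.h>
+  #include <stdio.h>
+
+  int main(void)
+  {
+          uint32_t insn = 0xfe113c23;                /* expect: sd ra,-8(sp) */
+          uint32_t opcode = insn & 0x7f;             /* 0x23: STORE */
+          uint32_t funct3 = (insn >> 12) & 0x7;      /* 3: sd */
+          uint32_t rs1 = (insn >> 15) & 0x1f;        /* 2: sp */
+          uint32_t rs2 = (insn >> 20) & 0x1f;        /* 1: ra */
+          int32_t imm = ((int32_t)(insn & 0xfe000000) >> 20) |
+                        (int32_t)((insn >> 7) & 0x1f);
+
+          printf("opcode=%#x funct3=%u rs1=x%u rs2=x%u imm=%d\n",
+                 opcode, funct3, rs1, rs2, imm);     /* imm = -8 */
+          return 0;
+  }
+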
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Palmer Dabbelt <palmerdabbelt@google.com>
+Stable-dep-of: 409c8fb20c66 ("riscv: ftrace: Remove wasted nops for !RISCV_ISA_C")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/Makefile | 2 +
+ arch/riscv/kernel/ftrace.c | 95 ++++-----
+ arch/riscv/kernel/mcount-dyn.S | 342 +++++++++++++++------------------
+ 3 files changed, 204 insertions(+), 235 deletions(-)
+
+diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
+index 9446282b52bab..6c1ef42d5a0df 100644
+--- a/arch/riscv/Makefile
++++ b/arch/riscv/Makefile
+@@ -12,6 +12,8 @@ OBJCOPYFLAGS := -O binary
+ LDFLAGS_vmlinux :=
+ ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
+ LDFLAGS_vmlinux := --no-relax
++ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
++ CC_FLAGS_FTRACE := -fpatchable-function-entry=8
+ endif
+
+ ifeq ($(CONFIG_CMODEL_MEDLOW),y)
+diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c
+index 765b62434f303..7f1e5203de886 100644
+--- a/arch/riscv/kernel/ftrace.c
++++ b/arch/riscv/kernel/ftrace.c
+@@ -72,29 +72,56 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
+ return 0;
+ }
+
++/*
++ * Put 5 instructions with 16 bytes at the front of function within
++ * patchable function entry nops' area.
++ *
++ * 0: REG_S ra, -SZREG(sp)
++ * 1: auipc ra, 0x?
++ * 2: jalr -?(ra)
++ * 3: REG_L ra, -SZREG(sp)
++ *
++ * So the opcodes is:
++ * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
++ * 1: 0x???????? -> auipc
++ * 2: 0x???????? -> jalr
++ * 3: 0xff813083 (ld)/0xffc12083 (lw)
++ */
++#if __riscv_xlen == 64
++#define INSN0 0xfe113c23
++#define INSN3 0xff813083
++#elif __riscv_xlen == 32
++#define INSN0 0xfe112e23
++#define INSN3 0xffc12083
++#endif
++
++#define FUNC_ENTRY_SIZE 16
++#define FUNC_ENTRY_JMP 4
++
+ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+ {
+- int ret = ftrace_check_current_call(rec->ip, NULL);
++ unsigned int call[4] = {INSN0, 0, 0, INSN3};
++ unsigned long target = addr;
++ unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
+
+- if (ret)
+- return ret;
++ call[1] = to_auipc_insn((unsigned int)(target - caller));
++ call[2] = to_jalr_insn((unsigned int)(target - caller));
+
+- return __ftrace_modify_call(rec->ip, addr, true);
++ if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
++ return -EPERM;
++
++ return 0;
+ }
+
+ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
+ unsigned long addr)
+ {
+- unsigned int call[2];
+- int ret;
++ unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
+
+- make_call(rec->ip, addr, call);
+- ret = ftrace_check_current_call(rec->ip, call);
+-
+- if (ret)
+- return ret;
++ if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
++ return -EPERM;
+
+- return __ftrace_modify_call(rec->ip, addr, false);
++ return 0;
+ }
+
+
+@@ -139,15 +166,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+ unsigned long addr)
+ {
+ unsigned int call[2];
++ unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
+ int ret;
+
+- make_call(rec->ip, old_addr, call);
+- ret = ftrace_check_current_call(rec->ip, call);
++ make_call(caller, old_addr, call);
++ ret = ftrace_check_current_call(caller, call);
+
+ if (ret)
+ return ret;
+
+- return __ftrace_modify_call(rec->ip, addr, true);
++ return __ftrace_modify_call(caller, addr, true);
+ }
+ #endif
+
+@@ -176,53 +204,30 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+
+ #ifdef CONFIG_DYNAMIC_FTRACE
+ extern void ftrace_graph_call(void);
++extern void ftrace_graph_regs_call(void);
+ int ftrace_enable_ftrace_graph_caller(void)
+ {
+- unsigned int call[2];
+- static int init_graph = 1;
+ int ret;
+
+- make_call(&ftrace_graph_call, &ftrace_stub, call);
+-
+- /*
+- * When enabling graph tracer for the first time, ftrace_graph_call
+- * should contains a call to ftrace_stub. Once it has been disabled,
+- * the 8-bytes at the position becomes NOPs.
+- */
+- if (init_graph) {
+- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+- call);
+- init_graph = 0;
+- } else {
+- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+- NULL);
+- }
+-
++ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
++ (unsigned long)&prepare_ftrace_return, true);
+ if (ret)
+ return ret;
+
+- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
++ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+ (unsigned long)&prepare_ftrace_return, true);
+ }
+
+ int ftrace_disable_ftrace_graph_caller(void)
+ {
+- unsigned int call[2];
+ int ret;
+
+- make_call(&ftrace_graph_call, &prepare_ftrace_return, call);
+-
+- /*
+- * This is to make sure that ftrace_enable_ftrace_graph_caller
+- * did the right thing.
+- */
+- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
+- call);
+-
++ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
++ (unsigned long)&prepare_ftrace_return, false);
+ if (ret)
+ return ret;
+
+- return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
++ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
+ (unsigned long)&prepare_ftrace_return, false);
+ }
+ #endif /* CONFIG_DYNAMIC_FTRACE */
+diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
+index 35a6ed76cb8b7..d171eca623b6f 100644
+--- a/arch/riscv/kernel/mcount-dyn.S
++++ b/arch/riscv/kernel/mcount-dyn.S
+@@ -13,224 +13,186 @@
+
+ .text
+
+- .macro SAVE_ABI_STATE
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- addi sp, sp, -48
+- sd s0, 32(sp)
+- sd ra, 40(sp)
+- addi s0, sp, 48
+- sd t0, 24(sp)
+- sd t1, 16(sp)
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+- sd t2, 8(sp)
+-#endif
+-#else
+- addi sp, sp, -16
+- sd s0, 0(sp)
+- sd ra, 8(sp)
+- addi s0, sp, 16
+-#endif
++#define FENTRY_RA_OFFSET 12
++#define ABI_SIZE_ON_STACK 72
++#define ABI_A0 0
++#define ABI_A1 8
++#define ABI_A2 16
++#define ABI_A3 24
++#define ABI_A4 32
++#define ABI_A5 40
++#define ABI_A6 48
++#define ABI_A7 56
++#define ABI_RA 64
++
++ .macro SAVE_ABI
++ addi sp, sp, -SZREG
++ addi sp, sp, -ABI_SIZE_ON_STACK
++
++ REG_S a0, ABI_A0(sp)
++ REG_S a1, ABI_A1(sp)
++ REG_S a2, ABI_A2(sp)
++ REG_S a3, ABI_A3(sp)
++ REG_S a4, ABI_A4(sp)
++ REG_S a5, ABI_A5(sp)
++ REG_S a6, ABI_A6(sp)
++ REG_S a7, ABI_A7(sp)
++ REG_S ra, ABI_RA(sp)
+ .endm
+
+- .macro RESTORE_ABI_STATE
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- ld s0, 32(sp)
+- ld ra, 40(sp)
+- addi sp, sp, 48
+-#else
+- ld ra, 8(sp)
+- ld s0, 0(sp)
+- addi sp, sp, 16
+-#endif
++ .macro RESTORE_ABI
++ REG_L a0, ABI_A0(sp)
++ REG_L a1, ABI_A1(sp)
++ REG_L a2, ABI_A2(sp)
++ REG_L a3, ABI_A3(sp)
++ REG_L a4, ABI_A4(sp)
++ REG_L a5, ABI_A5(sp)
++ REG_L a6, ABI_A6(sp)
++ REG_L a7, ABI_A7(sp)
++ REG_L ra, ABI_RA(sp)
++
++ addi sp, sp, ABI_SIZE_ON_STACK
++ addi sp, sp, SZREG
+ .endm
+
+- .macro RESTORE_GRAPH_ARGS
+- ld a0, 24(sp)
+- ld a1, 16(sp)
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+- ld a2, 8(sp)
+-#endif
++#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
++ .macro SAVE_ALL
++ addi sp, sp, -SZREG
++ addi sp, sp, -PT_SIZE_ON_STACK
++
++ REG_S x1, PT_EPC(sp)
++ addi sp, sp, PT_SIZE_ON_STACK
++ REG_L x1, (sp)
++ addi sp, sp, -PT_SIZE_ON_STACK
++ REG_S x1, PT_RA(sp)
++ REG_L x1, PT_EPC(sp)
++
++ REG_S x2, PT_SP(sp)
++ REG_S x3, PT_GP(sp)
++ REG_S x4, PT_TP(sp)
++ REG_S x5, PT_T0(sp)
++ REG_S x6, PT_T1(sp)
++ REG_S x7, PT_T2(sp)
++ REG_S x8, PT_S0(sp)
++ REG_S x9, PT_S1(sp)
++ REG_S x10, PT_A0(sp)
++ REG_S x11, PT_A1(sp)
++ REG_S x12, PT_A2(sp)
++ REG_S x13, PT_A3(sp)
++ REG_S x14, PT_A4(sp)
++ REG_S x15, PT_A5(sp)
++ REG_S x16, PT_A6(sp)
++ REG_S x17, PT_A7(sp)
++ REG_S x18, PT_S2(sp)
++ REG_S x19, PT_S3(sp)
++ REG_S x20, PT_S4(sp)
++ REG_S x21, PT_S5(sp)
++ REG_S x22, PT_S6(sp)
++ REG_S x23, PT_S7(sp)
++ REG_S x24, PT_S8(sp)
++ REG_S x25, PT_S9(sp)
++ REG_S x26, PT_S10(sp)
++ REG_S x27, PT_S11(sp)
++ REG_S x28, PT_T3(sp)
++ REG_S x29, PT_T4(sp)
++ REG_S x30, PT_T5(sp)
++ REG_S x31, PT_T6(sp)
+ .endm
+
+-ENTRY(ftrace_graph_caller)
+- addi sp, sp, -16
+- sd s0, 0(sp)
+- sd ra, 8(sp)
+- addi s0, sp, 16
+-ftrace_graph_call:
+- .global ftrace_graph_call
+- /*
+- * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the
+- * call below. Check ftrace_modify_all_code for details.
+- */
+- call ftrace_stub
+- ld ra, 8(sp)
+- ld s0, 0(sp)
+- addi sp, sp, 16
+- ret
+-ENDPROC(ftrace_graph_caller)
++ .macro RESTORE_ALL
++ REG_L x1, PT_RA(sp)
++ addi sp, sp, PT_SIZE_ON_STACK
++ REG_S x1, (sp)
++ addi sp, sp, -PT_SIZE_ON_STACK
++ REG_L x1, PT_EPC(sp)
++ REG_L x2, PT_SP(sp)
++ REG_L x3, PT_GP(sp)
++ REG_L x4, PT_TP(sp)
++ REG_L x5, PT_T0(sp)
++ REG_L x6, PT_T1(sp)
++ REG_L x7, PT_T2(sp)
++ REG_L x8, PT_S0(sp)
++ REG_L x9, PT_S1(sp)
++ REG_L x10, PT_A0(sp)
++ REG_L x11, PT_A1(sp)
++ REG_L x12, PT_A2(sp)
++ REG_L x13, PT_A3(sp)
++ REG_L x14, PT_A4(sp)
++ REG_L x15, PT_A5(sp)
++ REG_L x16, PT_A6(sp)
++ REG_L x17, PT_A7(sp)
++ REG_L x18, PT_S2(sp)
++ REG_L x19, PT_S3(sp)
++ REG_L x20, PT_S4(sp)
++ REG_L x21, PT_S5(sp)
++ REG_L x22, PT_S6(sp)
++ REG_L x23, PT_S7(sp)
++ REG_L x24, PT_S8(sp)
++ REG_L x25, PT_S9(sp)
++ REG_L x26, PT_S10(sp)
++ REG_L x27, PT_S11(sp)
++ REG_L x28, PT_T3(sp)
++ REG_L x29, PT_T4(sp)
++ REG_L x30, PT_T5(sp)
++ REG_L x31, PT_T6(sp)
++
++ addi sp, sp, PT_SIZE_ON_STACK
++ addi sp, sp, SZREG
++ .endm
++#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
+ ENTRY(ftrace_caller)
+- /*
+- * a0: the address in the caller when calling ftrace_caller
+- * a1: the caller's return address
+- * a2: the address of global variable function_trace_op
+- */
+- ld a1, -8(s0)
+- addi a0, ra, -MCOUNT_INSN_SIZE
+- la t5, function_trace_op
+- ld a2, 0(t5)
++ SAVE_ABI
+
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- /*
+- * the graph tracer (specifically, prepare_ftrace_return) needs these
+- * arguments but for now the function tracer occupies the regs, so we
+- * save them in temporary regs to recover later.
+- */
+- addi t0, s0, -8
+- mv t1, a0
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+- ld t2, -16(s0)
+-#endif
+-#endif
++ addi a0, ra, -FENTRY_RA_OFFSET
++ la a1, function_trace_op
++ REG_L a2, 0(a1)
++ REG_L a1, ABI_SIZE_ON_STACK(sp)
++ mv a3, sp
+
+- SAVE_ABI_STATE
+ ftrace_call:
+ .global ftrace_call
+- /*
+- * For the dynamic ftrace to work, here we should reserve at least
+- * 8 bytes for a functional auipc-jalr pair. The following call
+- * serves this purpose.
+- *
+- * Calling ftrace_update_ftrace_func would overwrite the nops below.
+- * Check ftrace_modify_all_code for details.
+- */
+ call ftrace_stub
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- RESTORE_GRAPH_ARGS
+- call ftrace_graph_caller
++ addi a0, sp, ABI_SIZE_ON_STACK
++ REG_L a1, ABI_RA(sp)
++ addi a1, a1, -FENTRY_RA_OFFSET
++#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
++ mv a2, s0
+ #endif
+-
+- RESTORE_ABI_STATE
++ftrace_graph_call:
++ .global ftrace_graph_call
++ call ftrace_stub
++#endif
++ RESTORE_ABI
+ ret
+ ENDPROC(ftrace_caller)
+
+ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+- .macro SAVE_ALL
+- addi sp, sp, -(PT_SIZE_ON_STACK+16)
+- sd s0, (PT_SIZE_ON_STACK)(sp)
+- sd ra, (PT_SIZE_ON_STACK+8)(sp)
+- addi s0, sp, (PT_SIZE_ON_STACK+16)
+-
+- sd x1, PT_RA(sp)
+- sd x2, PT_SP(sp)
+- sd x3, PT_GP(sp)
+- sd x4, PT_TP(sp)
+- sd x5, PT_T0(sp)
+- sd x6, PT_T1(sp)
+- sd x7, PT_T2(sp)
+- sd x8, PT_S0(sp)
+- sd x9, PT_S1(sp)
+- sd x10, PT_A0(sp)
+- sd x11, PT_A1(sp)
+- sd x12, PT_A2(sp)
+- sd x13, PT_A3(sp)
+- sd x14, PT_A4(sp)
+- sd x15, PT_A5(sp)
+- sd x16, PT_A6(sp)
+- sd x17, PT_A7(sp)
+- sd x18, PT_S2(sp)
+- sd x19, PT_S3(sp)
+- sd x20, PT_S4(sp)
+- sd x21, PT_S5(sp)
+- sd x22, PT_S6(sp)
+- sd x23, PT_S7(sp)
+- sd x24, PT_S8(sp)
+- sd x25, PT_S9(sp)
+- sd x26, PT_S10(sp)
+- sd x27, PT_S11(sp)
+- sd x28, PT_T3(sp)
+- sd x29, PT_T4(sp)
+- sd x30, PT_T5(sp)
+- sd x31, PT_T6(sp)
+- .endm
+-
+- .macro RESTORE_ALL
+- ld x1, PT_RA(sp)
+- ld x2, PT_SP(sp)
+- ld x3, PT_GP(sp)
+- ld x4, PT_TP(sp)
+- ld x5, PT_T0(sp)
+- ld x6, PT_T1(sp)
+- ld x7, PT_T2(sp)
+- ld x8, PT_S0(sp)
+- ld x9, PT_S1(sp)
+- ld x10, PT_A0(sp)
+- ld x11, PT_A1(sp)
+- ld x12, PT_A2(sp)
+- ld x13, PT_A3(sp)
+- ld x14, PT_A4(sp)
+- ld x15, PT_A5(sp)
+- ld x16, PT_A6(sp)
+- ld x17, PT_A7(sp)
+- ld x18, PT_S2(sp)
+- ld x19, PT_S3(sp)
+- ld x20, PT_S4(sp)
+- ld x21, PT_S5(sp)
+- ld x22, PT_S6(sp)
+- ld x23, PT_S7(sp)
+- ld x24, PT_S8(sp)
+- ld x25, PT_S9(sp)
+- ld x26, PT_S10(sp)
+- ld x27, PT_S11(sp)
+- ld x28, PT_T3(sp)
+- ld x29, PT_T4(sp)
+- ld x30, PT_T5(sp)
+- ld x31, PT_T6(sp)
+-
+- ld s0, (PT_SIZE_ON_STACK)(sp)
+- ld ra, (PT_SIZE_ON_STACK+8)(sp)
+- addi sp, sp, (PT_SIZE_ON_STACK+16)
+- .endm
+-
+- .macro RESTORE_GRAPH_REG_ARGS
+- ld a0, PT_T0(sp)
+- ld a1, PT_T1(sp)
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+- ld a2, PT_T2(sp)
+-#endif
+- .endm
+-
+-/*
+- * Most of the contents are the same as ftrace_caller.
+- */
+ ENTRY(ftrace_regs_caller)
+- /*
+- * a3: the address of all registers in the stack
+- */
+- ld a1, -8(s0)
+- addi a0, ra, -MCOUNT_INSN_SIZE
+- la t5, function_trace_op
+- ld a2, 0(t5)
+- addi a3, sp, -(PT_SIZE_ON_STACK+16)
+-
+-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- addi t0, s0, -8
+- mv t1, a0
+-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+- ld t2, -16(s0)
+-#endif
+-#endif
+ SAVE_ALL
+
++ addi a0, ra, -FENTRY_RA_OFFSET
++ la a1, function_trace_op
++ REG_L a2, 0(a1)
++ REG_L a1, PT_SIZE_ON_STACK(sp)
++ mv a3, sp
++
+ ftrace_regs_call:
+ .global ftrace_regs_call
+ call ftrace_stub
+
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- RESTORE_GRAPH_REG_ARGS
+- call ftrace_graph_caller
++ addi a0, sp, PT_RA
++ REG_L a1, PT_EPC(sp)
++ addi a1, a1, -FENTRY_RA_OFFSET
++#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
++ mv a2, s0
++#endif
++ftrace_graph_regs_call:
++ .global ftrace_graph_regs_call
++ call ftrace_stub
+ #endif
+
+ RESTORE_ALL
+--
+2.39.2
+
--- /dev/null
+From f8a58485c0ab2ebd34421c68b42f82b6751ab879 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Feb 2023 12:52:00 -0800
+Subject: scsi: core: Remove the /proc/scsi/${proc_name} directory earlier
+
+From: Bart Van Assche <bvanassche@acm.org>
+
+[ Upstream commit fc663711b94468f4e1427ebe289c9f05669699c9 ]
+
+Remove the /proc/scsi/${proc_name} directory earlier to fix a race
+condition between unloading and reloading kernel modules. This fixes a bug
+introduced in 2009 by commit 77c019768f06 ("[SCSI] fix /proc memory leak in
+the SCSI core").
+
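+A rough sketch of the race being closed (the call chain follows the
+warning below and the two hunks; the interleaving is illustrative):
+
+  rmmod path                         modprobe path
+  scsi_remove_host()
+    /* proc dir still registered */
+                                     scsi_host_alloc()
+                                       scsi_proc_hostdir_add()
+                                         proc_mkdir() -> already registered!
+  scsi_host_dev_release()
+    scsi_proc_hostdir_rm()           /* too late */
+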
+Fix the following kernel warning:
+
+proc_dir_entry 'scsi/scsi_debug' already registered
+WARNING: CPU: 19 PID: 27986 at fs/proc/generic.c:376 proc_register+0x27d/0x2e0
+Call Trace:
+ proc_mkdir+0xb5/0xe0
+ scsi_proc_hostdir_add+0xb5/0x170
+ scsi_host_alloc+0x683/0x6c0
+ sdebug_driver_probe+0x6b/0x2d0 [scsi_debug]
+ really_probe+0x159/0x540
+ __driver_probe_device+0xdc/0x230
+ driver_probe_device+0x4f/0x120
+ __device_attach_driver+0xef/0x180
+ bus_for_each_drv+0xe5/0x130
+ __device_attach+0x127/0x290
+ device_initial_probe+0x17/0x20
+ bus_probe_device+0x110/0x130
+ device_add+0x673/0xc80
+ device_register+0x1e/0x30
+ sdebug_add_host_helper+0x1a7/0x3b0 [scsi_debug]
+ scsi_debug_init+0x64f/0x1000 [scsi_debug]
+ do_one_initcall+0xd7/0x470
+ do_init_module+0xe7/0x330
+ load_module+0x122a/0x12c0
+ __do_sys_finit_module+0x124/0x1a0
+ __x64_sys_finit_module+0x46/0x50
+ do_syscall_64+0x38/0x80
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Link: https://lore.kernel.org/r/20230210205200.36973-3-bvanassche@acm.org
+Cc: Alan Stern <stern@rowland.harvard.edu>
+Cc: Yi Zhang <yi.zhang@redhat.com>
+Cc: stable@vger.kernel.org
+Fixes: 77c019768f06 ("[SCSI] fix /proc memory leak in the SCSI core")
+Reported-by: Yi Zhang <yi.zhang@redhat.com>
+Signed-off-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/hosts.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
+index d664c4650b2dd..fae0323242103 100644
+--- a/drivers/scsi/hosts.c
++++ b/drivers/scsi/hosts.c
+@@ -180,6 +180,7 @@ void scsi_remove_host(struct Scsi_Host *shost)
+ scsi_forget_host(shost);
+ mutex_unlock(&shost->scan_mutex);
+ scsi_proc_host_rm(shost);
++ scsi_proc_hostdir_rm(shost->hostt);
+
+ spin_lock_irqsave(shost->host_lock, flags);
+ if (scsi_host_set_state(shost, SHOST_DEL))
+@@ -321,6 +322,7 @@ static void scsi_host_dev_release(struct device *dev)
+ struct Scsi_Host *shost = dev_to_shost(dev);
+ struct device *parent = dev->parent;
+
++ /* In case scsi_remove_host() has not been called. */
+ scsi_proc_hostdir_rm(shost->hostt);
+
+ /* Wait for functions invoked through call_rcu(&shost->rcu, ...) */
+--
+2.39.2
+
--- /dev/null
+From bc488ae13b236a092e733fa7e55c51c6c308a204 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Mar 2023 16:23:40 +0530
+Subject: scsi: megaraid_sas: Update max supported LD IDs to 240
+
+From: Chandrakanth Patil <chandrakanth.patil@broadcom.com>
+
+[ Upstream commit bfa659177dcba48cf13f2bd88c1972f12a60bf1c ]
+
+The firmware only supports Logical Disk IDs up to 240 and LD ID 255 (0xFF)
+is reserved for deleted LDs. However, in some cases, firmware was assigning
+LD ID 254 (0xFE) to deleted LDs and this was causing the driver to mark the
+wrong disk as deleted. This in turn caused the wrong disk device to be
+taken offline by the SCSI midlayer.
+
+To address this issue, limit the LD ID range from 255 to 240. This ensures
+the deleted LD ID is properly identified and removed by the driver without
+accidentally deleting any valid LDs.
+
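+The effect of the new bound, in a tiny standalone sketch (the ID list
+is hypothetical; previously the cutoff was MAX_LOGICAL_DRIVES_EXT - 1,
+which let the firmware's stray 0xFE slip through as a live LD):
+
+  #include <stdio.h>
+
+  #define MEGASAS_MAX_SUPPORTED_LD_IDS 240
+
+  int main(void)
+  {
+          unsigned char ids[] = { 0, 17, 239, 0xfe, 0xff };
+
+          for (unsigned int i = 0; i < sizeof(ids); i++)
+                  printf("ld %3u -> %s\n", (unsigned int)ids[i],
+                         ids[i] >= MEGASAS_MAX_SUPPORTED_LD_IDS ?
+                         "skip (deleted/invalid)" : "valid");
+          return 0;
+  }
+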
+Fixes: ae6874ba4b43 ("scsi: megaraid_sas: Early detection of VD deletion through RaidMap update")
+Reported-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Chandrakanth Patil <chandrakanth.patil@broadcom.com>
+Signed-off-by: Sumit Saxena <sumit.saxena@broadcom.com>
+Link: https://lore.kernel.org/r/20230302105342.34933-2-chandrakanth.patil@broadcom.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/megaraid/megaraid_sas.h | 2 ++
+ drivers/scsi/megaraid/megaraid_sas_fp.c | 2 +-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h
+index c088a848776ef..2d5b1d5978664 100644
+--- a/drivers/scsi/megaraid/megaraid_sas.h
++++ b/drivers/scsi/megaraid/megaraid_sas.h
+@@ -1517,6 +1517,8 @@ struct megasas_ctrl_info {
+ #define MEGASAS_MAX_LD_IDS (MEGASAS_MAX_LD_CHANNELS * \
+ MEGASAS_MAX_DEV_PER_CHANNEL)
+
++#define MEGASAS_MAX_SUPPORTED_LD_IDS 240
++
+ #define MEGASAS_MAX_SECTORS (2*1024)
+ #define MEGASAS_MAX_SECTORS_IEEE (2*128)
+ #define MEGASAS_DBG_LVL 1
+diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c
+index 83f69c33b01a9..ec10d35b4685a 100644
+--- a/drivers/scsi/megaraid/megaraid_sas_fp.c
++++ b/drivers/scsi/megaraid/megaraid_sas_fp.c
+@@ -358,7 +358,7 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance, u64 map_id)
+ ld = MR_TargetIdToLdGet(i, drv_map);
+
+ /* For non existing VDs, iterate to next VD*/
+- if (ld >= (MAX_LOGICAL_DRIVES_EXT - 1))
++ if (ld >= MEGASAS_MAX_SUPPORTED_LD_IDS)
+ continue;
+
+ raid = MR_LdRaidGet(ld, drv_map);
+--
+2.39.2
+
--- /dev/null
+From 76f7cb6aeb2007c0d4febda804772cf6ae508604 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 6 May 2022 18:05:09 +0200
+Subject: selftests/landlock: Add clang-format exceptions
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mickaël Salaün <mic@digikod.net>
+
+[ Upstream commit 4598d9abf4215e1e371a35683350d50122793c80 ]
+
+In preparation for a following commit, add clang-format on and
+clang-format off stanzas around constant definitions and the
+TEST_F_FORK macro. This makes it possible to keep values aligned, which
+is much more readable than packed definitions.
+
+Add other clang-format exceptions for FIXTURE() and
+FIXTURE_VARIANT_ADD() declarations to force a space before the open
+brace, as reported by checkpatch.pl.
+
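+For readers unfamiliar with the markers: they are ordinary comments
+that toggle formatting for the enclosed region, e.g. (a generic example
+mirroring the pattern used in the hunks below):
+
+  /* clang-format off */
+  #define ACCESS_RO ( \
+          LANDLOCK_ACCESS_FS_READ_FILE | \
+          LANDLOCK_ACCESS_FS_READ_DIR)
+  /* clang-format on */
+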
+Link: https://lore.kernel.org/r/20220506160513.523257-4-mic@digikod.net
+Cc: stable@vger.kernel.org
+Signed-off-by: Mickaël Salaün <mic@digikod.net>
+Stable-dep-of: 8677e555f17f ("selftests/landlock: Test ptrace as much as possible with Yama")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/landlock/common.h | 2 ++
+ tools/testing/selftests/landlock/fs_test.c | 23 ++++++++++++++-----
+ .../testing/selftests/landlock/ptrace_test.c | 20 +++++++++++++++-
+ 3 files changed, 38 insertions(+), 7 deletions(-)
+
+diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
+index 20e2a9286d710..61127fffbeb83 100644
+--- a/tools/testing/selftests/landlock/common.h
++++ b/tools/testing/selftests/landlock/common.h
+@@ -29,6 +29,7 @@
+ * this to be possible, we must not call abort() but instead exit smoothly
+ * (hence the step print).
+ */
++/* clang-format off */
+ #define TEST_F_FORK(fixture_name, test_name) \
+ static void fixture_name##_##test_name##_child( \
+ struct __test_metadata *_metadata, \
+@@ -75,6 +76,7 @@
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ __attribute__((unused)) *variant)
++/* clang-format on */
+
+ #ifndef landlock_create_ruleset
+ static inline int landlock_create_ruleset(
+diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
+index db153452b110a..036d55836b9ec 100644
+--- a/tools/testing/selftests/landlock/fs_test.c
++++ b/tools/testing/selftests/landlock/fs_test.c
+@@ -256,8 +256,9 @@ static void remove_layout1(struct __test_metadata *const _metadata)
+ EXPECT_EQ(0, remove_path(dir_s3d2));
+ }
+
+-FIXTURE(layout1) {
+-};
++/* clang-format off */
++FIXTURE(layout1) {};
++/* clang-format on */
+
+ FIXTURE_SETUP(layout1)
+ {
+@@ -411,6 +412,8 @@ TEST_F_FORK(layout1, inval)
+ ASSERT_EQ(0, close(ruleset_fd));
+ }
+
++/* clang-format off */
++
+ #define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+@@ -431,6 +434,8 @@ TEST_F_FORK(layout1, inval)
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+ ACCESS_LAST)
+
++/* clang-format on */
++
+ TEST_F_FORK(layout1, file_access_rights)
+ {
+ __u64 access;
+@@ -487,6 +492,8 @@ struct rule {
+ __u64 access;
+ };
+
++/* clang-format off */
++
+ #define ACCESS_RO ( \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+ LANDLOCK_ACCESS_FS_READ_DIR)
+@@ -495,6 +502,8 @@ struct rule {
+ ACCESS_RO | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE)
+
++/* clang-format on */
++
+ static int create_ruleset(struct __test_metadata *const _metadata,
+ const __u64 handled_access_fs, const struct rule rules[])
+ {
+@@ -2105,8 +2114,9 @@ TEST_F_FORK(layout1, proc_pipe)
+ ASSERT_EQ(0, close(pipe_fds[1]));
+ }
+
+-FIXTURE(layout1_bind) {
+-};
++/* clang-format off */
++FIXTURE(layout1_bind) {};
++/* clang-format on */
+
+ FIXTURE_SETUP(layout1_bind)
+ {
+@@ -2446,8 +2456,9 @@ static const char (*merge_sub_files[])[] = {
+ * └── work
+ */
+
+-FIXTURE(layout2_overlay) {
+-};
++/* clang-format off */
++FIXTURE(layout2_overlay) {};
++/* clang-format on */
+
+ FIXTURE_SETUP(layout2_overlay)
+ {
+diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
+index 15fbef9cc8496..090adadfe2dc3 100644
+--- a/tools/testing/selftests/landlock/ptrace_test.c
++++ b/tools/testing/selftests/landlock/ptrace_test.c
+@@ -59,7 +59,9 @@ static int test_ptrace_read(const pid_t pid)
+ return 0;
+ }
+
+-FIXTURE(hierarchy) { };
++/* clang-format off */
++FIXTURE(hierarchy) {};
++/* clang-format on */
+
+ FIXTURE_VARIANT(hierarchy) {
+ const bool domain_both;
+@@ -83,7 +85,9 @@ FIXTURE_VARIANT(hierarchy) {
+ * \ P2 -> P1 : allow
+ * 'P2
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
++ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = false,
+@@ -98,7 +102,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
+ * | P2 |
+ * '------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
++ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = true,
+@@ -112,7 +118,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
+ * '
+ * P2
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
++ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = false,
+@@ -127,7 +135,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
+ * | P2 |
+ * '------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
++ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = true,
+@@ -142,7 +152,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
+ * | P2 |
+ * '-------------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
++ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = false,
+@@ -158,7 +170,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
+ * | '------' |
+ * '-----------------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
++ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = true,
+@@ -174,7 +188,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
+ * | P2 |
+ * '-----------------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
++ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = false,
+@@ -192,7 +208,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
+ * | '------' |
+ * '-----------------'
+ */
++/* clang-format off */
+ FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
++ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = true,
+--
+2.39.2
+
--- /dev/null
+From 9f43c1d77d1e7cac1c93a6f95a6d91ebc4089a73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 22 Apr 2021 17:41:20 +0200
+Subject: selftests/landlock: Add user space tests
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mickaël Salaün <mic@linux.microsoft.com>
+
+[ Upstream commit e1199815b47be83346c03e20a3de76f934e4bb34 ]
+
+Test all Landlock system calls, the semantics of the ptrace hooks, and
+filesystem access control with multiple layouts.
+
+Test coverage for security/landlock/ is 93.6% of lines. The code not
+covered only deals with internal kernel errors (e.g. memory allocation)
+and race conditions.
+
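+For orientation, the core sequence every test builds on is small (a
+minimal sketch using the landlock_* syscall wrappers defined in
+common.h below; error handling omitted):
+
+  #include <linux/landlock.h>
+  #include <sys/prctl.h>
+  #include <unistd.h>
+
+  int main(void)
+  {
+          struct landlock_ruleset_attr attr = {
+                  .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+          };
+          int fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+
+          /* mandatory before self-restricting without privileges */
+          prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+          landlock_restrict_self(fd, 0);
+          close(fd);
+          return 0;
+  }
+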
+Cc: James Morris <jmorris@namei.org>
+Cc: Jann Horn <jannh@google.com>
+Cc: Serge E. Hallyn <serge@hallyn.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Signed-off-by: Mickaël Salaün <mic@linux.microsoft.com>
+Reviewed-by: Vincent Dagonneau <vincent.dagonneau@ssi.gouv.fr>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/r/20210422154123.13086-11-mic@digikod.net
+Signed-off-by: James Morris <jamorris@linux.microsoft.com>
+Stable-dep-of: 366617a69e60 ("selftests/landlock: Skip overlayfs tests when not supported")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ MAINTAINERS | 1 +
+ tools/testing/selftests/Makefile | 1 +
+ tools/testing/selftests/landlock/.gitignore | 2 +
+ tools/testing/selftests/landlock/Makefile | 24 +
+ tools/testing/selftests/landlock/base_test.c | 219 ++
+ tools/testing/selftests/landlock/common.h | 183 ++
+ tools/testing/selftests/landlock/config | 7 +
+ tools/testing/selftests/landlock/fs_test.c | 2791 +++++++++++++++++
+ .../testing/selftests/landlock/ptrace_test.c | 337 ++
+ tools/testing/selftests/landlock/true.c | 5 +
+ 10 files changed, 3570 insertions(+)
+ create mode 100644 tools/testing/selftests/landlock/.gitignore
+ create mode 100644 tools/testing/selftests/landlock/Makefile
+ create mode 100644 tools/testing/selftests/landlock/base_test.c
+ create mode 100644 tools/testing/selftests/landlock/common.h
+ create mode 100644 tools/testing/selftests/landlock/config
+ create mode 100644 tools/testing/selftests/landlock/fs_test.c
+ create mode 100644 tools/testing/selftests/landlock/ptrace_test.c
+ create mode 100644 tools/testing/selftests/landlock/true.c
+
+diff --git a/MAINTAINERS b/MAINTAINERS
+index 72815c1a325eb..5bc6a028236e3 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -9843,6 +9843,7 @@ S: Supported
+ W: https://landlock.io
+ T: git https://github.com/landlock-lsm/linux.git
+ F: security/landlock/
++F: tools/testing/selftests/landlock/
+ K: landlock
+ K: LANDLOCK
+
+diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
+index db1e24d7155fa..ca96973dca44d 100644
+--- a/tools/testing/selftests/Makefile
++++ b/tools/testing/selftests/Makefile
+@@ -26,6 +26,7 @@ TARGETS += ir
+ TARGETS += kcmp
+ TARGETS += kexec
+ TARGETS += kvm
++TARGETS += landlock
+ TARGETS += lib
+ TARGETS += livepatch
+ TARGETS += lkdtm
+diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
+new file mode 100644
+index 0000000000000..470203a7cd737
+--- /dev/null
++++ b/tools/testing/selftests/landlock/.gitignore
+@@ -0,0 +1,2 @@
++/*_test
++/true
+diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
+new file mode 100644
+index 0000000000000..a99596ca9882b
+--- /dev/null
++++ b/tools/testing/selftests/landlock/Makefile
+@@ -0,0 +1,24 @@
++# SPDX-License-Identifier: GPL-2.0
++
++CFLAGS += -Wall -O2
++
++src_test := $(wildcard *_test.c)
++
++TEST_GEN_PROGS := $(src_test:.c=)
++
++TEST_GEN_PROGS_EXTENDED := true
++
++KSFT_KHDR_INSTALL := 1
++OVERRIDE_TARGETS := 1
++include ../lib.mk
++
++khdr_dir = $(top_srcdir)/usr/include
++
++$(khdr_dir)/linux/landlock.h: khdr
++ @:
++
++$(OUTPUT)/true: true.c
++ $(LINK.c) $< $(LDLIBS) -o $@ -static
++
++$(OUTPUT)/%_test: %_test.c $(khdr_dir)/linux/landlock.h ../kselftest_harness.h common.h
++ $(LINK.c) $< $(LDLIBS) -o $@ -lcap -I$(khdr_dir)
+diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
+new file mode 100644
+index 0000000000000..262c3c8d953ad
+--- /dev/null
++++ b/tools/testing/selftests/landlock/base_test.c
+@@ -0,0 +1,219 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Landlock tests - Common user space base
++ *
++ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright © 2019-2020 ANSSI
++ */
++
++#define _GNU_SOURCE
++#include <errno.h>
++#include <fcntl.h>
++#include <linux/landlock.h>
++#include <string.h>
++#include <sys/prctl.h>
++#include <sys/socket.h>
++#include <sys/types.h>
++
++#include "common.h"
++
++#ifndef O_PATH
++#define O_PATH 010000000
++#endif
++
++TEST(inconsistent_attr) {
++ const long page_size = sysconf(_SC_PAGESIZE);
++ char *const buf = malloc(page_size + 1);
++ struct landlock_ruleset_attr *const ruleset_attr = (void *)buf;
++
++ ASSERT_NE(NULL, buf);
++
++ /* Checks copy_from_user(). */
++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 0, 0));
++ /* The size is less than sizeof(struct landlock_attr_enforce). */
++ ASSERT_EQ(EINVAL, errno);
++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0));
++ ASSERT_EQ(EINVAL, errno);
++
++ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0));
++ /* The size is less than sizeof(struct landlock_attr_enforce). */
++ ASSERT_EQ(EFAULT, errno);
++
++ ASSERT_EQ(-1, landlock_create_ruleset(NULL,
++ sizeof(struct landlock_ruleset_attr), 0));
++ ASSERT_EQ(EFAULT, errno);
++
++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
++ ASSERT_EQ(E2BIG, errno);
++
++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr,
++ sizeof(struct landlock_ruleset_attr), 0));
++ ASSERT_EQ(ENOMSG, errno);
++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
++ ASSERT_EQ(ENOMSG, errno);
++
++ /* Checks non-zero value. */
++ buf[page_size - 2] = '.';
++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
++ ASSERT_EQ(E2BIG, errno);
++
++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
++ ASSERT_EQ(E2BIG, errno);
++
++ free(buf);
++}
++
++TEST(empty_path_beneath_attr) {
++ const struct landlock_ruleset_attr ruleset_attr = {
++ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
++ };
++ const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ /* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ NULL, 0));
++ ASSERT_EQ(EFAULT, errno);
++ ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST(inval_fd_enforce) {
++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++
++ ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
++ ASSERT_EQ(EBADF, errno);
++}
++
++TEST(unpriv_enforce_without_no_new_privs) {
++ int err;
++
++ drop_caps(_metadata);
++ err = landlock_restrict_self(-1, 0);
++ ASSERT_EQ(EPERM, errno);
++ ASSERT_EQ(err, -1);
++}
++
++TEST(ruleset_fd_io)
++{
++ struct landlock_ruleset_attr ruleset_attr = {
++ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
++ };
++ int ruleset_fd;
++ char buf;
++
++ drop_caps(_metadata);
++ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++ ASSERT_LE(0, ruleset_fd);
++
++ ASSERT_EQ(-1, write(ruleset_fd, ".", 1));
++ ASSERT_EQ(EINVAL, errno);
++ ASSERT_EQ(-1, read(ruleset_fd, &buf, 1));
++ ASSERT_EQ(EINVAL, errno);
++
++ ASSERT_EQ(0, close(ruleset_fd));
++}
++
++/* Tests enforcement of a ruleset FD transferred through a UNIX socket. */
++TEST(ruleset_fd_transfer)
++{
++ struct landlock_ruleset_attr ruleset_attr = {
++ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
++ };
++ struct landlock_path_beneath_attr path_beneath_attr = {
++ .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
++ };
++ int ruleset_fd_tx, dir_fd;
++ union {
++ /* Aligned ancillary data buffer. */
++ char buf[CMSG_SPACE(sizeof(ruleset_fd_tx))];
++ struct cmsghdr _align;
++ } cmsg_tx = {};
++ char data_tx = '.';
++ struct iovec io = {
++ .iov_base = &data_tx,
++ .iov_len = sizeof(data_tx),
++ };
++ struct msghdr msg = {
++ .msg_iov = &io,
++ .msg_iovlen = 1,
++ .msg_control = &cmsg_tx.buf,
++ .msg_controllen = sizeof(cmsg_tx.buf),
++ };
++ struct cmsghdr *cmsg;
++ int socket_fds[2];
++ pid_t child;
++ int status;
++
++ drop_caps(_metadata);
++
++ /* Creates a test ruleset with a simple rule. */
++ ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++ ASSERT_LE(0, ruleset_fd_tx);
++ path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW |
++ O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, path_beneath_attr.parent_fd);
++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath_attr, 0));
++ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
++
++ cmsg = CMSG_FIRSTHDR(&msg);
++ ASSERT_NE(NULL, cmsg);
++ cmsg->cmsg_len = CMSG_LEN(sizeof(ruleset_fd_tx));
++ cmsg->cmsg_level = SOL_SOCKET;
++ cmsg->cmsg_type = SCM_RIGHTS;
++ memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx));
++
++ /* Sends the ruleset FD over a socketpair and then close it. */
++ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds));
++ ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0));
++ ASSERT_EQ(0, close(socket_fds[0]));
++ ASSERT_EQ(0, close(ruleset_fd_tx));
++
++ child = fork();
++ ASSERT_LE(0, child);
++ if (child == 0) {
++ int ruleset_fd_rx;
++
++ *(char *)msg.msg_iov->iov_base = '\0';
++ ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC));
++ ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base);
++ ASSERT_EQ(0, close(socket_fds[1]));
++ cmsg = CMSG_FIRSTHDR(&msg);
++ ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(ruleset_fd_tx)));
++ memcpy(&ruleset_fd_rx, CMSG_DATA(cmsg), sizeof(ruleset_fd_tx));
++
++ /* Enforces the received ruleset on the child. */
++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd_rx, 0));
++ ASSERT_EQ(0, close(ruleset_fd_rx));
++
++ /* Checks that the ruleset is enforced. */
++ ASSERT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
++ ASSERT_EQ(EACCES, errno);
++ dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, dir_fd);
++ ASSERT_EQ(0, close(dir_fd));
++ _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
++ return;
++ }
++
++ ASSERT_EQ(0, close(socket_fds[1]));
++
++ /* Checks that the parent is unrestricted. */
++ dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, dir_fd);
++ ASSERT_EQ(0, close(dir_fd));
++ dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, dir_fd);
++ ASSERT_EQ(0, close(dir_fd));
++
++ ASSERT_EQ(child, waitpid(child, &status, 0));
++ ASSERT_EQ(1, WIFEXITED(status));
++ ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
++}
++
++TEST_HARNESS_MAIN
+diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
+new file mode 100644
+index 0000000000000..20e2a9286d710
+--- /dev/null
++++ b/tools/testing/selftests/landlock/common.h
+@@ -0,0 +1,183 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * Landlock test helpers
++ *
++ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright © 2019-2020 ANSSI
++ * Copyright © 2021 Microsoft Corporation
++ */
++
++#include <errno.h>
++#include <linux/landlock.h>
++#include <sys/capability.h>
++#include <sys/syscall.h>
++#include <sys/types.h>
++#include <sys/wait.h>
++#include <unistd.h>
++
++#include "../kselftest_harness.h"
++
++#ifndef ARRAY_SIZE
++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
++#endif
++
++/*
++ * TEST_F_FORK() is useful when a test drop privileges but the corresponding
++ * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory
++ * where write actions are denied). For convenience, FIXTURE_TEARDOWN() is
++ * also called when the test failed, but not when FIXTURE_SETUP() failed. For
++ * this to be possible, we must not call abort() but instead exit smoothly
++ * (hence the step print).
++ */
++#define TEST_F_FORK(fixture_name, test_name) \
++ static void fixture_name##_##test_name##_child( \
++ struct __test_metadata *_metadata, \
++ FIXTURE_DATA(fixture_name) *self, \
++ const FIXTURE_VARIANT(fixture_name) *variant); \
++ TEST_F(fixture_name, test_name) \
++ { \
++ int status; \
++ const pid_t child = fork(); \
++ if (child < 0) \
++ abort(); \
++ if (child == 0) { \
++ _metadata->no_print = 1; \
++ fixture_name##_##test_name##_child(_metadata, self, variant); \
++ if (_metadata->skip) \
++ _exit(255); \
++ if (_metadata->passed) \
++ _exit(0); \
++ _exit(_metadata->step); \
++ } \
++ if (child != waitpid(child, &status, 0)) \
++ abort(); \
++ if (WIFSIGNALED(status) || !WIFEXITED(status)) { \
++ _metadata->passed = 0; \
++ _metadata->step = 1; \
++ return; \
++ } \
++ switch (WEXITSTATUS(status)) { \
++ case 0: \
++ _metadata->passed = 1; \
++ break; \
++ case 255: \
++ _metadata->passed = 1; \
++ _metadata->skip = 1; \
++ break; \
++ default: \
++ _metadata->passed = 0; \
++ _metadata->step = WEXITSTATUS(status); \
++ break; \
++ } \
++ } \
++ static void fixture_name##_##test_name##_child( \
++ struct __test_metadata __attribute__((unused)) *_metadata, \
++ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
++ const FIXTURE_VARIANT(fixture_name) \
++ __attribute__((unused)) *variant)
++
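++/*
++ * Illustrative sketch (kept in a comment so that it does not register an
++ * extra test; the fixture and test names are hypothetical). A TEST_F_FORK()
++ * body is written exactly like a TEST_F() body, but it runs in a child
++ * process, so it may drop privileges or enforce a ruleset without affecting
++ * the parent nor the teardown:
++ *
++ *	TEST_F_FORK(some_fixture, drops_privileges)
++ *	{
++ *		drop_caps(_metadata);
++ *		... assertions running without any capability ...
++ *	}
++ */
++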
++#ifndef landlock_create_ruleset
++static inline int landlock_create_ruleset(
++ const struct landlock_ruleset_attr *const attr,
++ const size_t size, const __u32 flags)
++{
++ return syscall(__NR_landlock_create_ruleset, attr, size, flags);
++}
++#endif
++
++#ifndef landlock_add_rule
++static inline int landlock_add_rule(const int ruleset_fd,
++ const enum landlock_rule_type rule_type,
++ const void *const rule_attr, const __u32 flags)
++{
++ return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type,
++ rule_attr, flags);
++}
++#endif
++
++#ifndef landlock_restrict_self
++static inline int landlock_restrict_self(const int ruleset_fd,
++ const __u32 flags)
++{
++ return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
++}
++#endif
++
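++/*
++ * For reference, the three wrappers above are meant to be chained as
++ * follows. This is only a sketch (error handling elided, "/tmp" is an
++ * arbitrary example path), mirroring what the tests do through their
++ * helpers:
++ *
++ *	struct landlock_ruleset_attr ruleset_attr = {
++ *		.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
++ *	};
++ *	struct landlock_path_beneath_attr path_beneath = {
++ *		.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE,
++ *		.parent_fd = open("/tmp", O_PATH | O_CLOEXEC),
++ *	};
++ *	const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ *			sizeof(ruleset_attr), 0);
++ *
++ *	landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ *			&path_beneath, 0);
++ *	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
++ *	landlock_restrict_self(ruleset_fd, 0);
++ */
++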
++static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
++{
++ cap_t cap_p;
++ /* Only these four capabilities are useful for the tests. */
++ const cap_value_t caps[] = {
++ CAP_DAC_OVERRIDE,
++ CAP_MKNOD,
++ CAP_SYS_ADMIN,
++ CAP_SYS_CHROOT,
++ };
++
++ cap_p = cap_get_proc();
++ EXPECT_NE(NULL, cap_p) {
++ TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
++ }
++ EXPECT_NE(-1, cap_clear(cap_p)) {
++ TH_LOG("Failed to cap_clear: %s", strerror(errno));
++ }
++ if (!drop_all) {
++ EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED,
++ ARRAY_SIZE(caps), caps, CAP_SET)) {
++ TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
++ }
++ }
++ EXPECT_NE(-1, cap_set_proc(cap_p)) {
++ TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
++ }
++ EXPECT_NE(-1, cap_free(cap_p)) {
++ TH_LOG("Failed to cap_free: %s", strerror(errno));
++ }
++}
++
++/* We cannot put such helpers in a library because of kselftest_harness.h. */
++__attribute__((__unused__))
++static void disable_caps(struct __test_metadata *const _metadata)
++{
++ _init_caps(_metadata, false);
++}
++
++__attribute__((__unused__))
++static void drop_caps(struct __test_metadata *const _metadata)
++{
++ _init_caps(_metadata, true);
++}
++
++static void _effective_cap(struct __test_metadata *const _metadata,
++ const cap_value_t caps, const cap_flag_value_t value)
++{
++ cap_t cap_p;
++
++ cap_p = cap_get_proc();
++ EXPECT_NE(NULL, cap_p) {
++ TH_LOG("Failed to cap_get_proc: %s", strerror(errno));
++ }
++ EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) {
++ TH_LOG("Failed to cap_set_flag: %s", strerror(errno));
++ }
++ EXPECT_NE(-1, cap_set_proc(cap_p)) {
++ TH_LOG("Failed to cap_set_proc: %s", strerror(errno));
++ }
++ EXPECT_NE(-1, cap_free(cap_p)) {
++ TH_LOG("Failed to cap_free: %s", strerror(errno));
++ }
++}
++
++__attribute__((__unused__))
++static void set_cap(struct __test_metadata *const _metadata,
++ const cap_value_t caps)
++{
++ _effective_cap(_metadata, caps, CAP_SET);
++}
++
++__attribute__((__unused__))
++static void clear_cap(struct __test_metadata *const _metadata,
++ const cap_value_t caps)
++{
++ _effective_cap(_metadata, caps, CAP_CLEAR);
++}
+diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
+new file mode 100644
+index 0000000000000..0f0a65287bacf
+--- /dev/null
++++ b/tools/testing/selftests/landlock/config
+@@ -0,0 +1,7 @@
++CONFIG_OVERLAY_FS=y
++CONFIG_SECURITY_LANDLOCK=y
++CONFIG_SECURITY_PATH=y
++CONFIG_SECURITY=y
++CONFIG_SHMEM=y
++CONFIG_TMPFS_XATTR=y
++CONFIG_TMPFS=y
+diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
+new file mode 100644
+index 0000000000000..10c9a1e4ebd9b
+--- /dev/null
++++ b/tools/testing/selftests/landlock/fs_test.c
+@@ -0,0 +1,2791 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Landlock tests - Filesystem
++ *
++ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright © 2020 ANSSI
++ * Copyright © 2020-2021 Microsoft Corporation
++ */
++
++#define _GNU_SOURCE
++#include <fcntl.h>
++#include <linux/landlock.h>
++#include <sched.h>
++#include <string.h>
++#include <sys/capability.h>
++#include <sys/mount.h>
++#include <sys/prctl.h>
++#include <sys/sendfile.h>
++#include <sys/stat.h>
++#include <sys/sysmacros.h>
++#include <unistd.h>
++
++#include "common.h"
++
++#define TMP_DIR "tmp"
++#define BINARY_PATH "./true"
++
++/* Paths (sibling number and depth) */
++static const char dir_s1d1[] = TMP_DIR "/s1d1";
++static const char file1_s1d1[] = TMP_DIR "/s1d1/f1";
++static const char file2_s1d1[] = TMP_DIR "/s1d1/f2";
++static const char dir_s1d2[] = TMP_DIR "/s1d1/s1d2";
++static const char file1_s1d2[] = TMP_DIR "/s1d1/s1d2/f1";
++static const char file2_s1d2[] = TMP_DIR "/s1d1/s1d2/f2";
++static const char dir_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3";
++static const char file1_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f1";
++static const char file2_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f2";
++
++static const char dir_s2d1[] = TMP_DIR "/s2d1";
++static const char file1_s2d1[] = TMP_DIR "/s2d1/f1";
++static const char dir_s2d2[] = TMP_DIR "/s2d1/s2d2";
++static const char file1_s2d2[] = TMP_DIR "/s2d1/s2d2/f1";
++static const char dir_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3";
++static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1";
++static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2";
++
++static const char dir_s3d1[] = TMP_DIR "/s3d1";
++/* dir_s3d2 is a mount point. */
++static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2";
++static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
++
++/*
++ * layout1 hierarchy:
++ *
++ * tmp
++ * ├── s1d1
++ * │  ├── f1
++ * │  ├── f2
++ * │  └── s1d2
++ * │  ├── f1
++ * │  ├── f2
++ * │  └── s1d3
++ * │  ├── f1
++ * │  └── f2
++ * ├── s2d1
++ * │  ├── f1
++ * │  └── s2d2
++ * │  ├── f1
++ * │  └── s2d3
++ * │  ├── f1
++ * │  └── f2
++ * └── s3d1
++ * └── s3d2
++ * └── s3d3
++ */
++
++static void mkdir_parents(struct __test_metadata *const _metadata,
++ const char *const path)
++{
++ char *walker;
++ const char *parent;
++ int i, err;
++
++ ASSERT_NE(path[0], '\0');
++ walker = strdup(path);
++ ASSERT_NE(NULL, walker);
++ parent = walker;
++ for (i = 1; walker[i]; i++) {
++ if (walker[i] != '/')
++ continue;
++ walker[i] = '\0';
++ err = mkdir(parent, 0700);
++ ASSERT_FALSE(err && errno != EEXIST) {
++ TH_LOG("Failed to create directory \"%s\": %s",
++ parent, strerror(errno));
++ }
++ walker[i] = '/';
++ }
++ free(walker);
++}
++
++static void create_directory(struct __test_metadata *const _metadata,
++ const char *const path)
++{
++ mkdir_parents(_metadata, path);
++ ASSERT_EQ(0, mkdir(path, 0700)) {
++ TH_LOG("Failed to create directory \"%s\": %s", path,
++ strerror(errno));
++ }
++}
++
++static void create_file(struct __test_metadata *const _metadata,
++ const char *const path)
++{
++ mkdir_parents(_metadata, path);
++ ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) {
++ TH_LOG("Failed to create file \"%s\": %s", path,
++ strerror(errno));
++ }
++}
++
++static int remove_path(const char *const path)
++{
++ char *walker;
++ int i, ret, err = 0;
++
++ walker = strdup(path);
++ if (!walker) {
++ err = ENOMEM;
++ goto out;
++ }
++ if (unlink(path) && rmdir(path)) {
++ if (errno != ENOENT)
++ err = errno;
++ goto out;
++ }
++ for (i = strlen(walker); i > 0; i--) {
++ if (walker[i] != '/')
++ continue;
++ walker[i] = '\0';
++ ret = rmdir(walker);
++ if (ret) {
++ if (errno != ENOTEMPTY && errno != EBUSY)
++ err = errno;
++ goto out;
++ }
++ if (strcmp(walker, TMP_DIR) == 0)
++ goto out;
++ }
++
++out:
++ free(walker);
++ return err;
++}
++
++static void prepare_layout(struct __test_metadata *const _metadata)
++{
++ disable_caps(_metadata);
++ umask(0077);
++ create_directory(_metadata, TMP_DIR);
++
++ /*
++ * Do not pollute the rest of the system: creates a private mount point
++ * for tests relying on pivot_root(2) and move_mount(2).
++ */
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ ASSERT_EQ(0, unshare(CLONE_NEWNS));
++ ASSERT_EQ(0, mount("tmp", TMP_DIR, "tmpfs", 0, "size=4m,mode=700"));
++ ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++static void cleanup_layout(struct __test_metadata *const _metadata)
++{
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, umount(TMP_DIR));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, remove_path(TMP_DIR));
++}
++
++static void create_layout1(struct __test_metadata *const _metadata)
++{
++ create_file(_metadata, file1_s1d1);
++ create_file(_metadata, file1_s1d2);
++ create_file(_metadata, file1_s1d3);
++ create_file(_metadata, file2_s1d1);
++ create_file(_metadata, file2_s1d2);
++ create_file(_metadata, file2_s1d3);
++
++ create_file(_metadata, file1_s2d1);
++ create_file(_metadata, file1_s2d2);
++ create_file(_metadata, file1_s2d3);
++ create_file(_metadata, file2_s2d3);
++
++ create_directory(_metadata, dir_s3d2);
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ ASSERT_EQ(0, mount("tmp", dir_s3d2, "tmpfs", 0, "size=4m,mode=700"));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++
++ ASSERT_EQ(0, mkdir(dir_s3d3, 0700));
++}
++
++static void remove_layout1(struct __test_metadata *const _metadata)
++{
++ EXPECT_EQ(0, remove_path(file2_s1d3));
++ EXPECT_EQ(0, remove_path(file2_s1d2));
++ EXPECT_EQ(0, remove_path(file2_s1d1));
++ EXPECT_EQ(0, remove_path(file1_s1d3));
++ EXPECT_EQ(0, remove_path(file1_s1d2));
++ EXPECT_EQ(0, remove_path(file1_s1d1));
++
++ EXPECT_EQ(0, remove_path(file2_s2d3));
++ EXPECT_EQ(0, remove_path(file1_s2d3));
++ EXPECT_EQ(0, remove_path(file1_s2d2));
++ EXPECT_EQ(0, remove_path(file1_s2d1));
++
++ EXPECT_EQ(0, remove_path(dir_s3d3));
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ umount(dir_s3d2);
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, remove_path(dir_s3d2));
++}
++
++FIXTURE(layout1) {
++};
++
++FIXTURE_SETUP(layout1)
++{
++ prepare_layout(_metadata);
++
++ create_layout1(_metadata);
++}
++
++FIXTURE_TEARDOWN(layout1)
++{
++ remove_layout1(_metadata);
++
++ cleanup_layout(_metadata);
++}
++
++/*
++ * Returning the error code instead of asserting here enables callers to use
++ * the ASSERT_* macros, so that a failure is reported with the line number of
++ * the test caller.
++ */
++static int test_open_rel(const int dirfd, const char *const path, const int flags)
++{
++ int fd;
++
++ /* Works with both files and directories. */
++ fd = openat(dirfd, path, flags | O_CLOEXEC);
++ if (fd < 0)
++ return errno;
++ /*
++ * Mixing error codes from close(2) and open(2) should not lead to any
++ * (access type) confusion for this test.
++ */
++ if (close(fd) != 0)
++ return errno;
++ return 0;
++}
++
++static int test_open(const char *const path, const int flags)
++{
++ return test_open_rel(AT_FDCWD, path, flags);
++}
++
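++/*
++ * For example (with a hypothetical path), a denied open(2) is checked
++ * against the errno value returned by these helpers, which keeps the
++ * failing line number inside the test itself:
++ *
++ *	ASSERT_EQ(EACCES, test_open("/etc/passwd", O_WRONLY));
++ */
++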
++TEST_F_FORK(layout1, no_restriction)
++{
++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(file2_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(file2_s1d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++
++ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s2d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s2d3, O_RDONLY));
++
++ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, inval)
++{
++ struct landlock_path_beneath_attr path_beneath = {
++ .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ .parent_fd = -1,
++ };
++ struct landlock_ruleset_attr ruleset_attr = {
++ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ };
++ int ruleset_fd;
++
++ path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
++ O_CLOEXEC);
++ ASSERT_LE(0, path_beneath.parent_fd);
++
++ ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, ruleset_fd);
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0));
++ /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */
++ ASSERT_EQ(EBADF, errno);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, ruleset_fd);
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0));
++ /* Returns EBADFD because ruleset_fd is not a valid ruleset. */
++ ASSERT_EQ(EBADFD, errno);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Gets a real ruleset. */
++ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++ ASSERT_LE(0, ruleset_fd);
++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0));
++ ASSERT_EQ(0, close(path_beneath.parent_fd));
++
++ /* Tests without O_PATH. */
++ path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC);
++ ASSERT_LE(0, path_beneath.parent_fd);
++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0));
++ ASSERT_EQ(0, close(path_beneath.parent_fd));
++
++ /* Tests with a ruleset FD. */
++ path_beneath.parent_fd = ruleset_fd;
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0));
++ ASSERT_EQ(EBADFD, errno);
++
++ /* Checks unhandled allowed_access. */
++ path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY |
++ O_CLOEXEC);
++ ASSERT_LE(0, path_beneath.parent_fd);
++
++ /* Tests with a legitimate, but unhandled, access value. */
++ path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE;
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0));
++ ASSERT_EQ(EINVAL, errno);
++ path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE;
++
++ /* Tests with an unknown (64-bit) access value. */
++ path_beneath.allowed_access |= (1ULL << 60);
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0));
++ ASSERT_EQ(EINVAL, errno);
++ path_beneath.allowed_access &= ~(1ULL << 60);
++
++ /* Tests with no access value. */
++ path_beneath.allowed_access = 0;
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0));
++ ASSERT_EQ(ENOMSG, errno);
++
++ ASSERT_EQ(0, close(path_beneath.parent_fd));
++
++ /* Enforces the ruleset. */
++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
++
++ ASSERT_EQ(0, close(ruleset_fd));
++}
++
++#define ACCESS_FILE ( \
++ LANDLOCK_ACCESS_FS_EXECUTE | \
++ LANDLOCK_ACCESS_FS_WRITE_FILE | \
++ LANDLOCK_ACCESS_FS_READ_FILE)
++
++#define ACCESS_LAST LANDLOCK_ACCESS_FS_MAKE_SYM
++
++#define ACCESS_ALL ( \
++ ACCESS_FILE | \
++ LANDLOCK_ACCESS_FS_READ_DIR | \
++ LANDLOCK_ACCESS_FS_REMOVE_DIR | \
++ LANDLOCK_ACCESS_FS_REMOVE_FILE | \
++ LANDLOCK_ACCESS_FS_MAKE_CHAR | \
++ LANDLOCK_ACCESS_FS_MAKE_DIR | \
++ LANDLOCK_ACCESS_FS_MAKE_REG | \
++ LANDLOCK_ACCESS_FS_MAKE_SOCK | \
++ LANDLOCK_ACCESS_FS_MAKE_FIFO | \
++ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
++ ACCESS_LAST)
++
++TEST_F_FORK(layout1, file_access_rights)
++{
++ __u64 access;
++ int err;
++ struct landlock_path_beneath_attr path_beneath = {};
++ struct landlock_ruleset_attr ruleset_attr = {
++ .handled_access_fs = ACCESS_ALL,
++ };
++ const int ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ /* Tests access rights for files. */
++ path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
++ ASSERT_LE(0, path_beneath.parent_fd);
++ for (access = 1; access <= ACCESS_LAST; access <<= 1) {
++ path_beneath.allowed_access = access;
++ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0);
++ if ((access | ACCESS_FILE) == ACCESS_FILE) {
++ ASSERT_EQ(0, err);
++ } else {
++ ASSERT_EQ(-1, err);
++ ASSERT_EQ(EINVAL, errno);
++ }
++ }
++ ASSERT_EQ(0, close(path_beneath.parent_fd));
++}
++
++static void add_path_beneath(struct __test_metadata *const _metadata,
++ const int ruleset_fd, const __u64 allowed_access,
++ const char *const path)
++{
++ struct landlock_path_beneath_attr path_beneath = {
++ .allowed_access = allowed_access,
++ };
++
++ path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC);
++ ASSERT_LE(0, path_beneath.parent_fd) {
++ TH_LOG("Failed to open directory \"%s\": %s", path,
++ strerror(errno));
++ }
++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0)) {
++ TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
++ strerror(errno));
++ }
++ ASSERT_EQ(0, close(path_beneath.parent_fd));
++}
++
++struct rule {
++ const char *path;
++ __u64 access;
++};
++
++#define ACCESS_RO ( \
++ LANDLOCK_ACCESS_FS_READ_FILE | \
++ LANDLOCK_ACCESS_FS_READ_DIR)
++
++#define ACCESS_RW ( \
++ ACCESS_RO | \
++ LANDLOCK_ACCESS_FS_WRITE_FILE)
++
++static int create_ruleset(struct __test_metadata *const _metadata,
++ const __u64 handled_access_fs, const struct rule rules[])
++{
++ int ruleset_fd, i;
++ struct landlock_ruleset_attr ruleset_attr = {
++ .handled_access_fs = handled_access_fs,
++ };
++
++ ASSERT_NE(NULL, rules) {
++ TH_LOG("No rule list");
++ }
++ ASSERT_NE(NULL, rules[0].path) {
++ TH_LOG("Empty rule list");
++ }
++
++ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++ ASSERT_LE(0, ruleset_fd) {
++ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
++ }
++
++ for (i = 0; rules[i].path; i++) {
++ add_path_beneath(_metadata, ruleset_fd, rules[i].access,
++ rules[i].path);
++ }
++ return ruleset_fd;
++}
++
++static void enforce_ruleset(struct __test_metadata *const _metadata,
++ const int ruleset_fd)
++{
++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) {
++ TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
++ }
++}
++
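++/*
++ * Most of the following tests share the same shape, sketched here for
++ * reference (the rule list is illustrative):
++ *
++ *	const struct rule rules[] = {
++ *		{
++ *			.path = dir_s1d2,
++ *			.access = ACCESS_RO,
++ *		},
++ *		{}
++ *	};
++ *	const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++ *
++ *	ASSERT_LE(0, ruleset_fd);
++ *	enforce_ruleset(_metadata, ruleset_fd);
++ *	ASSERT_EQ(0, close(ruleset_fd));
++ */
++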
++TEST_F_FORK(layout1, proc_nsfs)
++{
++ const struct rule rules[] = {
++ {
++ .path = "/dev/null",
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {}
++ };
++ struct landlock_path_beneath_attr path_beneath;
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access |
++ LANDLOCK_ACCESS_FS_READ_DIR, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
++
++ enforce_ruleset(_metadata, ruleset_fd);
++
++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++ ASSERT_EQ(EACCES, test_open("/dev", O_RDONLY));
++ ASSERT_EQ(0, test_open("/dev/null", O_RDONLY));
++ ASSERT_EQ(EACCES, test_open("/dev/full", O_RDONLY));
++
++ ASSERT_EQ(EACCES, test_open("/proc", O_RDONLY));
++ ASSERT_EQ(EACCES, test_open("/proc/self", O_RDONLY));
++ ASSERT_EQ(EACCES, test_open("/proc/self/ns", O_RDONLY));
++ /*
++ * Because nsfs is an internal filesystem, /proc/self/ns/mnt is a
++ * disconnected path. Such a path cannot be identified and must therefore
++ * be allowed.
++ */
++ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
++
++ /*
++ * Checks that it is not possible to add nsfs-like filesystem
++ * references to a ruleset.
++ */
++ path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC);
++ ASSERT_LE(0, path_beneath.parent_fd);
++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
++ &path_beneath, 0));
++ ASSERT_EQ(EBADFD, errno);
++ ASSERT_EQ(0, close(path_beneath.parent_fd));
++}
++
++TEST_F_FORK(layout1, unpriv) {
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ int ruleset_fd;
++
++ drop_caps(_metadata);
++
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
++ ASSERT_LE(0, ruleset_fd);
++ ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
++ ASSERT_EQ(EPERM, errno);
++
++ /* enforce_ruleset() calls prctl(no_new_privs). */
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST_F_FORK(layout1, effective_access)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = ACCESS_RO,
++ },
++ {
++ .path = file1_s2d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++ char buf;
++ int reg_fd;
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Tests on a directory. */
++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++
++ /* Tests on a file. */
++ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
++
++ /* Checks effective read and write actions. */
++ reg_fd = open(file1_s2d2, O_RDWR | O_CLOEXEC);
++ ASSERT_LE(0, reg_fd);
++ ASSERT_EQ(1, write(reg_fd, ".", 1));
++ ASSERT_LE(0, lseek(reg_fd, 0, SEEK_SET));
++ ASSERT_EQ(1, read(reg_fd, &buf, 1));
++ ASSERT_EQ('.', buf);
++ ASSERT_EQ(0, close(reg_fd));
++
++ /* Just in case, double-checks effective actions. */
++ reg_fd = open(file1_s2d2, O_RDONLY | O_CLOEXEC);
++ ASSERT_LE(0, reg_fd);
++ ASSERT_EQ(-1, write(reg_fd, &buf, 1));
++ ASSERT_EQ(EBADF, errno);
++ ASSERT_EQ(0, close(reg_fd));
++}
++
++TEST_F_FORK(layout1, unhandled_access)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ /* Here, we only handle read accesses, not write accesses. */
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /*
++ * Because the policy does not handle LANDLOCK_ACCESS_FS_WRITE_FILE,
++ * opening for write-only should be allowed, but not read-write.
++ */
++ ASSERT_EQ(0, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
++
++ ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY));
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
++}
++
++TEST_F_FORK(layout1, ruleset_overlap)
++{
++ const struct rule rules[] = {
++ /* These rules should be ORed together. */
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_READ_DIR,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks s1d1 hierarchy. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Checks s1d2 hierarchy. */
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY));
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++ /* Checks s1d3 hierarchy. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++}
++
++TEST_F_FORK(layout1, non_overlapping_accesses)
++{
++ const struct rule layer1[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
++ },
++ {}
++ };
++ const struct rule layer2[] = {
++ {
++ .path = dir_s1d3,
++ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
++ },
++ {}
++ };
++ int ruleset_fd;
++
++ ASSERT_EQ(0, unlink(file1_s1d1));
++ ASSERT_EQ(0, unlink(file1_s1d2));
++
++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG,
++ layer1);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0));
++ ASSERT_EQ(0, unlink(file1_s1d2));
++
++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE,
++ layer2);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Unchanged accesses for file creation. */
++ ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0));
++
++ /* Checks file removing. */
++ ASSERT_EQ(-1, unlink(file1_s1d2));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(0, unlink(file1_s1d3));
++}
++
++TEST_F_FORK(layout1, interleaved_masked_accesses)
++{
++ /*
++ * Checks overly restrictive rules:
++ * layer 1: allows R s1d1/s1d2/s1d3/file1
++ * layer 2: allows RW s1d1/s1d2/s1d3
++ * allows W s1d1/s1d2
++ * denies R s1d1/s1d2
++ * layer 3: allows R s1d1
++ * layer 4: allows R s1d1/s1d2
++ * denies W s1d1/s1d2
++ * layer 5: allows R s1d1/s1d2
++ * layer 6: allows X ----
++ * layer 7: allows W s1d1/s1d2
++ * denies R s1d1/s1d2
++ */
++ const struct rule layer1_read[] = {
++ /* Allows read access to file1_s1d3 with the first layer. */
++ {
++ .path = file1_s1d3,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {}
++ };
++ /* First rule with write restrictions. */
++ const struct rule layer2_read_write[] = {
++ /* Start by granting read-write access via its parent directory... */
++ {
++ .path = dir_s1d3,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ /* ...but also denies read access via its grandparent directory. */
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {}
++ };
++ const struct rule layer3_read[] = {
++ /* Allows read access via its great-grandparent directory. */
++ {
++ .path = dir_s1d1,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {}
++ };
++ const struct rule layer4_read_write[] = {
++ /*
++ * Tries to confuse the previous read denial by denying write
++ * (but not read) access via its grandparent directory.
++ */
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {}
++ };
++ const struct rule layer5_read[] = {
++ /*
++ * Tries to override the read access denied by layer 2 by
++ * explicitly allowing read access via file1_s1d3's grandparent.
++ */
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {}
++ };
++ const struct rule layer6_execute[] = {
++ /*
++ * Restricts an unrelated file hierarchy with a new
++ * (non-overlapping) access type.
++ */
++ {
++ .path = dir_s2d1,
++ .access = LANDLOCK_ACCESS_FS_EXECUTE,
++ },
++ {}
++ };
++ const struct rule layer7_read_write[] = {
++ /*
++ * Finally, denies read access to file1_s1d3 via its
++ * grandparent.
++ */
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {}
++ };
++ int ruleset_fd;
++
++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
++ layer1_read);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks that read access is granted for file1_s1d3 with layer 1. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
++
++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks that previous access rights are unchanged with layer 2. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
++
++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
++ layer3_read);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks that previous access rights are unchanged with layer 3. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
++
++ /* This time, denies write access for the file hierarchy. */
++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /*
++ * Checks that the only change with layer 4 is that write access is
++ * denied.
++ */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
++
++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
++ layer5_read);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks that previous access rights are unchanged with layer 5. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++
++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE,
++ layer6_execute);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks that previous access rights are unchanged with layer 6. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++
++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks that read access is now denied with layer 7. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, inherit_subset)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_READ_DIR,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* Write access is forbidden. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ /* Readdir access is allowed. */
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++ /* Write access is forbidden. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++ /* Readdir access is allowed. */
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++ /*
++ * Tests shared rule extension: the following rules should not grant
++ * any new access, only remove some. Once enforced, these rules are
++ * ANDed with the previous ones.
++ */
++ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
++ dir_s1d2);
++ /*
++ * According to ruleset_fd, dir_s1d2 should now have the
++ * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE
++ * access rights (even if this directory is opened a second time).
++ * However, when enforcing this updated ruleset, the ruleset tied to
++ * the current process (i.e. its domain) will still only grant
++ * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_READ_DIR on
++ * dir_s1d2, and
++ * LANDLOCK_ACCESS_FS_WRITE_FILE must not be allowed because it would
++ * be a privilege escalation.
++ */
++ enforce_ruleset(_metadata, ruleset_fd);
++
++ /* Same tests and results as above. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* It is still forbidden to write in file1_s1d2. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ /* Readdir access is still allowed. */
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++ /* It is still forbidden to write in file1_s1d3. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++ /* Readdir access is still allowed. */
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++ /*
++ * Try to get more privileges by adding new access rights to the parent
++ * directory: dir_s1d1.
++ */
++ add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1);
++ enforce_ruleset(_metadata, ruleset_fd);
++
++ /* Same tests and results as above. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* It is still forbidden to write in file1_s1d2. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ /* Readdir access is still allowed. */
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++ /* It is still forbidden to write in file1_s1d3. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++ /* Readdir access is still allowed. */
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++ /*
++ * Now, dir_s1d3 gets a new rule tied to it, only allowing
++ * LANDLOCK_ACCESS_FS_WRITE_FILE. The (kernel internal) difference is
++ * that there was no rule tied to it before.
++ */
++ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
++ dir_s1d3);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /*
++ * Same tests and results as above, except for open(dir_s1d3): the new
++ * rule masks the rule previously inherited from dir_s1d2, but readdir
++ * access remains allowed thanks to the OR policy within the same layer
++ * (checked below).
++ */
++
++ /* Same tests and results as above. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ /* It is still forbidden to write in file1_s1d2. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ /* Readdir access is still allowed. */
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++ /* It is still forbidden to write in file1_s1d3. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++ /*
++ * Readdir of dir_s1d3 is still allowed because of the OR policy inside
++ * the same layer.
++ */
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++}
++
++TEST_F_FORK(layout1, inherit_superset)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d3,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++
++ /* Readdir access is denied for dir_s1d2. */
++ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++ /* Readdir access is allowed for dir_s1d3. */
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++ /* File access is allowed for file1_s1d3. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++
++ /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */
++ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Readdir access is still denied for dir_s1d2. */
++ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++ /* Readdir access is still allowed for dir_s1d3. */
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++ /* File access is still allowed for file1_s1d3. */
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, max_layers)
++{
++ int i, err;
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ for (i = 0; i < 64; i++)
++ enforce_ruleset(_metadata, ruleset_fd);
++
++ for (i = 0; i < 2; i++) {
++ err = landlock_restrict_self(ruleset_fd, 0);
++ ASSERT_EQ(-1, err);
++ ASSERT_EQ(E2BIG, errno);
++ }
++ ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST_F_FORK(layout1, empty_or_same_ruleset)
++{
++ struct landlock_ruleset_attr ruleset_attr = {};
++ int ruleset_fd;
++
++ /* Tests empty handled_access_fs. */
++ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++ ASSERT_EQ(-1, ruleset_fd);
++ ASSERT_EQ(ENOMSG, errno);
++
++ /* Enforces a policy which denies read access to all files. */
++ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE;
++ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++
++ /* Nests a policy which denies read access to all directories. */
++ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR;
++ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
++
++ /* Enforces a second time with the same ruleset. */
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST_F_FORK(layout1, rule_on_mountpoint)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d1,
++ .access = ACCESS_RO,
++ },
++ {
++ /* dir_s3d2 is a mount point. */
++ .path = dir_s3d2,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++
++ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
++
++ ASSERT_EQ(EACCES, test_open(dir_s3d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, rule_over_mountpoint)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d1,
++ .access = ACCESS_RO,
++ },
++ {
++ /* dir_s3d2 is a mount point. */
++ .path = dir_s3d1,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++
++ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
++
++ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
++}
++
++/*
++ * This test verifies that we can apply a Landlock rule to the root directory
++ * (which might require special handling).
++ */
++TEST_F_FORK(layout1, rule_over_root_allow_then_deny)
++{
++ struct rule rules[] = {
++ {
++ .path = "/",
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks allowed access. */
++ ASSERT_EQ(0, test_open("/", O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++
++ rules[0].access = LANDLOCK_ACCESS_FS_READ_FILE;
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks denied access (on a directory). */
++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, rule_over_root_deny)
++{
++ const struct rule rules[] = {
++ {
++ .path = "/",
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks denied access (on a directory). */
++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
++}
++
++TEST_F_FORK(layout1, rule_inside_mount_ns)
++{
++ const struct rule rules[] = {
++ {
++ .path = "s3d3",
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ int ruleset_fd;
++
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) {
++ TH_LOG("Failed to pivot root: %s", strerror(errno));
++ };
++ ASSERT_EQ(0, chdir("/"));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(0, test_open("s3d3", O_RDONLY));
++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
++}
++
++TEST_F_FORK(layout1, mount_and_pivot)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s3d2,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
++ ASSERT_EQ(EPERM, errno);
++ ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3));
++ ASSERT_EQ(EPERM, errno);
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++TEST_F_FORK(layout1, move_mount)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s3d2,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
++ dir_s1d2, 0)) {
++ TH_LOG("Failed to move mount: %s", strerror(errno));
++ }
++
++ ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
++ dir_s3d2, 0));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
++ dir_s1d2, 0));
++ ASSERT_EQ(EPERM, errno);
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++TEST_F_FORK(layout1, release_inodes)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d1,
++ .access = ACCESS_RO,
++ },
++ {
++ .path = dir_s3d2,
++ .access = ACCESS_RO,
++ },
++ {
++ .path = dir_s3d3,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ /* Unmount a file hierarchy while it is being used by a ruleset. */
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ ASSERT_EQ(0, umount(dir_s3d2));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY));
++ /* Opening dir_s3d3 would not be allowed, and it no longer exists anyway. */
++ ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY));
++}
++
++enum relative_access {
++ REL_OPEN,
++ REL_CHDIR,
++ REL_CHROOT_ONLY,
++ REL_CHROOT_CHDIR,
++};
++
++static void test_relative_path(struct __test_metadata *const _metadata,
++ const enum relative_access rel)
++{
++ /*
++ * Common layer to check that a chroot does not bypass it (i.e. a
++ * chroot is not a disconnected root directory).
++ */
++ const struct rule layer1_base[] = {
++ {
++ .path = TMP_DIR,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ const struct rule layer2_subs[] = {
++ {
++ .path = dir_s1d2,
++ .access = ACCESS_RO,
++ },
++ {
++ .path = dir_s2d2,
++ .access = ACCESS_RO,
++ },
++ {}
++ };
++ int dirfd, ruleset_fd;
++
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_subs);
++
++ ASSERT_LE(0, ruleset_fd);
++ switch (rel) {
++ case REL_OPEN:
++ case REL_CHDIR:
++ break;
++ case REL_CHROOT_ONLY:
++ ASSERT_EQ(0, chdir(dir_s2d2));
++ break;
++ case REL_CHROOT_CHDIR:
++ ASSERT_EQ(0, chdir(dir_s1d2));
++ break;
++ default:
++ ASSERT_TRUE(false);
++ return;
++ }
++
++ set_cap(_metadata, CAP_SYS_CHROOT);
++ enforce_ruleset(_metadata, ruleset_fd);
++
++ switch (rel) {
++ case REL_OPEN:
++ dirfd = open(dir_s1d2, O_DIRECTORY);
++ ASSERT_LE(0, dirfd);
++ break;
++ case REL_CHDIR:
++ ASSERT_EQ(0, chdir(dir_s1d2));
++ dirfd = AT_FDCWD;
++ break;
++ case REL_CHROOT_ONLY:
++ /* Do chroot into dir_s1d2 (relative to dir_s2d2). */
++ ASSERT_EQ(0, chroot("../../s1d1/s1d2")) {
++ TH_LOG("Failed to chroot: %s", strerror(errno));
++ }
++ dirfd = AT_FDCWD;
++ break;
++ case REL_CHROOT_CHDIR:
++ /* Do chroot into dir_s1d2. */
++ ASSERT_EQ(0, chroot(".")) {
++ TH_LOG("Failed to chroot: %s", strerror(errno));
++ }
++ dirfd = AT_FDCWD;
++ break;
++ }
++
++ ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES,
++ test_open_rel(dirfd, "..", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY));
++
++ if (rel == REL_CHROOT_ONLY) {
++ /* The current directory is dir_s2d2. */
++ ASSERT_EQ(0, test_open_rel(dirfd, "./s2d3", O_RDONLY));
++ } else {
++ /* The current directory is dir_s1d2. */
++ ASSERT_EQ(0, test_open_rel(dirfd, "./s1d3", O_RDONLY));
++ }
++
++ if (rel == REL_CHROOT_ONLY || rel == REL_CHROOT_CHDIR) {
++ /* Checks the root dir_s1d2. */
++ ASSERT_EQ(0, test_open_rel(dirfd, "/..", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, "/", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, "/f1", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, "/s1d3", O_RDONLY));
++ }
++
++ if (rel != REL_CHROOT_CHDIR) {
++ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY));
++
++ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY));
++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY));
++ }
++
++ if (rel == REL_OPEN)
++ ASSERT_EQ(0, close(dirfd));
++ ASSERT_EQ(0, close(ruleset_fd));
++}
++
++TEST_F_FORK(layout1, relative_open)
++{
++ test_relative_path(_metadata, REL_OPEN);
++}
++
++TEST_F_FORK(layout1, relative_chdir)
++{
++ test_relative_path(_metadata, REL_CHDIR);
++}
++
++TEST_F_FORK(layout1, relative_chroot_only)
++{
++ test_relative_path(_metadata, REL_CHROOT_ONLY);
++}
++
++TEST_F_FORK(layout1, relative_chroot_chdir)
++{
++ test_relative_path(_metadata, REL_CHROOT_CHDIR);
++}
++
++static void copy_binary(struct __test_metadata *const _metadata,
++ const char *const dst_path)
++{
++ int dst_fd, src_fd;
++ struct stat statbuf;
++
++ dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC);
++ ASSERT_LE(0, dst_fd) {
++ TH_LOG("Failed to open \"%s\": %s", dst_path,
++ strerror(errno));
++ }
++ src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC);
++ ASSERT_LE(0, src_fd) {
++ TH_LOG("Failed to open \"" BINARY_PATH "\": %s",
++ strerror(errno));
++ }
++ ASSERT_EQ(0, fstat(src_fd, &statbuf));
++ ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0,
++ statbuf.st_size));
++ ASSERT_EQ(0, close(src_fd));
++ ASSERT_EQ(0, close(dst_fd));
++}
++
++static void test_execute(struct __test_metadata *const _metadata,
++ const int err, const char *const path)
++{
++ int status;
++ char *const argv[] = {(char *)path, NULL};
++ const pid_t child = fork();
++
++ ASSERT_LE(0, child);
++ if (child == 0) {
++ ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) {
++ TH_LOG("Failed to execute \"%s\": %s", path,
++ strerror(errno));
++ };
++ ASSERT_EQ(err, errno);
++ _exit(_metadata->passed ? 2 : 1);
++ return;
++ }
++ ASSERT_EQ(child, waitpid(child, &status, 0));
++ ASSERT_EQ(1, WIFEXITED(status));
++ ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) {
++ TH_LOG("Unexpected return code for \"%s\": %s", path,
++ strerror(errno));
++ };
++}
++
++TEST_F_FORK(layout1, execute)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_EXECUTE,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++ rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ copy_binary(_metadata, file1_s1d1);
++ copy_binary(_metadata, file1_s1d2);
++ copy_binary(_metadata, file1_s1d3);
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++ test_execute(_metadata, EACCES, file1_s1d1);
++
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ test_execute(_metadata, 0, file1_s1d2);
++
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++ test_execute(_metadata, 0, file1_s1d3);
++}
++
++TEST_F_FORK(layout1, link)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++ rules);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ ASSERT_EQ(0, unlink(file1_s1d1));
++ ASSERT_EQ(0, unlink(file1_s1d2));
++ ASSERT_EQ(0, unlink(file1_s1d3));
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
++ ASSERT_EQ(EACCES, errno);
++ /* Denies linking because of reparenting. */
++ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
++ ASSERT_EQ(EXDEV, errno);
++ ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
++ ASSERT_EQ(EXDEV, errno);
++
++ ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
++ ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
++}
++
++TEST_F_FORK(layout1, rename_file)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d3,
++ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
++ },
++ {
++ .path = dir_s2d2,
++ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++ rules);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ ASSERT_EQ(0, unlink(file1_s1d1));
++ ASSERT_EQ(0, unlink(file1_s1d2));
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /*
++ * Tries to replace a file from a directory that allows file removal
++ * with a file in a different directory (which also allows file
++ * removal).
++ */
++ ASSERT_EQ(-1, rename(file1_s2d3, file1_s1d3));
++ ASSERT_EQ(EXDEV, errno);
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d3,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EXDEV, errno);
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EXDEV, errno);
++
++ /*
++ * Tries to replace a file from a directory that denies file removal
++ * with a file in a different directory (which allows file removal).
++ */
++ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
++ ASSERT_EQ(EXDEV, errno);
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file1_s1d3,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EXDEV, errno);
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s1d3,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EXDEV, errno);
++
++ /* Exchanges files and directories that partially allow removal. */
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EACCES, errno);
++
++ /* Renames files with different parents. */
++ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
++ ASSERT_EQ(EXDEV, errno);
++ ASSERT_EQ(0, unlink(file1_s1d3));
++ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
++ ASSERT_EQ(EXDEV, errno);
++
++ /* Exchanges and renames files with same parent. */
++ ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3));
++
++ /* Exchanges files and directories with same parent, twice. */
++ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
++ RENAME_EXCHANGE));
++}
++
++TEST_F_FORK(layout1, rename_dir)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
++ },
++ {
++ .path = dir_s2d1,
++ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++ rules);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ /* Empties dir_s1d3 to allow renaming. */
++ ASSERT_EQ(0, unlink(file1_s1d3));
++ ASSERT_EQ(0, unlink(file2_s1d3));
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Exchanges and renames directory to a different parent. */
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EXDEV, errno);
++ ASSERT_EQ(-1, rename(dir_s2d3, dir_s1d3));
++ ASSERT_EQ(EXDEV, errno);
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EXDEV, errno);
++
++ /*
++ * Exchanges directories with the same parent, which doesn't allow
++ * directory removal.
++ */
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(EACCES, errno);
++
++ /*
++ * Exchanges and renames a directory within the same parent, which allows
++ * directory removal.
++ */
++ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2,
++ RENAME_EXCHANGE));
++ ASSERT_EQ(0, unlink(dir_s1d3));
++ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
++ ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3));
++ ASSERT_EQ(0, rmdir(dir_s1d3));
++}
++
++TEST_F_FORK(layout1, remove_dir)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++ rules);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ ASSERT_EQ(0, unlink(file1_s1d1));
++ ASSERT_EQ(0, unlink(file1_s1d2));
++ ASSERT_EQ(0, unlink(file1_s1d3));
++ ASSERT_EQ(0, unlink(file2_s1d3));
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(0, rmdir(dir_s1d3));
++ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
++ ASSERT_EQ(0, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR));
++
++ /* dir_s1d2 itself cannot be removed. */
++ ASSERT_EQ(-1, rmdir(dir_s1d2));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d2, AT_REMOVEDIR));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, rmdir(dir_s1d1));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d1, AT_REMOVEDIR));
++ ASSERT_EQ(EACCES, errno);
++}
++
++TEST_F_FORK(layout1, remove_file)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++ rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(-1, unlink(file1_s1d1));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, unlinkat(AT_FDCWD, file1_s1d1, 0));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(0, unlink(file1_s1d2));
++ ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0));
++}
++
++static void test_make_file(struct __test_metadata *const _metadata,
++ const __u64 access, const mode_t mode, const dev_t dev)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = access,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, access, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ ASSERT_EQ(0, unlink(file1_s1d1));
++ ASSERT_EQ(0, unlink(file2_s1d1));
++ ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) {
++ TH_LOG("Failed to make file \"%s\": %s",
++ file2_s1d1, strerror(errno));
++ };
++
++ ASSERT_EQ(0, unlink(file1_s1d2));
++ ASSERT_EQ(0, unlink(file2_s1d2));
++
++ ASSERT_EQ(0, unlink(file1_s1d3));
++ ASSERT_EQ(0, unlink(file2_s1d3));
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(-1, mknod(file1_s1d1, mode | 0400, dev));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
++ ASSERT_EQ(EACCES, errno);
++
++ ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev)) {
++ TH_LOG("Failed to make file \"%s\": %s",
++ file1_s1d2, strerror(errno));
++ };
++ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
++ ASSERT_EQ(0, unlink(file2_s1d2));
++ ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
++
++ ASSERT_EQ(0, mknod(file1_s1d3, mode | 0400, dev));
++ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
++ ASSERT_EQ(0, unlink(file2_s1d3));
++ ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
++}
++
++TEST_F_FORK(layout1, make_char)
++{
++ /* Creates a /dev/null device. */
++ set_cap(_metadata, CAP_MKNOD);
++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR,
++ makedev(1, 3));
++}
++
++TEST_F_FORK(layout1, make_block)
++{
++ /* Creates a /dev/loop0 device. */
++ set_cap(_metadata, CAP_MKNOD);
++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK,
++ makedev(7, 0));
++}
++
++TEST_F_FORK(layout1, make_reg_1)
++{
++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, S_IFREG, 0);
++}
++
++TEST_F_FORK(layout1, make_reg_2)
++{
++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, 0, 0);
++}
++
++TEST_F_FORK(layout1, make_sock)
++{
++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_SOCK, S_IFSOCK, 0);
++}
++
++TEST_F_FORK(layout1, make_fifo)
++{
++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_FIFO, S_IFIFO, 0);
++}
++
++TEST_F_FORK(layout1, make_sym)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_MAKE_SYM,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++ rules);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ ASSERT_EQ(0, unlink(file1_s1d1));
++ ASSERT_EQ(0, unlink(file2_s1d1));
++ ASSERT_EQ(0, symlink("none", file2_s1d1));
++
++ ASSERT_EQ(0, unlink(file1_s1d2));
++ ASSERT_EQ(0, unlink(file2_s1d2));
++
++ ASSERT_EQ(0, unlink(file1_s1d3));
++ ASSERT_EQ(0, unlink(file2_s1d3));
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(-1, symlink("none", file1_s1d1));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
++ ASSERT_EQ(EACCES, errno);
++
++ ASSERT_EQ(0, symlink("none", file1_s1d2));
++ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
++ ASSERT_EQ(0, unlink(file2_s1d2));
++ ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
++
++ ASSERT_EQ(0, symlink("none", file1_s1d3));
++ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
++ ASSERT_EQ(0, unlink(file2_s1d3));
++ ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
++}
++
++TEST_F_FORK(layout1, make_dir)
++{
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
++ },
++ {}
++ };
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++ rules);
++
++ ASSERT_LE(0, ruleset_fd);
++
++ ASSERT_EQ(0, unlink(file1_s1d1));
++ ASSERT_EQ(0, unlink(file1_s1d2));
++ ASSERT_EQ(0, unlink(file1_s1d3));
++
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Uses file_* as directory names. */
++ ASSERT_EQ(-1, mkdir(file1_s1d1, 0700));
++ ASSERT_EQ(EACCES, errno);
++ ASSERT_EQ(0, mkdir(file1_s1d2, 0700));
++ ASSERT_EQ(0, mkdir(file1_s1d3, 0700));
++}
++
++static int open_proc_fd(struct __test_metadata *const _metadata, const int fd,
++ const int open_flags)
++{
++ static const char path_template[] = "/proc/self/fd/%d";
++ char procfd_path[sizeof(path_template) + 10];
++ const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path),
++ path_template, fd);
++
++ ASSERT_LT(procfd_path_size, sizeof(procfd_path));
++ return open(procfd_path, open_flags);
++}
++
++TEST_F_FORK(layout1, proc_unlinked_file)
++{
++ const struct rule rules[] = {
++ {
++ .path = file1_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {}
++ };
++ int reg_fd, proc_fd;
++ const int ruleset_fd = create_ruleset(_metadata,
++ LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE, rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ reg_fd = open(file1_s1d2, O_RDONLY | O_CLOEXEC);
++ ASSERT_LE(0, reg_fd);
++ ASSERT_EQ(0, unlink(file1_s1d2));
++
++ proc_fd = open_proc_fd(_metadata, reg_fd, O_RDONLY | O_CLOEXEC);
++ ASSERT_LE(0, proc_fd);
++ ASSERT_EQ(0, close(proc_fd));
++
++ proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC);
++ ASSERT_EQ(-1, proc_fd) {
++ TH_LOG("Successfully opened /proc/self/fd/%d: %s",
++ reg_fd, strerror(errno));
++ }
++ ASSERT_EQ(EACCES, errno);
++
++ ASSERT_EQ(0, close(reg_fd));
++}
++
++TEST_F_FORK(layout1, proc_pipe)
++{
++ int proc_fd;
++ int pipe_fds[2];
++ char buf = '\0';
++ const struct rule rules[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {}
++ };
++ /* Limits read and write access to files tied to the filesystem. */
++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access,
++ rules);
++
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks enforcement for normal files. */
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
++
++ /* Checks access to pipes through FD. */
++ ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC));
++ ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) {
++ TH_LOG("Failed to write in pipe: %s", strerror(errno));
++ }
++ ASSERT_EQ(1, read(pipe_fds[0], &buf, 1));
++ ASSERT_EQ('.', buf);
++
++ /* Checks write access to pipe through /proc/self/fd . */
++ proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC);
++ ASSERT_LE(0, proc_fd);
++ ASSERT_EQ(1, write(proc_fd, ".", 1)) {
++ TH_LOG("Failed to write through /proc/self/fd/%d: %s",
++ pipe_fds[1], strerror(errno));
++ }
++ ASSERT_EQ(0, close(proc_fd));
++
++ /* Checks read access to pipe through /proc/self/fd . */
++ proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC);
++ ASSERT_LE(0, proc_fd);
++ buf = '\0';
++ ASSERT_EQ(1, read(proc_fd, &buf, 1)) {
++ TH_LOG("Failed to read through /proc/self/fd/%d: %s",
++ pipe_fds[0], strerror(errno));
++ }
++ ASSERT_EQ(0, close(proc_fd));
++
++ ASSERT_EQ(0, close(pipe_fds[0]));
++ ASSERT_EQ(0, close(pipe_fds[1]));
++}
++
++FIXTURE(layout1_bind) {
++};
++
++FIXTURE_SETUP(layout1_bind)
++{
++ prepare_layout(_metadata);
++
++ create_layout1(_metadata);
++
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ ASSERT_EQ(0, mount(dir_s1d2, dir_s2d2, NULL, MS_BIND, NULL));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++FIXTURE_TEARDOWN(layout1_bind)
++{
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, umount(dir_s2d2));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++
++ remove_layout1(_metadata);
++
++ cleanup_layout(_metadata);
++}
++
++static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3";
++static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1";
++
++/*
++ * layout1_bind hierarchy:
++ *
++ * tmp
++ * ├── s1d1
++ * │  ├── f1
++ * │  ├── f2
++ * │  └── s1d2
++ * │  ├── f1
++ * │  ├── f2
++ * │  └── s1d3
++ * │  ├── f1
++ * │  └── f2
++ * ├── s2d1
++ * │  ├── f1
++ * │  └── s2d2
++ * │  ├── f1
++ * │  ├── f2
++ * │  └── s1d3
++ * │  ├── f1
++ * │  └── f2
++ * └── s3d1
++ * └── s3d2
++ * └── s3d3
++ */
++
++TEST_F_FORK(layout1_bind, no_restriction)
++{
++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++
++ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY));
++ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY));
++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
++ ASSERT_EQ(ENOENT, test_open(dir_s2d3, O_RDONLY));
++ ASSERT_EQ(ENOENT, test_open(file1_s2d3, O_RDONLY));
++
++ ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY));
++ ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY));
++
++ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
++}
++
++TEST_F_FORK(layout1_bind, same_content_same_file)
++{
++ /*
++ * Sets access rights on parent directories of both source and
++ * destination mount points.
++ */
++ const struct rule layer1_parent[] = {
++ {
++ .path = dir_s1d1,
++ .access = ACCESS_RO,
++ },
++ {
++ .path = dir_s2d1,
++ .access = ACCESS_RW,
++ },
++ {}
++ };
++ /*
++ * Sets access rights on the same bind-mounted directories. The result
++ * should be ACCESS_RW for both directories, but not for both
++ * hierarchies, because of the first layer.
++ */
++ const struct rule layer2_mount_point[] = {
++ {
++ .path = dir_s1d2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = dir_s2d2,
++ .access = ACCESS_RW,
++ },
++ {}
++ };
++ /* Only allows read access to the s1d3 hierarchies. */
++ const struct rule layer3_source[] = {
++ {
++ .path = dir_s1d3,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {}
++ };
++ /* Removes all access rights. */
++ const struct rule layer4_destination[] = {
++ {
++ .path = bind_file1_s1d3,
++ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {}
++ };
++ int ruleset_fd;
++
++ /* Sets rules for the parent directories. */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_parent);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks source hierarchy. */
++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++ /* Checks destination hierarchy. */
++ ASSERT_EQ(0, test_open(file1_s2d1, O_RDWR));
++ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY));
++
++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR));
++ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
++
++ /* Sets rules for the mount points. */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_mount_point);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks source hierarchy. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
++
++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++ /* Checks destination hierarchy. */
++ ASSERT_EQ(EACCES, test_open(file1_s2d1, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s2d1, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY));
++
++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR));
++ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
++ ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++ /* Sets a (shared) rule only on the source. */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_source);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks source hierarchy. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
++
++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++ /* Checks destination hierarchy. */
++ ASSERT_EQ(EACCES, test_open(file1_s2d2, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s2d2, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
++
++ ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
++ ASSERT_EQ(EACCES, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY));
++
++ /* Sets a (shared) rule only on the destination. */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_destination);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks source hierarchy. */
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
++
++ /* Checks destination hierarchy. */
++ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
++}
++
++#define LOWER_BASE TMP_DIR "/lower"
++#define LOWER_DATA LOWER_BASE "/data"
++static const char lower_fl1[] = LOWER_DATA "/fl1";
++static const char lower_dl1[] = LOWER_DATA "/dl1";
++static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2";
++static const char lower_fo1[] = LOWER_DATA "/fo1";
++static const char lower_do1[] = LOWER_DATA "/do1";
++static const char lower_do1_fo2[] = LOWER_DATA "/do1/fo2";
++static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3";
++
++static const char (*lower_base_files[])[] = {
++ &lower_fl1,
++ &lower_fo1,
++ NULL
++};
++static const char (*lower_base_directories[])[] = {
++ &lower_dl1,
++ &lower_do1,
++ NULL
++};
++static const char (*lower_sub_files[])[] = {
++ &lower_dl1_fl2,
++ &lower_do1_fo2,
++ &lower_do1_fl3,
++ NULL
++};
++
++#define UPPER_BASE TMP_DIR "/upper"
++#define UPPER_DATA UPPER_BASE "/data"
++#define UPPER_WORK UPPER_BASE "/work"
++static const char upper_fu1[] = UPPER_DATA "/fu1";
++static const char upper_du1[] = UPPER_DATA "/du1";
++static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2";
++static const char upper_fo1[] = UPPER_DATA "/fo1";
++static const char upper_do1[] = UPPER_DATA "/do1";
++static const char upper_do1_fo2[] = UPPER_DATA "/do1/fo2";
++static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3";
++
++static const char (*upper_base_files[])[] = {
++ &upper_fu1,
++ &upper_fo1,
++ NULL
++};
++static const char (*upper_base_directories[])[] = {
++ &upper_du1,
++ &upper_do1,
++ NULL
++};
++static const char (*upper_sub_files[])[] = {
++ &upper_du1_fu2,
++ &upper_do1_fo2,
++ &upper_do1_fu3,
++ NULL
++};
++
++#define MERGE_BASE TMP_DIR "/merge"
++#define MERGE_DATA MERGE_BASE "/data"
++static const char merge_fl1[] = MERGE_DATA "/fl1";
++static const char merge_dl1[] = MERGE_DATA "/dl1";
++static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2";
++static const char merge_fu1[] = MERGE_DATA "/fu1";
++static const char merge_du1[] = MERGE_DATA "/du1";
++static const char merge_du1_fu2[] = MERGE_DATA "/du1/fu2";
++static const char merge_fo1[] = MERGE_DATA "/fo1";
++static const char merge_do1[] = MERGE_DATA "/do1";
++static const char merge_do1_fo2[] = MERGE_DATA "/do1/fo2";
++static const char merge_do1_fl3[] = MERGE_DATA "/do1/fl3";
++static const char merge_do1_fu3[] = MERGE_DATA "/do1/fu3";
++
++static const char (*merge_base_files[])[] = {
++ &merge_fl1,
++ &merge_fu1,
++ &merge_fo1,
++ NULL
++};
++static const char (*merge_base_directories[])[] = {
++ &merge_dl1,
++ &merge_du1,
++ &merge_do1,
++ NULL
++};
++static const char (*merge_sub_files[])[] = {
++ &merge_dl1_fl2,
++ &merge_du1_fu2,
++ &merge_do1_fo2,
++ &merge_do1_fl3,
++ &merge_do1_fu3,
++ NULL
++};
++
++/*
++ * layout2_overlay hierarchy:
++ *
++ * tmp
++ * ├── lower
++ * │  └── data
++ * │  ├── dl1
++ * │  │  └── fl2
++ * │  ├── do1
++ * │  │  ├── fl3
++ * │  │  └── fo2
++ * │  ├── fl1
++ * │  └── fo1
++ * ├── merge
++ * │  └── data
++ * │  ├── dl1
++ * │  │  └── fl2
++ * │  ├── do1
++ * │  │  ├── fl3
++ * │  │  ├── fo2
++ * │  │  └── fu3
++ * │  ├── du1
++ * │  │  └── fu2
++ * │  ├── fl1
++ * │  ├── fo1
++ * │  └── fu1
++ * └── upper
++ * ├── data
++ * │  ├── do1
++ * │  │  ├── fo2
++ * │  │  └── fu3
++ * │  ├── du1
++ * │  │  └── fu2
++ * │  ├── fo1
++ * │  └── fu1
++ * └── work
++ * └── work
++ */
++
++FIXTURE(layout2_overlay) {
++};
++
++FIXTURE_SETUP(layout2_overlay)
++{
++ prepare_layout(_metadata);
++
++ create_directory(_metadata, LOWER_BASE);
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ /* Creates tmpfs mount points to get deterministic overlayfs. */
++ ASSERT_EQ(0, mount("tmp", LOWER_BASE, "tmpfs", 0, "size=4m,mode=700"));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++ create_file(_metadata, lower_fl1);
++ create_file(_metadata, lower_dl1_fl2);
++ create_file(_metadata, lower_fo1);
++ create_file(_metadata, lower_do1_fo2);
++ create_file(_metadata, lower_do1_fl3);
++
++ create_directory(_metadata, UPPER_BASE);
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ ASSERT_EQ(0, mount("tmp", UPPER_BASE, "tmpfs", 0, "size=4m,mode=700"));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++ create_file(_metadata, upper_fu1);
++ create_file(_metadata, upper_du1_fu2);
++ create_file(_metadata, upper_fo1);
++ create_file(_metadata, upper_do1_fo2);
++ create_file(_metadata, upper_do1_fu3);
++ ASSERT_EQ(0, mkdir(UPPER_WORK, 0700));
++
++ create_directory(_metadata, MERGE_DATA);
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ set_cap(_metadata, CAP_DAC_OVERRIDE);
++ ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0,
++ "lowerdir=" LOWER_DATA
++ ",upperdir=" UPPER_DATA
++ ",workdir=" UPPER_WORK));
++ clear_cap(_metadata, CAP_DAC_OVERRIDE);
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++}
++
++FIXTURE_TEARDOWN(layout2_overlay)
++{
++ EXPECT_EQ(0, remove_path(lower_do1_fl3));
++ EXPECT_EQ(0, remove_path(lower_dl1_fl2));
++ EXPECT_EQ(0, remove_path(lower_fl1));
++ EXPECT_EQ(0, remove_path(lower_do1_fo2));
++ EXPECT_EQ(0, remove_path(lower_fo1));
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, umount(LOWER_BASE));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, remove_path(LOWER_BASE));
++
++ EXPECT_EQ(0, remove_path(upper_do1_fu3));
++ EXPECT_EQ(0, remove_path(upper_du1_fu2));
++ EXPECT_EQ(0, remove_path(upper_fu1));
++ EXPECT_EQ(0, remove_path(upper_do1_fo2));
++ EXPECT_EQ(0, remove_path(upper_fo1));
++ EXPECT_EQ(0, remove_path(UPPER_WORK "/work"));
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, umount(UPPER_BASE));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, remove_path(UPPER_BASE));
++
++ set_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, umount(MERGE_DATA));
++ clear_cap(_metadata, CAP_SYS_ADMIN);
++ EXPECT_EQ(0, remove_path(MERGE_DATA));
++
++ cleanup_layout(_metadata);
++}
++
++TEST_F_FORK(layout2_overlay, no_restriction)
++{
++ ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY));
++ ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY));
++ ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY));
++ ASSERT_EQ(0, test_open(lower_fo1, O_RDONLY));
++ ASSERT_EQ(0, test_open(lower_do1, O_RDONLY));
++ ASSERT_EQ(0, test_open(lower_do1_fo2, O_RDONLY));
++ ASSERT_EQ(0, test_open(lower_do1_fl3, O_RDONLY));
++
++ ASSERT_EQ(0, test_open(upper_fu1, O_RDONLY));
++ ASSERT_EQ(0, test_open(upper_du1, O_RDONLY));
++ ASSERT_EQ(0, test_open(upper_du1_fu2, O_RDONLY));
++ ASSERT_EQ(0, test_open(upper_fo1, O_RDONLY));
++ ASSERT_EQ(0, test_open(upper_do1, O_RDONLY));
++ ASSERT_EQ(0, test_open(upper_do1_fo2, O_RDONLY));
++ ASSERT_EQ(0, test_open(upper_do1_fu3, O_RDONLY));
++
++ ASSERT_EQ(0, test_open(merge_fl1, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_dl1, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_dl1_fl2, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_fu1, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_du1, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_du1_fu2, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_fo1, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_do1, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_do1_fo2, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_do1_fl3, O_RDONLY));
++ ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY));
++}
++
++#define for_each_path(path_list, path_entry, i) \
++ for (i = 0, path_entry = *path_list[i]; path_list[i]; \
++ path_entry = *path_list[++i])
++
++TEST_F_FORK(layout2_overlay, same_content_different_file)
++{
++ /* Sets access rights on parent directories of both layers. */
++ const struct rule layer1_base[] = {
++ {
++ .path = LOWER_BASE,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = UPPER_BASE,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = MERGE_BASE,
++ .access = ACCESS_RW,
++ },
++ {}
++ };
++ const struct rule layer2_data[] = {
++ {
++ .path = LOWER_DATA,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = UPPER_DATA,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = MERGE_DATA,
++ .access = ACCESS_RW,
++ },
++ {}
++ };
++ /* Sets access rights on directories inside both layers. */
++ const struct rule layer3_subdirs[] = {
++ {
++ .path = lower_dl1,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = lower_do1,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = upper_du1,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = upper_do1,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = merge_dl1,
++ .access = ACCESS_RW,
++ },
++ {
++ .path = merge_du1,
++ .access = ACCESS_RW,
++ },
++ {
++ .path = merge_do1,
++ .access = ACCESS_RW,
++ },
++ {}
++ };
++ /* Tightens access rights to the files. */
++ const struct rule layer4_files[] = {
++ {
++ .path = lower_dl1_fl2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = lower_do1_fo2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = lower_do1_fl3,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = upper_du1_fu2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = upper_do1_fo2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = upper_do1_fu3,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE,
++ },
++ {
++ .path = merge_dl1_fl2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {
++ .path = merge_du1_fu2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {
++ .path = merge_do1_fo2,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {
++ .path = merge_do1_fl3,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {
++ .path = merge_do1_fu3,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {}
++ };
++ const struct rule layer5_merge_only[] = {
++ {
++ .path = MERGE_DATA,
++ .access = LANDLOCK_ACCESS_FS_READ_FILE |
++ LANDLOCK_ACCESS_FS_WRITE_FILE,
++ },
++ {}
++ };
++ int ruleset_fd;
++ size_t i;
++ const char *path_entry;
++
++ /* Sets rules on base directories (i.e. outside overlay scope). */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks lower layer. */
++ for_each_path(lower_base_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++ }
++ for_each_path(lower_base_directories, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ }
++ for_each_path(lower_sub_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++ }
++ /* Checks upper layer. */
++ for_each_path(upper_base_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++ }
++ for_each_path(upper_base_directories, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ }
++ for_each_path(upper_sub_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++ }
++ /*
++ * Checks that access rights are independent of the lower and upper
++ * layers: write access to upper files viewed through the merge point
++ * is still allowed, and write access to lower files viewed (and
++ * copied) through the merge point is still allowed.
++ */
++ for_each_path(merge_base_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++ }
++ for_each_path(merge_base_directories, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ }
++ for_each_path(merge_sub_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++ }
++
++ /* Sets rules on data directories (i.e. inside overlay scope). */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_data);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks merge. */
++ for_each_path(merge_base_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++ }
++ for_each_path(merge_base_directories, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ }
++ for_each_path(merge_sub_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++ }
++
++ /* Same checks with tighter rules. */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_subdirs);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks changes for lower layer. */
++ for_each_path(lower_base_files, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
++ }
++ /* Checks changes for upper layer. */
++ for_each_path(upper_base_files, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
++ }
++ /* Checks all merge accesses. */
++ for_each_path(merge_base_files, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
++ }
++ for_each_path(merge_base_directories, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ }
++ for_each_path(merge_sub_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++ }
++
++ /* Sets rules directly on overlayed files. */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_files);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks unchanged accesses on lower layer. */
++ for_each_path(lower_sub_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++ }
++ /* Checks unchanged accesses on upper layer. */
++ for_each_path(upper_sub_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
++ }
++ /* Checks all merge accesses. */
++ for_each_path(merge_base_files, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
++ }
++ for_each_path(merge_base_directories, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ }
++ for_each_path(merge_sub_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++ }
++
++ /* Only allows access to the merge hierarchy. */
++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer5_merge_only);
++ ASSERT_LE(0, ruleset_fd);
++ enforce_ruleset(_metadata, ruleset_fd);
++ ASSERT_EQ(0, close(ruleset_fd));
++
++ /* Checks new accesses on lower layer. */
++ for_each_path(lower_sub_files, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
++ }
++ /* Checks new accesses on upper layer. */
++ for_each_path(upper_sub_files, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
++ }
++ /* Checks all merge accesses. */
++ for_each_path(merge_base_files, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
++ }
++ for_each_path(merge_base_directories, path_entry, i) {
++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY));
++ }
++ for_each_path(merge_sub_files, path_entry, i) {
++ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
++ }
++}
++
++TEST_HARNESS_MAIN
+diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
+new file mode 100644
+index 0000000000000..15fbef9cc8496
+--- /dev/null
++++ b/tools/testing/selftests/landlock/ptrace_test.c
+@@ -0,0 +1,337 @@
++// SPDX-License-Identifier: GPL-2.0
++/*
++ * Landlock tests - Ptrace
++ *
++ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
++ * Copyright © 2019-2020 ANSSI
++ */
++
++#define _GNU_SOURCE
++#include <errno.h>
++#include <fcntl.h>
++#include <linux/landlock.h>
++#include <signal.h>
++#include <sys/prctl.h>
++#include <sys/ptrace.h>
++#include <sys/types.h>
++#include <sys/wait.h>
++#include <unistd.h>
++
++#include "common.h"
++
++static void create_domain(struct __test_metadata *const _metadata)
++{
++ int ruleset_fd;
++ struct landlock_ruleset_attr ruleset_attr = {
++ .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK,
++ };
++
++ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
++ sizeof(ruleset_attr), 0);
++ EXPECT_LE(0, ruleset_fd) {
++ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
++ }
++ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
++ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
++ EXPECT_EQ(0, close(ruleset_fd));
++}
++
++static int test_ptrace_read(const pid_t pid)
++{
++ static const char path_template[] = "/proc/%d/environ";
++ char procenv_path[sizeof(path_template) + 10];
++ int procenv_path_size, fd;
++
++ procenv_path_size = snprintf(procenv_path, sizeof(procenv_path),
++ path_template, pid);
++ if (procenv_path_size >= sizeof(procenv_path))
++ return E2BIG;
++
++ fd = open(procenv_path, O_RDONLY | O_CLOEXEC);
++ if (fd < 0)
++ return errno;
++ /*
++ * Mixing error codes from close(2) and open(2) should not lead to any
++ * (access type) confusion for this test.
++ */
++ if (close(fd) != 0)
++ return errno;
++ return 0;
++}
++
++FIXTURE(hierarchy) { };
++
++FIXTURE_VARIANT(hierarchy) {
++ const bool domain_both;
++ const bool domain_parent;
++ const bool domain_child;
++};
++
++/*
++ * Test multiple tracing combinations between a parent process P1 and a child
++ * process P2.
++ *
++ * Yama's scoped ptrace is presumed disabled. If enabled, this optional
++ * restriction is enforced in addition to any Landlock check, which means that
++ * all P2 requests to trace P1 would be denied.
++ */
++
++/*
++ * No domain
++ *
++ * P1-. P1 -> P2 : allow
++ * \ P2 -> P1 : allow
++ * 'P2
++ */
++FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
++ .domain_both = false,
++ .domain_parent = false,
++ .domain_child = false,
++};
++
++/*
++ * Child domain
++ *
++ * P1--. P1 -> P2 : allow
++ * \ P2 -> P1 : deny
++ * .'-----.
++ * | P2 |
++ * '------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
++ .domain_both = false,
++ .domain_parent = false,
++ .domain_child = true,
++};
++
++/*
++ * Parent domain
++ * .------.
++ * | P1 --. P1 -> P2 : deny
++ * '------' \ P2 -> P1 : allow
++ * '
++ * P2
++ */
++FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
++ .domain_both = false,
++ .domain_parent = true,
++ .domain_child = false,
++};
++
++/*
++ * Parent + child domain (siblings)
++ * .------.
++ * | P1 ---. P1 -> P2 : deny
++ * '------' \ P2 -> P1 : deny
++ * .---'--.
++ * | P2 |
++ * '------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
++ .domain_both = false,
++ .domain_parent = true,
++ .domain_child = true,
++};
++
++/*
++ * Same domain (inherited)
++ * .-------------.
++ * | P1----. | P1 -> P2 : allow
++ * | \ | P2 -> P1 : allow
++ * | ' |
++ * | P2 |
++ * '-------------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
++ .domain_both = true,
++ .domain_parent = false,
++ .domain_child = false,
++};
++
++/*
++ * Inherited + child domain
++ * .-----------------.
++ * | P1----. | P1 -> P2 : allow
++ * | \ | P2 -> P1 : deny
++ * | .-'----. |
++ * | | P2 | |
++ * | '------' |
++ * '-----------------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
++ .domain_both = true,
++ .domain_parent = false,
++ .domain_child = true,
++};
++
++/*
++ * Inherited + parent domain
++ * .-----------------.
++ * |.------. | P1 -> P2 : deny
++ * || P1 ----. | P2 -> P1 : allow
++ * |'------' \ |
++ * | ' |
++ * | P2 |
++ * '-----------------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
++ .domain_both = true,
++ .domain_parent = true,
++ .domain_child = false,
++};
++
++/*
++ * Inherited + parent and child domain (siblings)
++ * .-----------------.
++ * | .------. | P1 -> P2 : deny
++ * | | P1 . | P2 -> P1 : deny
++ * | '------'\ |
++ * | \ |
++ * | .--'---. |
++ * | | P2 | |
++ * | '------' |
++ * '-----------------'
++ */
++FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
++ .domain_both = true,
++ .domain_parent = true,
++ .domain_child = true,
++};
++
++FIXTURE_SETUP(hierarchy)
++{ }
++
++FIXTURE_TEARDOWN(hierarchy)
++{ }
++
++/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
++TEST_F(hierarchy, trace)
++{
++ pid_t child, parent;
++ int status, err_proc_read;
++ int pipe_child[2], pipe_parent[2];
++ char buf_parent;
++ long ret;
++
++ /*
++ * Removes all effective and permitted capabilities to not interfere
++ * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS.
++ */
++ drop_caps(_metadata);
++
++ parent = getpid();
++ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
++ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
++ if (variant->domain_both) {
++ create_domain(_metadata);
++ if (!_metadata->passed)
++ /* Aborts before forking. */
++ return;
++ }
++
++ child = fork();
++ ASSERT_LE(0, child);
++ if (child == 0) {
++ char buf_child;
++
++ ASSERT_EQ(0, close(pipe_parent[1]));
++ ASSERT_EQ(0, close(pipe_child[0]));
++ if (variant->domain_child)
++ create_domain(_metadata);
++
++ /* Waits for the parent to be in a domain, if any. */
++ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
++
++ /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. */
++ err_proc_read = test_ptrace_read(parent);
++ ret = ptrace(PTRACE_ATTACH, parent, NULL, 0);
++ if (variant->domain_child) {
++ EXPECT_EQ(-1, ret);
++ EXPECT_EQ(EPERM, errno);
++ EXPECT_EQ(EACCES, err_proc_read);
++ } else {
++ EXPECT_EQ(0, ret);
++ EXPECT_EQ(0, err_proc_read);
++ }
++ if (ret == 0) {
++ ASSERT_EQ(parent, waitpid(parent, &status, 0));
++ ASSERT_EQ(1, WIFSTOPPED(status));
++ ASSERT_EQ(0, ptrace(PTRACE_DETACH, parent, NULL, 0));
++ }
++
++ /* Tests child PTRACE_TRACEME. */
++ ret = ptrace(PTRACE_TRACEME);
++ if (variant->domain_parent) {
++ EXPECT_EQ(-1, ret);
++ EXPECT_EQ(EPERM, errno);
++ } else {
++ EXPECT_EQ(0, ret);
++ }
++
++ /*
++ * Signals that the PTRACE_ATTACH test is done and the
++ * PTRACE_TRACEME test is ongoing.
++ */
++ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
++
++ if (!variant->domain_parent) {
++ ASSERT_EQ(0, raise(SIGSTOP));
++ }
++
++ /* Waits for the parent PTRACE_ATTACH test. */
++ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
++ _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
++ return;
++ }
++
++ ASSERT_EQ(0, close(pipe_child[1]));
++ ASSERT_EQ(0, close(pipe_parent[0]));
++ if (variant->domain_parent)
++ create_domain(_metadata);
++
++ /* Signals that the parent is in a domain, if any. */
++ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
++
++ /*
++ * Waits for the child to test PTRACE_ATTACH on the parent and start
++ * testing PTRACE_TRACEME.
++ */
++ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
++
++ /* Tests child PTRACE_TRACEME. */
++ if (!variant->domain_parent) {
++ ASSERT_EQ(child, waitpid(child, &status, 0));
++ ASSERT_EQ(1, WIFSTOPPED(status));
++ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
++ } else {
++ /* The child should not be traced by the parent. */
++ EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0));
++ EXPECT_EQ(ESRCH, errno);
++ }
++
++ /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */
++ err_proc_read = test_ptrace_read(child);
++ ret = ptrace(PTRACE_ATTACH, child, NULL, 0);
++ if (variant->domain_parent) {
++ EXPECT_EQ(-1, ret);
++ EXPECT_EQ(EPERM, errno);
++ EXPECT_EQ(EACCES, err_proc_read);
++ } else {
++ EXPECT_EQ(0, ret);
++ EXPECT_EQ(0, err_proc_read);
++ }
++ if (ret == 0) {
++ ASSERT_EQ(child, waitpid(child, &status, 0));
++ ASSERT_EQ(1, WIFSTOPPED(status));
++ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
++ }
++
++ /* Signals that the parent PTRACE_ATTACH test is done. */
++ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
++ ASSERT_EQ(child, waitpid(child, &status, 0));
++ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
++ WEXITSTATUS(status) != EXIT_SUCCESS)
++ _metadata->passed = 0;
++}
++
++TEST_HARNESS_MAIN
+diff --git a/tools/testing/selftests/landlock/true.c b/tools/testing/selftests/landlock/true.c
+new file mode 100644
+index 0000000000000..3f9ccbf52783a
+--- /dev/null
++++ b/tools/testing/selftests/landlock/true.c
+@@ -0,0 +1,5 @@
++// SPDX-License-Identifier: GPL-2.0
++int main(void)
++{
++ return 0;
++}
+--
+2.39.2
+
--- /dev/null
+From 54bbaa4518388e88b866567125ccc67d87b9a423 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 05:32:29 +0000
+Subject: selftests/landlock: Skip overlayfs tests when not supported
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jeff Xu <jeffxu@google.com>
+
+[ Upstream commit 366617a69e60610912836570546f118006ebc7cb ]
+
+overlayfs may be disabled in the kernel configuration, causing related
+tests to fail. Check that overlayfs is supported at runtime, so we can
+skip layout2_overlay.* accordingly.
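+
+The check boils down to looking for an overlay entry in
+/proc/filesystems; a rough manual equivalent of the new helper (a
+sketch, not part of the patch itself) is:
+
+  grep -w overlay /proc/filesystems || echo "overlayfs not supported"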
+
+Signed-off-by: Jeff Xu <jeffxu@google.com>
+Reviewed-by: Guenter Roeck <groeck@chromium.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230113053229.1281774-2-jeffxu@google.com
+[mic: Reword comments and constify variables]
+Signed-off-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/landlock/fs_test.c | 47 ++++++++++++++++++++++
+ 1 file changed, 47 insertions(+)
+
+diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
+index 10c9a1e4ebd9b..db153452b110a 100644
+--- a/tools/testing/selftests/landlock/fs_test.c
++++ b/tools/testing/selftests/landlock/fs_test.c
+@@ -11,6 +11,7 @@
+ #include <fcntl.h>
+ #include <linux/landlock.h>
+ #include <sched.h>
++#include <stdio.h>
+ #include <string.h>
+ #include <sys/capability.h>
+ #include <sys/mount.h>
+@@ -74,6 +75,40 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
+ * └── s3d3
+ */
+
++static bool fgrep(FILE *const inf, const char *const str)
++{
++ char line[32];
++ const int slen = strlen(str);
++
++ while (!feof(inf)) {
++ if (!fgets(line, sizeof(line), inf))
++ break;
++ if (strncmp(line, str, slen))
++ continue;
++
++ return true;
++ }
++
++ return false;
++}
++
++static bool supports_overlayfs(void)
++{
++ bool res;
++ FILE *const inf = fopen("/proc/filesystems", "r");
++
++ /*
++ * Assume that the filesystem is supported if the list of supported
++ * filesystems cannot be read.
++ */
++ if (!inf)
++ return true;
++
++ res = fgrep(inf, "nodev\toverlay\n");
++ fclose(inf);
++ return res;
++}
++
+ static void mkdir_parents(struct __test_metadata *const _metadata,
+ const char *const path)
+ {
+@@ -2416,6 +2451,9 @@ FIXTURE(layout2_overlay) {
+
+ FIXTURE_SETUP(layout2_overlay)
+ {
++ if (!supports_overlayfs())
++ SKIP(return, "overlayfs is not supported");
++
+ prepare_layout(_metadata);
+
+ create_directory(_metadata, LOWER_BASE);
+@@ -2453,6 +2491,9 @@ FIXTURE_SETUP(layout2_overlay)
+
+ FIXTURE_TEARDOWN(layout2_overlay)
+ {
++ if (!supports_overlayfs())
++ SKIP(return, "overlayfs is not supported");
++
+ EXPECT_EQ(0, remove_path(lower_do1_fl3));
+ EXPECT_EQ(0, remove_path(lower_dl1_fl2));
+ EXPECT_EQ(0, remove_path(lower_fl1));
+@@ -2484,6 +2525,9 @@ FIXTURE_TEARDOWN(layout2_overlay)
+
+ TEST_F_FORK(layout2_overlay, no_restriction)
+ {
++ if (!supports_overlayfs())
++ SKIP(return, "overlayfs is not supported");
++
+ ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY));
+@@ -2647,6 +2691,9 @@ TEST_F_FORK(layout2_overlay, same_content_different_file)
+ size_t i;
+ const char *path_entry;
+
++ if (!supports_overlayfs())
++ SKIP(return, "overlayfs is not supported");
++
+ /* Sets rules on base directories (i.e. outside overlay scope). */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
+ ASSERT_LE(0, ruleset_fd);
+--
+2.39.2
+
--- /dev/null
+From 16905b2ec61fefd3ae8a5ed6e13c12ce98bd579e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 14 Jan 2023 02:03:06 +0000
+Subject: selftests/landlock: Test ptrace as much as possible with Yama
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Jeff Xu <jeffxu@google.com>
+
+[ Upstream commit 8677e555f17f51321d0730b945aeb7d4b95f998f ]
+
+Update ptrace tests according to all potential Yama security policies.
+This is required to make such tests pass even if Yama is enabled.
+
+Tests are not skipped; instead, they now check both Landlock and Yama
+boundary restrictions at run time to keep maximum test coverage (i.e.
+positive and negative testing).
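+
+For reference, the Yama ptrace scopes are: 0 (disabled), 1 (relational,
+parent/child only), 2 (admin-only, requiring CAP_SYS_PTRACE) and 3 (no
+attach). The active scope can be queried at run time, e.g.:
+
+  cat /proc/sys/kernel/yama/ptrace_scope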
+
+Signed-off-by: Jeff Xu <jeffxu@google.com>
+Link: https://lore.kernel.org/r/20230114020306.1407195-2-jeffxu@google.com
+Cc: stable@vger.kernel.org
+[mic: Add curly braces around EXPECT_EQ() to make it build, and improve
+commit message]
+Co-developed-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Mickaël Salaün <mic@digikod.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../testing/selftests/landlock/ptrace_test.c | 113 +++++++++++++++---
+ 1 file changed, 96 insertions(+), 17 deletions(-)
+
+diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
+index 090adadfe2dc3..14745cceb059a 100644
+--- a/tools/testing/selftests/landlock/ptrace_test.c
++++ b/tools/testing/selftests/landlock/ptrace_test.c
+@@ -19,6 +19,12 @@
+
+ #include "common.h"
+
++/* Copied from security/yama/yama_lsm.c */
++#define YAMA_SCOPE_DISABLED 0
++#define YAMA_SCOPE_RELATIONAL 1
++#define YAMA_SCOPE_CAPABILITY 2
++#define YAMA_SCOPE_NO_ATTACH 3
++
+ static void create_domain(struct __test_metadata *const _metadata)
+ {
+ int ruleset_fd;
+@@ -59,6 +65,25 @@ static int test_ptrace_read(const pid_t pid)
+ return 0;
+ }
+
++static int get_yama_ptrace_scope(void)
++{
++ int ret;
++ char buf[2] = {};
++ const int fd = open("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY);
++
++ if (fd < 0)
++ return 0;
++
++ if (read(fd, buf, 1) < 0) {
++ close(fd);
++ return -1;
++ }
++
++ ret = atoi(buf);
++ close(fd);
++ return ret;
++}
++
+ /* clang-format off */
+ FIXTURE(hierarchy) {};
+ /* clang-format on */
+@@ -228,8 +253,51 @@ TEST_F(hierarchy, trace)
+ pid_t child, parent;
+ int status, err_proc_read;
+ int pipe_child[2], pipe_parent[2];
++ int yama_ptrace_scope;
+ char buf_parent;
+ long ret;
++ bool can_read_child, can_trace_child, can_read_parent, can_trace_parent;
++
++ yama_ptrace_scope = get_yama_ptrace_scope();
++ ASSERT_LE(0, yama_ptrace_scope);
++
++ if (yama_ptrace_scope > YAMA_SCOPE_DISABLED)
++ TH_LOG("Incomplete tests due to Yama restrictions (scope %d)",
++ yama_ptrace_scope);
++
++ /*
++ * can_read_child is true if a parent process can read its child
++ * process, which is only the case when the parent process is not
++ * isolated from the child with a dedicated Landlock domain.
++ */
++ can_read_child = !variant->domain_parent;
++
++ /*
++ * can_trace_child is true if a parent process can trace its child
++ * process. This depends on two conditions:
++ * - The parent process is not isolated from the child with a dedicated
++ * Landlock domain.
++ * - Yama allows tracing children (up to YAMA_SCOPE_RELATIONAL).
++ */
++ can_trace_child = can_read_child &&
++ yama_ptrace_scope <= YAMA_SCOPE_RELATIONAL;
++
++ /*
++ * can_read_parent is true if a child process can read its parent
++ * process, which is only the case when the child process is not
++ * isolated from the parent with a dedicated Landlock domain.
++ */
++ can_read_parent = !variant->domain_child;
++
++ /*
++ * can_trace_parent is true if a child process can trace its parent
++ * process. This depends on two conditions:
++ * - The child process is not isolated from the parent with a dedicated
++ * Landlock domain.
++ * - Yama is disabled (YAMA_SCOPE_DISABLED).
++ */
++ can_trace_parent = can_read_parent &&
++ yama_ptrace_scope <= YAMA_SCOPE_DISABLED;
+
+ /*
+ * Removes all effective and permitted capabilities to not interfere
+@@ -260,16 +328,21 @@ TEST_F(hierarchy, trace)
+ /* Waits for the parent to be in a domain, if any. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+
+- /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. */
++ /* Tests PTRACE_MODE_READ on the parent. */
+ err_proc_read = test_ptrace_read(parent);
++ if (can_read_parent) {
++ EXPECT_EQ(0, err_proc_read);
++ } else {
++ EXPECT_EQ(EACCES, err_proc_read);
++ }
++
++ /* Tests PTRACE_ATTACH on the parent. */
+ ret = ptrace(PTRACE_ATTACH, parent, NULL, 0);
+- if (variant->domain_child) {
++ if (can_trace_parent) {
++ EXPECT_EQ(0, ret);
++ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+- EXPECT_EQ(EACCES, err_proc_read);
+- } else {
+- EXPECT_EQ(0, ret);
+- EXPECT_EQ(0, err_proc_read);
+ }
+ if (ret == 0) {
+ ASSERT_EQ(parent, waitpid(parent, &status, 0));
+@@ -279,11 +352,11 @@ TEST_F(hierarchy, trace)
+
+ /* Tests child PTRACE_TRACEME. */
+ ret = ptrace(PTRACE_TRACEME);
+- if (variant->domain_parent) {
++ if (can_trace_child) {
++ EXPECT_EQ(0, ret);
++ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+- } else {
+- EXPECT_EQ(0, ret);
+ }
+
+ /*
+@@ -292,7 +365,7 @@ TEST_F(hierarchy, trace)
+ */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+- if (!variant->domain_parent) {
++ if (can_trace_child) {
+ ASSERT_EQ(0, raise(SIGSTOP));
+ }
+
+@@ -317,7 +390,7 @@ TEST_F(hierarchy, trace)
+ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests child PTRACE_TRACEME. */
+- if (!variant->domain_parent) {
++ if (can_trace_child) {
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
+@@ -327,17 +400,23 @@ TEST_F(hierarchy, trace)
+ EXPECT_EQ(ESRCH, errno);
+ }
+
+- /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */
++ /* Tests PTRACE_MODE_READ on the child. */
+ err_proc_read = test_ptrace_read(child);
++ if (can_read_child) {
++ EXPECT_EQ(0, err_proc_read);
++ } else {
++ EXPECT_EQ(EACCES, err_proc_read);
++ }
++
++ /* Tests PTRACE_ATTACH on the child. */
+ ret = ptrace(PTRACE_ATTACH, child, NULL, 0);
+- if (variant->domain_parent) {
++ if (can_trace_child) {
++ EXPECT_EQ(0, ret);
++ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+- EXPECT_EQ(EACCES, err_proc_read);
+- } else {
+- EXPECT_EQ(0, ret);
+- EXPECT_EQ(0, err_proc_read);
+ }
++
+ if (ret == 0) {
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+--
+2.39.2
+
--- /dev/null
+From 18e64ec0888c0efc74dd751f978d97e046941ed1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Feb 2023 17:36:46 +0800
+Subject: selftests: nft_nat: ensuring the listening side is up before starting
+ the client
+
+From: Hangbin Liu <liuhangbin@gmail.com>
+
+[ Upstream commit 2067e7a00aa604b94de31d64f29b8893b1696f26 ]
+
+The test_local_dnat_portonly() function starts the client side as soon
+as it puts the listening side in the background. This can lead to a
+race condition where the server is not yet ready to listen. To ensure
+that the server side is up and running before the client starts, a
+delay is introduced in test_local_dnat_portonly().
+
+Before the fix:
+ # ./nft_nat.sh
+ PASS: netns routing/connectivity: ns0-rthlYrBU can reach ns1-rthlYrBU and ns2-rthlYrBU
+ PASS: ping to ns1-rthlYrBU was ip NATted to ns2-rthlYrBU
+ PASS: ping to ns1-rthlYrBU OK after ip nat output chain flush
+ PASS: ipv6 ping to ns1-rthlYrBU was ip6 NATted to ns2-rthlYrBU
+ 2023/02/27 04:11:03 socat[6055] E connect(5, AF=2 10.0.1.99:2000, 16): Connection refused
+ ERROR: inet port rewrite
+
+After the fix:
+ # ./nft_nat.sh
+ PASS: netns routing/connectivity: ns0-9sPJV6JJ can reach ns1-9sPJV6JJ and ns2-9sPJV6JJ
+ PASS: ping to ns1-9sPJV6JJ was ip NATted to ns2-9sPJV6JJ
+ PASS: ping to ns1-9sPJV6JJ OK after ip nat output chain flush
+ PASS: ipv6 ping to ns1-9sPJV6JJ was ip6 NATted to ns2-9sPJV6JJ
+ PASS: inet port rewrite without l3 address
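+
+As an aside, a readiness poll could replace the fixed delay; a rough
+sketch (assuming the iproute2 "ss" tool is available in the test
+environment; this is not part of the patch):
+
+  # Waits up to ~1s for the listener instead of sleeping unconditionally.
+  for i in $(seq 10); do
+          ip netns exec "$ns1" ss -lnt 'sport = :2000' | grep -q LISTEN && break
+          sleep 0.1
+  done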
+
+Fixes: 282e5f8fe907 ("netfilter: nat: really support inet nat without l3 address")
+Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/netfilter/nft_nat.sh | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
+index 4e15e81673104..67697d8ea59a5 100755
+--- a/tools/testing/selftests/netfilter/nft_nat.sh
++++ b/tools/testing/selftests/netfilter/nft_nat.sh
+@@ -404,6 +404,8 @@ EOF
+ echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
+ sc_s=$!
+
++ sleep 1
++
+ result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
+
+ if [ "$result" = "SERVER-inet" ];then
+--
+2.39.2
+
ext4-fix-warning-in-ext4_update_inline_data.patch
ext4-zero-i_disksize-when-initializing-the-bootloader-inode.patch
nfc-change-order-inside-nfc_se_io-error-path.patch
+landlock-add-object-management.patch
+selftests-landlock-add-user-space-tests.patch
+selftests-landlock-skip-overlayfs-tests-when-not-sup.patch
+udf-fix-off-by-one-error-when-discarding-preallocati.patch
+selftests-landlock-add-clang-format-exceptions.patch
+selftests-landlock-test-ptrace-as-much-as-possible-w.patch
+irq-fix-typos-in-comments.patch
+irqdomain-look-for-existing-mapping-only-once.patch
+irqdomain-refactor-__irq_domain_alloc_irqs.patch
+irqdomain-fix-mapping-creation-race.patch
+irqdomain-change-the-type-of-size-in-__irq_domain_ad.patch
+irqdomain-fix-domain-registration-race.patch
+software-node-introduce-device_add_software_node.patch
+usb-dwc3-pci-register-a-software-node-for-the-dwc3-p.patch
+usb-dwc3-pci-id-for-tiger-lake-cpu.patch
+usb-dwc3-pci-add-support-for-the-intel-raptor-lake-s.patch
+usb-dwc3-pci-add-support-for-the-intel-meteor-lake-p.patch
+usb-dwc3-pci-add-support-for-the-intel-meteor-lake-m.patch
+riscv-using-patchable_function_entry-instead-of-mcou.patch
+riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch
+riscv-ftrace-reduce-the-detour-code-size-to-half.patch
+iommu-vt-d-fix-lockdep-splat-in-intel_pasid_get_entr.patch
+iommu-vt-d-fix-pasid-directory-pointer-coherency.patch
+efi-earlycon-replace-open-coded-strnchrnul.patch
+arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch
+risc-v-avoid-dereferening-null-regs-in-die.patch
+riscv-avoid-enabling-interrupts-in-die.patch
+riscv-add-header-include-guards-to-insn.h.patch
+scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch
+ext4-fix-possible-corruption-when-moving-a-directory.patch
+drm-nouveau-kms-nv50-remove-unused-functions.patch
+drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch
+drm-msm-fix-potential-invalid-ptr-free.patch
+drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch
+drm-msm-document-and-rename-preempt_lock.patch
+drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch
+drm-msm-a5xx-fix-context-faults-during-ring-switch.patch
+bgmac-fix-initial-chip-reset-to-support-bcm5358.patch
+nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch
+powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch
+ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch
+selftests-nft_nat-ensuring-the-listening-side-is-up-.patch
+net-usb-lan78xx-remove-lots-of-set-but-unused-ret-va.patch
+net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch
+net-caif-fix-use-after-free-in-cfusbl_device_notify.patch
+net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch
+net-phylib-get-rid-of-unnecessary-locking.patch
+bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch
+netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch
+netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch
+btf-fix-resolving-btf_kind_var-after-array-struct-un.patch
+net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch
+scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch
+platform-x86-mlx_platform-select-regmap-instead-of-d.patch
+net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch
+sunrpc-fix-a-server-shutdown-leak.patch
+riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch
+risc-v-don-t-check-text_mutex-during-stop_machine.patch
--- /dev/null
+From 5006088daaf713e090bef68ab9d5a8082d043762 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jan 2021 12:49:11 +0300
+Subject: software node: Introduce device_add_software_node()
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit e68d0119e3284334de5650a1ac42ef4e179f895e ]
+
+This helper will register a software node and then assign it to a
+device at the same time. The function will also make sure that the
+device can't have more than one software node.
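+
+For illustration, usage in a driver could look like this (a minimal
+sketch with hypothetical names, not taken from this patch):
+
+  static const struct property_entry foo_props[] = {
+          PROPERTY_ENTRY_U32("foo,depth", 8),
+          { }
+  };
+
+  static const struct software_node foo_swnode = {
+          .properties = foo_props,
+  };
+
+  static int foo_init(struct device *dev)
+  {
+          /* Registers foo_swnode and assigns it to dev in one call. */
+          int ret = device_add_software_node(dev, &foo_swnode);
+
+          if (ret)
+                  return ret;
+          /* ... */
+          return 0;
+  }
+
+  /* Cleanup is symmetric: device_remove_software_node(dev). */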
+
+Acked-by: Felipe Balbi <balbi@kernel.org>
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://lore.kernel.org/r/20210115094914.88401-2-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/swnode.c | 71 +++++++++++++++++++++++++++++++++++-----
+ include/linux/property.h | 3 ++
+ 2 files changed, 65 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c
+index d2fb3eb5816c3..572a53e6f2e88 100644
+--- a/drivers/base/swnode.c
++++ b/drivers/base/swnode.c
+@@ -48,6 +48,19 @@ EXPORT_SYMBOL_GPL(is_software_node);
+ struct swnode, fwnode) : NULL; \
+ })
+
++static inline struct swnode *dev_to_swnode(struct device *dev)
++{
++ struct fwnode_handle *fwnode = dev_fwnode(dev);
++
++ if (!fwnode)
++ return NULL;
++
++ if (!is_software_node(fwnode))
++ fwnode = fwnode->secondary;
++
++ return to_swnode(fwnode);
++}
++
+ static struct swnode *
+ software_node_to_swnode(const struct software_node *node)
+ {
+@@ -850,22 +863,62 @@ void fwnode_remove_software_node(struct fwnode_handle *fwnode)
+ }
+ EXPORT_SYMBOL_GPL(fwnode_remove_software_node);
+
++/**
++ * device_add_software_node - Assign software node to a device
++ * @dev: The device the software node is meant for.
++ * @swnode: The software node.
++ *
++ * This function will register @swnode and make it the secondary firmware node
++ * pointer of @dev. If @dev has no primary node, then @swnode will become
++ * the primary node.
++ */
++int device_add_software_node(struct device *dev, const struct software_node *swnode)
++{
++ int ret;
++
++ /* Only one software node per device. */
++ if (dev_to_swnode(dev))
++ return -EBUSY;
++
++ ret = software_node_register(swnode);
++ if (ret)
++ return ret;
++
++ set_secondary_fwnode(dev, software_node_fwnode(swnode));
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(device_add_software_node);
++
++/**
++ * device_remove_software_node - Remove device's software node
++ * @dev: The device with the software node.
++ *
++ * This function will unregister the software node of @dev.
++ */
++void device_remove_software_node(struct device *dev)
++{
++ struct swnode *swnode;
++
++ swnode = dev_to_swnode(dev);
++ if (!swnode)
++ return;
++
++ software_node_notify(dev, KOBJ_REMOVE);
++ set_secondary_fwnode(dev, NULL);
++ kobject_put(&swnode->kobj);
++}
++EXPORT_SYMBOL_GPL(device_remove_software_node);
++
+ int software_node_notify(struct device *dev, unsigned long action)
+ {
+- struct fwnode_handle *fwnode = dev_fwnode(dev);
+ struct swnode *swnode;
+ int ret;
+
+- if (!fwnode)
+- return 0;
+-
+- if (!is_software_node(fwnode))
+- fwnode = fwnode->secondary;
+- if (!is_software_node(fwnode))
++ swnode = dev_to_swnode(dev);
++ if (!swnode)
+ return 0;
+
+- swnode = to_swnode(fwnode);
+-
+ switch (action) {
+ case KOBJ_ADD:
+ ret = sysfs_create_link(&dev->kobj, &swnode->kobj,
+diff --git a/include/linux/property.h b/include/linux/property.h
+index 2d4542629d80b..3b6093f6bd04c 100644
+--- a/include/linux/property.h
++++ b/include/linux/property.h
+@@ -485,4 +485,7 @@ fwnode_create_software_node(const struct property_entry *properties,
+ const struct fwnode_handle *parent);
+ void fwnode_remove_software_node(struct fwnode_handle *fwnode);
+
++int device_add_software_node(struct device *dev, const struct software_node *swnode);
++void device_remove_software_node(struct device *dev);
++
+ #endif /* _LINUX_PROPERTY_H_ */
+--
+2.39.2
+
--- /dev/null
+From c2f7cb0f1f288f12942f9ae91387ec83625a8490 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 3 Mar 2023 16:08:32 -0500
+Subject: SUNRPC: Fix a server shutdown leak
+
+From: Benjamin Coddington <bcodding@redhat.com>
+
+[ Upstream commit 9ca6705d9d609441d34f8b853e1e4a6369b3b171 ]
+
+Fix a race where kthread_stop() may prevent the threadfn from ever getting
+called. If that happens, the svc_rqst will not be cleaned up.
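+
+kthread_stop() returns -EINTR when the thread is stopped before its
+threadfn ever ran, so the caller can detect the lost race and release
+the svc_rqst itself. The pattern applied below, in short:
+
+    rqstp = kthread_data(task);
+    if (kthread_stop(task) == -EINTR)
+            svc_exit_thread(rqstp);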
+
+Fixes: ed6473ddc704 ("NFSv4: Fix callback server shutdown")
+Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sunrpc/svc.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index d38788cd9433a..af657a482ad2d 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -800,6 +800,7 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
+ static int
+ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ {
++ struct svc_rqst *rqstp;
+ struct task_struct *task;
+ unsigned int state = serv->sv_nrthreads-1;
+
+@@ -808,7 +809,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs)
+ task = choose_victim(serv, pool, &state);
+ if (task == NULL)
+ break;
+- kthread_stop(task);
++ rqstp = kthread_data(task);
++ /* Did we lose a race to svo_function threadfn? */
++ if (kthread_stop(task) == -EINTR)
++ svc_exit_thread(rqstp);
+ nrservs++;
+ } while (nrservs < 0);
+ return 0;
+--
+2.39.2
+
--- /dev/null
+From a0dd1017e6c63ac7a75b9b4c039369f452bd8eb4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 23 Jan 2023 14:29:15 +0100
+Subject: udf: Fix off-by-one error when discarding preallocation
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit f54aa97fb7e5329a373f9df4e5e213ced4fc8759 ]
+
+The condition determining whether the preallocation can be used had
+an off-by-one error, so we didn't discard the preallocation when the
+new allocation immediately followed it. This can then confuse code in
+inode_getblk().
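+
+Worked example (illustrative numbers): with 512-byte blocks
+(i_blkbits == 9) and i_lenExtents == 4096, a request for block 8
+starts at byte offset 8 << 9 == 4096. The old check (4096 > 4096)
+kept the preallocation even though the new allocation starts exactly
+where the preallocated extents end; the new check (4096 >= 4096)
+discards it.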
+
+CC: stable@vger.kernel.org
+Fixes: 16d055656814 ("udf: Discard preallocation before extending file with a hole")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/inode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/udf/inode.c b/fs/udf/inode.c
+index 81876284a83c0..d114774ecdea8 100644
+--- a/fs/udf/inode.c
++++ b/fs/udf/inode.c
+@@ -442,7 +442,7 @@ static int udf_get_block(struct inode *inode, sector_t block,
+ * Block beyond EOF and prealloc extents? Just discard preallocation
+ * as it is not useful and complicates things.
+ */
+- if (((loff_t)block) << inode->i_blkbits > iinfo->i_lenExtents)
++ if (((loff_t)block) << inode->i_blkbits >= iinfo->i_lenExtents)
+ udf_discard_prealloc(inode);
+ udf_clear_extent_cache(inode);
+ phys = inode_getblk(inode, block, &err, &new);
+--
+2.39.2
+
--- /dev/null
+From 47186e711c93ccd622578fa4d86cc7499dfb1aff Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Feb 2023 15:27:11 +0200
+Subject: usb: dwc3: pci: add support for the Intel Meteor Lake-M
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit 8e5248c3a8778f3e394e9a19195bc7a48f567ca2 ]
+
+This patch adds the necessary PCI ID for Intel Meteor Lake-M
+devices.
+
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230215132711.35668-1-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index 57f4b068cf29b..054429e9b7152 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -42,6 +42,7 @@
+ #define PCI_DEVICE_ID_INTEL_JSP 0x4dee
+ #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1
+ #define PCI_DEVICE_ID_INTEL_RPLS 0x7a61
++#define PCI_DEVICE_ID_INTEL_MTLM 0x7eb1
+ #define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1
+ #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
+ #define PCI_DEVICE_ID_INTEL_TGL 0x9a15
+@@ -394,6 +395,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLM),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+--
+2.39.2
+
--- /dev/null
+From 00f613c8f462bab5d623365c67b5ca8defeca5e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 25 Apr 2022 13:35:18 +0300
+Subject: usb: dwc3: pci: add support for the Intel Meteor Lake-P
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit 973e0f7a847ef13ade840d4c30729ce329a66895 ]
+
+This patch adds the necessary PCI IDs for Intel Meteor Lake-P
+devices.
+
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Cc: stable <stable@kernel.org>
+Link: https://lore.kernel.org/r/20220425103518.44028-1-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index cc97cbb67e5f4..57f4b068cf29b 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -42,6 +42,8 @@
+ #define PCI_DEVICE_ID_INTEL_JSP 0x4dee
+ #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1
+ #define PCI_DEVICE_ID_INTEL_RPLS 0x7a61
++#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1
++#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e
+ #define PCI_DEVICE_ID_INTEL_TGL 0x9a15
+
+ #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+@@ -392,6 +394,12 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTL),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+--
+2.39.2
+
--- /dev/null
+From 5dbce0765acfe614da7dd457dd1eff8e896594e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 14 Feb 2022 17:19:48 +0300
+Subject: usb: dwc3: pci: add support for the Intel Raptor Lake-S
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit 038438a25c45d5ac996e95a22fa9e76ff3d1f8c7 ]
+
+This patch adds the necessary PCI ID for Intel Raptor Lake-S
+devices.
+
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://lore.kernel.org/r/20220214141948.18637-1-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index 114d02ebf128a..cc97cbb67e5f4 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -41,6 +41,7 @@
+ #define PCI_DEVICE_ID_INTEL_TGPH 0x43ee
+ #define PCI_DEVICE_ID_INTEL_JSP 0x4dee
+ #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1
++#define PCI_DEVICE_ID_INTEL_RPLS 0x7a61
+ #define PCI_DEVICE_ID_INTEL_TGL 0x9a15
+
+ #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+@@ -388,6 +389,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+--
+2.39.2
+
--- /dev/null
+From 1ca135a6080ac8408989feb5e07d261595eba264 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jan 2021 12:49:13 +0300
+Subject: usb: dwc3: pci: ID for Tiger Lake CPU
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit 73203bde3a95a48f27b2454dc6b955280c641afe ]
+
+The Tiger Lake SoC (the versions of it that have an integrated
+USB4 controller) may have two DWC3 controllers. One is part of
+the PCH (Platform Controller Hub, i.e. the chipset) as usual,
+and the other is inside the actual CPU block.
+
+On all Intel platforms that have the two separate DWC3
+controllers, the one inside the CPU handles USB3 and only USB3
+traffic, while the PCH version handles USB2 alone. The reason
+for splitting the two buses like this is to allow easy USB3
+tunneling over USB4 connections. As USB2 is not tunneled over
+USB4, it has dedicated USB controllers (both xHCI and DWC3).
+
+Acked-by: Felipe Balbi <balbi@kernel.org>
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://lore.kernel.org/r/20210115094914.88401-4-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index 70cdb59c04c81..114d02ebf128a 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -41,6 +41,7 @@
+ #define PCI_DEVICE_ID_INTEL_TGPH 0x43ee
+ #define PCI_DEVICE_ID_INTEL_JSP 0x4dee
+ #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1
++#define PCI_DEVICE_ID_INTEL_TGL 0x9a15
+
+ #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511"
+ #define PCI_INTEL_BXT_FUNC_PMU_PWR 4
+@@ -387,6 +388,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS),
+ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL),
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
++
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NL_USB),
+ (kernel_ulong_t) &dwc3_pci_amd_swnode, },
+ { } /* Terminating Entry */
+--
+2.39.2
+
--- /dev/null
+From 7772a4337dc5115df15e37110e5c52e0db82c0da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jan 2021 12:49:12 +0300
+Subject: usb: dwc3: pci: Register a software node for the dwc3 platform device
+
+From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+
+[ Upstream commit e492ce9bcaa1c9661cd3dd6cff0eedf2fa640f31 ]
+
+By registering the software node directly instead of just
+the properties in it, the driver can also take advantage of
+the other features that software nodes provide.
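+
+Condensed before/after of the probe path (see the diff below):
+
+    /* before: properties were copied into the platform device */
+    ret = platform_device_add_properties(dwc->dwc3, p);
+
+    /* after: the shared software node is referenced directly */
+    ret = device_add_software_node(&dwc->dwc3->dev,
+                                   (void *)id->driver_data);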
+
+Acked-by: Felipe Balbi <balbi@kernel.org>
+Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
+Link: https://lore.kernel.org/r/20210115094914.88401-3-heikki.krogerus@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/dwc3-pci.c | 61 ++++++++++++++++++++++---------------
+ 1 file changed, 37 insertions(+), 24 deletions(-)
+
+diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
+index a5a8c5712bce4..70cdb59c04c81 100644
+--- a/drivers/usb/dwc3/dwc3-pci.c
++++ b/drivers/usb/dwc3/dwc3-pci.c
+@@ -145,6 +145,18 @@ static const struct property_entry dwc3_pci_amd_properties[] = {
+ {}
+ };
+
++static const struct software_node dwc3_pci_intel_swnode = {
++ .properties = dwc3_pci_intel_properties,
++};
++
++static const struct software_node dwc3_pci_intel_mrfld_swnode = {
++ .properties = dwc3_pci_mrfld_properties,
++};
++
++static const struct software_node dwc3_pci_amd_swnode = {
++ .properties = dwc3_pci_amd_properties,
++};
++
+ static int dwc3_pci_quirks(struct dwc3_pci *dwc)
+ {
+ struct pci_dev *pdev = dwc->pci;
+@@ -225,7 +237,6 @@ static void dwc3_pci_resume_work(struct work_struct *work)
+
+ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+ {
+- struct property_entry *p = (struct property_entry *)id->driver_data;
+ struct dwc3_pci *dwc;
+ struct resource res[2];
+ int ret;
+@@ -268,7 +279,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+ dwc->dwc3->dev.parent = dev;
+ ACPI_COMPANION_SET(&dwc->dwc3->dev, ACPI_COMPANION(dev));
+
+- ret = platform_device_add_properties(dwc->dwc3, p);
++ ret = device_add_software_node(&dwc->dwc3->dev, (void *)id->driver_data);
+ if (ret < 0)
+ goto err;
+
+@@ -291,6 +302,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id)
+
+ return 0;
+ err:
++ device_remove_software_node(&dwc->dwc3->dev);
+ platform_device_put(dwc->dwc3);
+ return ret;
+ }
+@@ -307,75 +319,76 @@ static void dwc3_pci_remove(struct pci_dev *pci)
+ #endif
+ device_init_wakeup(&pci->dev, false);
+ pm_runtime_get(&pci->dev);
++ device_remove_software_node(&dwc->dwc3->dev);
+ platform_device_unregister(dwc->dwc3);
+ }
+
+ static const struct pci_device_id dwc3_pci_id_table[] = {
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BSW),
+- (kernel_ulong_t) &dwc3_pci_intel_properties },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BYT),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD),
+- (kernel_ulong_t) &dwc3_pci_mrfld_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_mrfld_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLLP),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLH),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SPTLP),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SPTH),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BXT),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BXT_M),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_APL),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_KBP),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_GLK),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPLP),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPH),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPV),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICLLP),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_EHLLP),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGPLP),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGPH),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_JSP),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS),
+- (kernel_ulong_t) &dwc3_pci_intel_properties, },
++ (kernel_ulong_t) &dwc3_pci_intel_swnode, },
+
+ { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NL_USB),
+- (kernel_ulong_t) &dwc3_pci_amd_properties, },
++ (kernel_ulong_t) &dwc3_pci_amd_swnode, },
+ { } /* Terminating Entry */
+ };
+ MODULE_DEVICE_TABLE(pci, dwc3_pci_id_table);
+--
+2.39.2
+