From: Sasha Levin Date: Mon, 13 Mar 2023 12:06:05 +0000 (-0400) Subject: Fixes for 5.10 X-Git-Tag: v4.14.310~87^2~3 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=137f0a8edfb459d47ad2fe0fb79c8da8693c4744;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.10 Signed-off-by: Sasha Levin --- diff --git a/queue-5.10/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch b/queue-5.10/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch new file mode 100644 index 00000000000..23f2fa7e28e --- /dev/null +++ b/queue-5.10/arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch @@ -0,0 +1,104 @@ +From e71e11098ba2822015d64028427de64e99a16ff0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Feb 2023 17:10:47 +0100 +Subject: arm64: efi: Make efi_rt_lock a raw_spinlock + +From: Pierre Gondois + +[ Upstream commit 0e68b5517d3767562889f1d83fdb828c26adb24f ] + +Running a rt-kernel base on 6.2.0-rc3-rt1 on an Ampere Altra outputs +the following: + BUG: sleeping function called from invalid context at kernel/locking/spinlock_rt.c:46 + in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 9, name: kworker/u320:0 + preempt_count: 2, expected: 0 + RCU nest depth: 0, expected: 0 + 3 locks held by kworker/u320:0/9: + #0: ffff3fff8c27d128 ((wq_completion)efi_rts_wq){+.+.}-{0:0}, at: process_one_work (./include/linux/atomic/atomic-long.h:41) + #1: ffff80000861bdd0 ((work_completion)(&efi_rts_work.work)){+.+.}-{0:0}, at: process_one_work (./include/linux/atomic/atomic-long.h:41) + #2: ffffdf7e1ed3e460 (efi_rt_lock){+.+.}-{3:3}, at: efi_call_rts (drivers/firmware/efi/runtime-wrappers.c:101) + Preemption disabled at: + efi_virtmap_load (./arch/arm64/include/asm/mmu_context.h:248) + CPU: 0 PID: 9 Comm: kworker/u320:0 Tainted: G W 6.2.0-rc3-rt1 + Hardware name: WIWYNN Mt.Jade Server System B81.03001.0005/Mt.Jade Motherboard, BIOS 1.08.20220218 (SCP: 1.08.20220218) 2022/02/18 + Workqueue: efi_rts_wq efi_call_rts + Call trace: + dump_backtrace (arch/arm64/kernel/stacktrace.c:158) + show_stack 
(arch/arm64/kernel/stacktrace.c:165) + dump_stack_lvl (lib/dump_stack.c:107 (discriminator 4)) + dump_stack (lib/dump_stack.c:114) + __might_resched (kernel/sched/core.c:10134) + rt_spin_lock (kernel/locking/rtmutex.c:1769 (discriminator 4)) + efi_call_rts (drivers/firmware/efi/runtime-wrappers.c:101) + [...] + +This seems to come from commit ff7a167961d1 ("arm64: efi: Execute +runtime services from a dedicated stack") which adds a spinlock. This +spinlock is taken through: +efi_call_rts() +\-efi_call_virt() + \-efi_call_virt_pointer() + \-arch_efi_call_virt_setup() + +Make 'efi_rt_lock' a raw_spinlock to avoid being preempted. + +[ardb: The EFI runtime services are called with a different set of + translation tables, and are permitted to use the SIMD registers. + The context switch code preserves/restores neither, and so EFI + calls must be made with preemption disabled, rather than only + disabling migration.] + +Fixes: ff7a167961d1 ("arm64: efi: Execute runtime services from a dedicated stack") +Signed-off-by: Pierre Gondois +Cc: # v6.1+ +Signed-off-by: Ard Biesheuvel +Signed-off-by: Sasha Levin +--- + arch/arm64/include/asm/efi.h | 6 +++--- + arch/arm64/kernel/efi.c | 2 +- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h +index 16892f0d05ad6..538b6a1b198b9 100644 +--- a/arch/arm64/include/asm/efi.h ++++ b/arch/arm64/include/asm/efi.h +@@ -25,7 +25,7 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); + ({ \ + efi_virtmap_load(); \ + __efi_fpsimd_begin(); \ +- spin_lock(&efi_rt_lock); \ ++ raw_spin_lock(&efi_rt_lock); \ + }) + + #define arch_efi_call_virt(p, f, args...) 
\ +@@ -37,12 +37,12 @@ int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md); + + #define arch_efi_call_virt_teardown() \ + ({ \ +- spin_unlock(&efi_rt_lock); \ ++ raw_spin_unlock(&efi_rt_lock); \ + __efi_fpsimd_end(); \ + efi_virtmap_unload(); \ + }) + +-extern spinlock_t efi_rt_lock; ++extern raw_spinlock_t efi_rt_lock; + efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); + + #define ARCH_EFI_IRQ_FLAGS_MASK (PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT) +diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c +index 72f432d23ec5c..3ee3b3daca47b 100644 +--- a/arch/arm64/kernel/efi.c ++++ b/arch/arm64/kernel/efi.c +@@ -144,7 +144,7 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) + return s; + } + +-DEFINE_SPINLOCK(efi_rt_lock); ++DEFINE_RAW_SPINLOCK(efi_rt_lock); + + asmlinkage u64 *efi_rt_stack_top __ro_after_init; + +-- +2.39.2 + diff --git a/queue-5.10/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch b/queue-5.10/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch new file mode 100644 index 00000000000..ace740fdecf --- /dev/null +++ b/queue-5.10/bgmac-fix-initial-chip-reset-to-support-bcm5358.patch @@ -0,0 +1,92 @@ +From d4d1fc0c2871c7e8f9ee89b3e532ad1d1688edcb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Feb 2023 10:11:56 +0100 +Subject: bgmac: fix *initial* chip reset to support BCM5358 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Rafał Miłecki + +[ Upstream commit f99e6d7c4ed3be2531bd576425a5bd07fb133bd7 ] + +While bringing hardware up we should perform a full reset including the +switch bit (BGMAC_BCMA_IOCTL_SW_RESET aka SICF_SWRST). It's what +specification says and what reference driver does. + +This seems to be critical for the BCM5358. Without this hardware doesn't +get initialized properly and doesn't seem to transmit or receive any +packets. 
+ +Originally bgmac was calling bgmac_chip_reset() before setting +"has_robosw" property which resulted in expected behaviour. That has +changed as a side effect of adding platform device support which +regressed BCM5358 support. + +Fixes: f6a95a24957a ("net: ethernet: bgmac: Add platform device support") +Cc: Jon Mason +Signed-off-by: Rafał Miłecki +Reviewed-by: Leon Romanovsky +Reviewed-by: Florian Fainelli +Link: https://lore.kernel.org/r/20230227091156.19509-1-zajec5@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bgmac.c | 8 ++++++-- + drivers/net/ethernet/broadcom/bgmac.h | 2 ++ + 2 files changed, 8 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c +index 9960127f612ea..bb999e67d7736 100644 +--- a/drivers/net/ethernet/broadcom/bgmac.c ++++ b/drivers/net/ethernet/broadcom/bgmac.c +@@ -890,13 +890,13 @@ static void bgmac_chip_reset_idm_config(struct bgmac *bgmac) + + if (iost & BGMAC_BCMA_IOST_ATTACHED) { + flags = BGMAC_BCMA_IOCTL_SW_CLKEN; +- if (!bgmac->has_robosw) ++ if (bgmac->in_init || !bgmac->has_robosw) + flags |= BGMAC_BCMA_IOCTL_SW_RESET; + } + bgmac_clk_enable(bgmac, flags); + } + +- if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw) ++ if (iost & BGMAC_BCMA_IOST_ATTACHED && (bgmac->in_init || !bgmac->has_robosw)) + bgmac_idm_write(bgmac, BCMA_IOCTL, + bgmac_idm_read(bgmac, BCMA_IOCTL) & + ~BGMAC_BCMA_IOCTL_SW_RESET); +@@ -1490,6 +1490,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) + struct net_device *net_dev = bgmac->net_dev; + int err; + ++ bgmac->in_init = true; ++ + bgmac_chip_intrs_off(bgmac); + + net_dev->irq = bgmac->irq; +@@ -1542,6 +1544,8 @@ int bgmac_enet_probe(struct bgmac *bgmac) + /* Omit FCS from max MTU size */ + net_dev->max_mtu = BGMAC_RX_MAX_FRAME_SIZE - ETH_FCS_LEN; + ++ bgmac->in_init = false; ++ + err = register_netdev(bgmac->net_dev); + if (err) { + dev_err(bgmac->dev, "Cannot register net 
device\n"); +diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h +index 351c598a3ec6d..d1200b27af1ed 100644 +--- a/drivers/net/ethernet/broadcom/bgmac.h ++++ b/drivers/net/ethernet/broadcom/bgmac.h +@@ -512,6 +512,8 @@ struct bgmac { + int irq; + u32 int_mask; + ++ bool in_init; ++ + /* Current MAC state */ + int mac_speed; + int mac_duplex; +-- +2.39.2 + diff --git a/queue-5.10/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch b/queue-5.10/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch new file mode 100644 index 00000000000..fcfd493719a --- /dev/null +++ b/queue-5.10/bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch @@ -0,0 +1,114 @@ +From a11bbbffc1c785fdd6e539212e262c6a47fe0eb6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 18:43:57 -0800 +Subject: bnxt_en: Avoid order-5 memory allocation for TPA data + +From: Michael Chan + +[ Upstream commit accd7e23693aaaa9aa0d3e9eca0ae77d1be80ab3 ] + +The driver needs to keep track of all the possible concurrent TPA (GRO/LRO) +completions on the aggregation ring. On P5 chips, the maximum number +of concurrent TPA is 256 and the amount of memory we allocate is order-5 +on systems using 4K pages. Memory allocation failure has been reported: + +NetworkManager: page allocation failure: order:5, mode:0x40dc0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null),cpuset=/,mems_allowed=0-1 +CPU: 15 PID: 2995 Comm: NetworkManager Kdump: loaded Not tainted 5.10.156 #1 +Hardware name: Dell Inc. PowerEdge R660/0M1CC5, BIOS 0.2.25 08/12/2022 +Call Trace: + dump_stack+0x57/0x6e + warn_alloc.cold.120+0x7b/0xdd + ? _cond_resched+0x15/0x30 + ? __alloc_pages_direct_compact+0x15f/0x170 + __alloc_pages_slowpath.constprop.108+0xc58/0xc70 + __alloc_pages_nodemask+0x2d0/0x300 + kmalloc_order+0x24/0xe0 + kmalloc_order_trace+0x19/0x80 + bnxt_alloc_mem+0x1150/0x15c0 [bnxt_en] + ? 
bnxt_get_func_stat_ctxs+0x13/0x60 [bnxt_en] + __bnxt_open_nic+0x12e/0x780 [bnxt_en] + bnxt_open+0x10b/0x240 [bnxt_en] + __dev_open+0xe9/0x180 + __dev_change_flags+0x1af/0x220 + dev_change_flags+0x21/0x60 + do_setlink+0x35c/0x1100 + +Instead of allocating this big chunk of memory and dividing it up for the +concurrent TPA instances, allocate each small chunk separately for each +TPA instance. This will reduce it to order-0 allocations. + +Fixes: 79632e9ba386 ("bnxt_en: Expand bnxt_tpa_info struct to support 57500 chips.") +Reviewed-by: Somnath Kotur +Reviewed-by: Damodharam Ammepalli +Reviewed-by: Pavan Chebbi +Signed-off-by: Michael Chan +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/broadcom/bnxt/bnxt.c | 23 ++++++++++++----------- + 1 file changed, 12 insertions(+), 11 deletions(-) + +diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +index c4a768ce8c99d..6928c0b578abb 100644 +--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c ++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c +@@ -2854,7 +2854,7 @@ static int bnxt_alloc_ring(struct bnxt *bp, struct bnxt_ring_mem_info *rmem) + + static void bnxt_free_tpa_info(struct bnxt *bp) + { +- int i; ++ int i, j; + + for (i = 0; i < bp->rx_nr_rings; i++) { + struct bnxt_rx_ring_info *rxr = &bp->rx_ring[i]; +@@ -2862,8 +2862,10 @@ static void bnxt_free_tpa_info(struct bnxt *bp) + kfree(rxr->rx_tpa_idx_map); + rxr->rx_tpa_idx_map = NULL; + if (rxr->rx_tpa) { +- kfree(rxr->rx_tpa[0].agg_arr); +- rxr->rx_tpa[0].agg_arr = NULL; ++ for (j = 0; j < bp->max_tpa; j++) { ++ kfree(rxr->rx_tpa[j].agg_arr); ++ rxr->rx_tpa[j].agg_arr = NULL; ++ } + } + kfree(rxr->rx_tpa); + rxr->rx_tpa = NULL; +@@ -2872,14 +2874,13 @@ static void bnxt_free_tpa_info(struct bnxt *bp) + + static int bnxt_alloc_tpa_info(struct bnxt *bp) + { +- int i, j, total_aggs = 0; ++ int i, j; + + bp->max_tpa = MAX_TPA; + if (bp->flags & BNXT_FLAG_CHIP_P5) { + if (!bp->max_tpa_v2) + 
return 0; + bp->max_tpa = max_t(u16, bp->max_tpa_v2, MAX_TPA_P5); +- total_aggs = bp->max_tpa * MAX_SKB_FRAGS; + } + + for (i = 0; i < bp->rx_nr_rings; i++) { +@@ -2893,12 +2894,12 @@ static int bnxt_alloc_tpa_info(struct bnxt *bp) + + if (!(bp->flags & BNXT_FLAG_CHIP_P5)) + continue; +- agg = kcalloc(total_aggs, sizeof(*agg), GFP_KERNEL); +- rxr->rx_tpa[0].agg_arr = agg; +- if (!agg) +- return -ENOMEM; +- for (j = 1; j < bp->max_tpa; j++) +- rxr->rx_tpa[j].agg_arr = agg + j * MAX_SKB_FRAGS; ++ for (j = 0; j < bp->max_tpa; j++) { ++ agg = kcalloc(MAX_SKB_FRAGS, sizeof(*agg), GFP_KERNEL); ++ if (!agg) ++ return -ENOMEM; ++ rxr->rx_tpa[j].agg_arr = agg; ++ } + rxr->rx_tpa_idx_map = kzalloc(sizeof(*rxr->rx_tpa_idx_map), + GFP_KERNEL); + if (!rxr->rx_tpa_idx_map) +-- +2.39.2 + diff --git a/queue-5.10/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch b/queue-5.10/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch new file mode 100644 index 00000000000..35a8169a380 --- /dev/null +++ b/queue-5.10/btf-fix-resolving-btf_kind_var-after-array-struct-un.patch @@ -0,0 +1,98 @@ +From 01a893a6cbfda04f112334facb1d06d1465492ae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 6 Mar 2023 11:21:37 +0000 +Subject: btf: fix resolving BTF_KIND_VAR after ARRAY, STRUCT, UNION, PTR + +From: Lorenz Bauer + +[ Upstream commit 9b459804ff9973e173fabafba2a1319f771e85fa ] + +btf_datasec_resolve contains a bug that causes the following BTF +to fail loading: + + [1] DATASEC a size=2 vlen=2 + type_id=4 offset=0 size=1 + type_id=7 offset=1 size=1 + [2] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none) + [3] PTR (anon) type_id=2 + [4] VAR a type_id=3 linkage=0 + [5] INT (anon) size=1 bits_offset=0 nr_bits=8 encoding=(none) + [6] TYPEDEF td type_id=5 + [7] VAR b type_id=6 linkage=0 + +This error message is printed during btf_check_all_types: + + [1] DATASEC a size=2 vlen=2 + type_id=7 offset=1 size=1 Invalid type + +By tracing btf_*_resolve we can pinpoint the problem: + 
+ btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_TBD) = 0 + btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_TBD) = 0 + btf_ptr_resolve(depth: 3, type_id: 3, mode: RESOLVE_PTR) = 0 + btf_var_resolve(depth: 2, type_id: 4, mode: RESOLVE_PTR) = 0 + btf_datasec_resolve(depth: 1, type_id: 1, mode: RESOLVE_PTR) = -22 + +The last invocation of btf_datasec_resolve should invoke btf_var_resolve +by means of env_stack_push, instead it returns EINVAL. The reason is that +env_stack_push is never executed for the second VAR. + + if (!env_type_is_resolve_sink(env, var_type) && + !env_type_is_resolved(env, var_type_id)) { + env_stack_set_next_member(env, i + 1); + return env_stack_push(env, var_type, var_type_id); + } + +env_type_is_resolve_sink() changes its behaviour based on resolve_mode. +For RESOLVE_PTR, we can simplify the if condition to the following: + + (btf_type_is_modifier() || btf_type_is_ptr) && !env_type_is_resolved() + +Since we're dealing with a VAR the clause evaluates to false. This is +not sufficient to trigger the bug however. The log output and EINVAL +are only generated if btf_type_id_size() fails. + + if (!btf_type_id_size(btf, &type_id, &type_size)) { + btf_verifier_log_vsi(env, v->t, vsi, "Invalid type"); + return -EINVAL; + } + +Most types are sized, so for example a VAR referring to an INT is not a +problem. The bug is only triggered if a VAR points at a modifier. Since +we skipped btf_var_resolve that modifier was also never resolved, which +means that btf_resolved_type_id returns 0 aka VOID for the modifier. +This in turn causes btf_type_id_size to return NULL, triggering EINVAL. + +To summarise, the following conditions are necessary: + +- VAR pointing at PTR, STRUCT, UNION or ARRAY +- Followed by a VAR pointing at TYPEDEF, VOLATILE, CONST, RESTRICT or + TYPE_TAG + +The fix is to reset resolve_mode to RESOLVE_TBD before attempting to +resolve a VAR from a DATASEC. 
+ +Fixes: 1dc92851849c ("bpf: kernel side support for BTF Var and DataSec") +Signed-off-by: Lorenz Bauer +Link: https://lore.kernel.org/r/20230306112138.155352-2-lmb@isovalent.com +Signed-off-by: Martin KaFai Lau +Signed-off-by: Sasha Levin +--- + kernel/bpf/btf.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c +index 11b612e94e4e1..cb80d18a49b56 100644 +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -3541,6 +3541,7 @@ static int btf_datasec_resolve(struct btf_verifier_env *env, + struct btf *btf = env->btf; + u16 i; + ++ env->resolve_mode = RESOLVE_TBD; + for_each_vsi_from(i, v->next_member, v->t, vsi) { + u32 var_type_id = vsi->type, type_id, type_size = 0; + const struct btf_type *var_type = btf_type_by_id(env->btf, +-- +2.39.2 + diff --git a/queue-5.10/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch b/queue-5.10/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch new file mode 100644 index 00000000000..0b6306bfaad --- /dev/null +++ b/queue-5.10/drm-msm-a5xx-fix-context-faults-during-ring-switch.patch @@ -0,0 +1,49 @@ +From ba49194fa0fa953883ceba33e42bab3310521e19 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 05:09:56 +0300 +Subject: drm/msm/a5xx: fix context faults during ring switch + +From: Dmitry Baryshkov + +[ Upstream commit 32e7083429d46f29080626fe387ff90c086b1fbe ] + +The rptr_addr is set in the preempt_init_ring(), which is called from +a5xx_gpu_init(). It uses shadowptr() to set the address, however the +shadow_iova is not yet initialized at that time. Move the rptr_addr +setting to the a5xx_preempt_hw_init() which is called after setting the +shadow_iova, getting the correct value for the address. 
+ +Fixes: 8907afb476ac ("drm/msm: Allow a5xx to mark the RPTR shadow as privileged") +Suggested-by: Rob Clark +Signed-off-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/522640/ +Link: https://lore.kernel.org/r/20230214020956.164473-5-dmitry.baryshkov@linaro.org +Signed-off-by: Rob Clark +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +index 9da0aff0072d7..b8e71ad6f8d8a 100644 +--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +@@ -210,6 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu) + a5xx_gpu->preempt[i]->wptr = 0; + a5xx_gpu->preempt[i]->rptr = 0; + a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova; ++ a5xx_gpu->preempt[i]->rptr_addr = shadowptr(a5xx_gpu, gpu->rb[i]); + } + + /* Write a 0 to signal that we aren't switching pagetables */ +@@ -261,7 +262,6 @@ static int preempt_init_ring(struct a5xx_gpu *a5xx_gpu, + ptr->data = 0; + ptr->cntl = MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE; + +- ptr->rptr_addr = shadowptr(a5xx_gpu, ring); + ptr->counter = counters_iova; + + return 0; +-- +2.39.2 + diff --git a/queue-5.10/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch b/queue-5.10/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch new file mode 100644 index 00000000000..0e3eea0cdb9 --- /dev/null +++ b/queue-5.10/drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch @@ -0,0 +1,41 @@ +From 6bd9ad57e2a4d722982c28bea33c15721d1111fa Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 05:09:53 +0300 +Subject: drm/msm/a5xx: fix setting of the CP_PREEMPT_ENABLE_LOCAL register + +From: Dmitry Baryshkov + +[ Upstream commit a7a4c19c36de1e4b99b06e4060ccc8ab837725bc ] + +Rather than writing CP_PREEMPT_ENABLE_GLOBAL twice, follow the vendor +kernel and set 
CP_PREEMPT_ENABLE_LOCAL register instead. a5xx_submit() +will override it during submission, but let's get the sequence correct. + +Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets") +Signed-off-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/522638/ +Link: https://lore.kernel.org/r/20230214020956.164473-2-dmitry.baryshkov@linaro.org +Signed-off-by: Rob Clark +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +index 0ca7e53db112a..64da65ae6d67e 100644 +--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +@@ -144,8 +144,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) + OUT_RING(ring, 1); + + /* Enable local preemption for finegrain preemption */ +- OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); +- OUT_RING(ring, 0x02); ++ OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1); ++ OUT_RING(ring, 0x1); + + /* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */ + OUT_PKT7(ring, CP_YIELD_ENABLE, 1); +-- +2.39.2 + diff --git a/queue-5.10/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch b/queue-5.10/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch new file mode 100644 index 00000000000..71e32dd0c07 --- /dev/null +++ b/queue-5.10/drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch @@ -0,0 +1,42 @@ +From 0eeb7ba42b85bfcad5a307f6b26dce6f8b22f535 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 14 Feb 2023 05:09:55 +0300 +Subject: drm/msm/a5xx: fix the emptyness check in the preempt code + +From: Dmitry Baryshkov + +[ Upstream commit b4fb748f0b734ce1d2e7834998cc599fcbd25d67 ] + +Quoting Yassine: ring->memptrs->rptr is never updated and stays 0, so +the comparison always evaluates to false and get_next_ring always +returns ring 0 thinking it isn't empty. 
+ +Fix this by calling get_rptr() instead of reading rptr directly. + +Reported-by: Yassine Oudjana +Fixes: b1fc2839d2f9 ("drm/msm: Implement preemption for A5XX targets") +Signed-off-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/522642/ +Link: https://lore.kernel.org/r/20230214020956.164473-4-dmitry.baryshkov@linaro.org +Signed-off-by: Rob Clark +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +index 183de1139eeb6..9da0aff0072d7 100644 +--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +@@ -63,7 +63,7 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) + struct msm_ringbuffer *ring = gpu->rb[i]; + + spin_lock_irqsave(&ring->preempt_lock, flags); +- empty = (get_wptr(ring) == ring->memptrs->rptr); ++ empty = (get_wptr(ring) == gpu->funcs->get_rptr(gpu, ring)); + spin_unlock_irqrestore(&ring->preempt_lock, flags); + + if (!empty) +-- +2.39.2 + diff --git a/queue-5.10/drm-msm-document-and-rename-preempt_lock.patch b/queue-5.10/drm-msm-document-and-rename-preempt_lock.patch new file mode 100644 index 00000000000..fae94c90b4a --- /dev/null +++ b/queue-5.10/drm-msm-document-and-rename-preempt_lock.patch @@ -0,0 +1,143 @@ +From 3c8d9d7d6bd4c73398e181bb2c3084cf13d15cc1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Oct 2020 09:51:15 -0700 +Subject: drm/msm: Document and rename preempt_lock + +From: Rob Clark + +[ Upstream commit 77c406038e830a4b6219b14a116cd2a6ac9f4908 ] + +Before adding another lock, give ring->lock a more descriptive name. + +Signed-off-by: Rob Clark +Reviewed-by: Jordan Crouse +Reviewed-by: Kristian H. 
Kristensen +Signed-off-by: Rob Clark +Stable-dep-of: b4fb748f0b73 ("drm/msm/a5xx: fix the emptyness check in the preempt code") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- + drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 12 ++++++------ + drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++-- + drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +- + drivers/gpu/drm/msm/msm_ringbuffer.h | 7 ++++++- + 5 files changed, 17 insertions(+), 12 deletions(-) + +diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +index 64da65ae6d67e..6f84db97e20e8 100644 +--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +@@ -36,7 +36,7 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring))); + } + +- spin_lock_irqsave(&ring->lock, flags); ++ spin_lock_irqsave(&ring->preempt_lock, flags); + + /* Copy the shadow to the actual register */ + ring->cur = ring->next; +@@ -44,7 +44,7 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + /* Make sure to wrap wptr if we need to */ + wptr = get_wptr(ring); + +- spin_unlock_irqrestore(&ring->lock, flags); ++ spin_unlock_irqrestore(&ring->preempt_lock, flags); + + /* Make sure everything is posted before making a decision */ + mb(); +diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +index 7e04509c4e1f0..183de1139eeb6 100644 +--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c ++++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +@@ -45,9 +45,9 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) + if (!ring) + return; + +- spin_lock_irqsave(&ring->lock, flags); ++ spin_lock_irqsave(&ring->preempt_lock, flags); + wptr = get_wptr(ring); +- spin_unlock_irqrestore(&ring->lock, flags); ++ spin_unlock_irqrestore(&ring->preempt_lock, flags); + + gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); + } +@@ -62,9 
+62,9 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) + bool empty; + struct msm_ringbuffer *ring = gpu->rb[i]; + +- spin_lock_irqsave(&ring->lock, flags); ++ spin_lock_irqsave(&ring->preempt_lock, flags); + empty = (get_wptr(ring) == ring->memptrs->rptr); +- spin_unlock_irqrestore(&ring->lock, flags); ++ spin_unlock_irqrestore(&ring->preempt_lock, flags); + + if (!empty) + return ring; +@@ -132,9 +132,9 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) + } + + /* Make sure the wptr doesn't update while we're in motion */ +- spin_lock_irqsave(&ring->lock, flags); ++ spin_lock_irqsave(&ring->preempt_lock, flags); + a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring); +- spin_unlock_irqrestore(&ring->lock, flags); ++ spin_unlock_irqrestore(&ring->preempt_lock, flags); + + /* Set the address of the incoming preemption record */ + gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, +diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +index dffc133b8b1cc..29b40acedb389 100644 +--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c ++++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +@@ -65,7 +65,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) + OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring))); + } + +- spin_lock_irqsave(&ring->lock, flags); ++ spin_lock_irqsave(&ring->preempt_lock, flags); + + /* Copy the shadow to the actual register */ + ring->cur = ring->next; +@@ -73,7 +73,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) + /* Make sure to wrap wptr if we need to */ + wptr = get_wptr(ring); + +- spin_unlock_irqrestore(&ring->lock, flags); ++ spin_unlock_irqrestore(&ring->preempt_lock, flags); + + /* Make sure everything is posted before making a decision */ + mb(); +diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c +index 935bf9b1d9418..1b6958e908dca 100644 +--- a/drivers/gpu/drm/msm/msm_ringbuffer.c ++++ 
b/drivers/gpu/drm/msm/msm_ringbuffer.c +@@ -46,7 +46,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + ring->memptrs_iova = memptrs_iova; + + INIT_LIST_HEAD(&ring->submits); +- spin_lock_init(&ring->lock); ++ spin_lock_init(&ring->preempt_lock); + + snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); + +diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h +index 0987d6bf848cf..4956d1bc5d0e1 100644 +--- a/drivers/gpu/drm/msm/msm_ringbuffer.h ++++ b/drivers/gpu/drm/msm/msm_ringbuffer.h +@@ -46,7 +46,12 @@ struct msm_ringbuffer { + struct msm_rbmemptrs *memptrs; + uint64_t memptrs_iova; + struct msm_fence_context *fctx; +- spinlock_t lock; ++ ++ /* ++ * preempt_lock protects preemption and serializes wptr updates against ++ * preemption. Can be aquired from irq context. ++ */ ++ spinlock_t preempt_lock; + }; + + struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, +-- +2.39.2 + diff --git a/queue-5.10/drm-msm-fix-potential-invalid-ptr-free.patch b/queue-5.10/drm-msm-fix-potential-invalid-ptr-free.patch new file mode 100644 index 00000000000..8434a989c57 --- /dev/null +++ b/queue-5.10/drm-msm-fix-potential-invalid-ptr-free.patch @@ -0,0 +1,49 @@ +From 75adf877270713e38c7156498a872511c6bffb9e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Feb 2023 15:50:48 -0800 +Subject: drm/msm: Fix potential invalid ptr free + +From: Rob Clark + +[ Upstream commit 8a86f213f4426f19511a16d886871805b35c3acf ] + +The error path cleanup expects that chain and syncobj are either NULL or +valid pointers. But post_deps was not allocated with __GFP_ZERO. 
+ +Fixes: ab723b7a992a ("drm/msm: Add syncobj support.") +Signed-off-by: Rob Clark +Reviewed-by: Dmitry Baryshkov +Reviewed-by: Dmitry Osipenko +Patchwork: https://patchwork.freedesktop.org/patch/523051/ +Link: https://lore.kernel.org/r/20230215235048.1166484-1-robdclark@gmail.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/msm_gem_submit.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c +index aa5c60a7132d8..c4e5037512b9d 100644 +--- a/drivers/gpu/drm/msm/msm_gem_submit.c ++++ b/drivers/gpu/drm/msm/msm_gem_submit.c +@@ -494,8 +494,8 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, + int ret = 0; + uint32_t i, j; + +- post_deps = kmalloc_array(nr_syncobjs, sizeof(*post_deps), +- GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); ++ post_deps = kcalloc(nr_syncobjs, sizeof(*post_deps), ++ GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!post_deps) + return ERR_PTR(-ENOMEM); + +@@ -510,7 +510,6 @@ static struct msm_submit_post_dep *msm_parse_post_deps(struct drm_device *dev, + } + + post_deps[i].point = syncobj_desc.point; +- post_deps[i].chain = NULL; + + if (syncobj_desc.flags) { + ret = -EINVAL; +-- +2.39.2 + diff --git a/queue-5.10/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch b/queue-5.10/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch new file mode 100644 index 00000000000..e161a9c04d1 --- /dev/null +++ b/queue-5.10/drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch @@ -0,0 +1,64 @@ +From ebecc8c1d6e71abb356922a23a6f45847343f593 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 31 Oct 2022 12:42:29 +0100 +Subject: drm/nouveau/kms/nv50: fix nv50_wndw_new_ prototype + +From: Jiri Slaby (SUSE) + +[ Upstream commit 3638a820c5c3b52f327cebb174fd4274bee08aa7 ] + +gcc-13 warns about mismatching types for enums. 
That revealed switched +arguments of nv50_wndw_new_(): + drivers/gpu/drm/nouveau/dispnv50/wndw.c:696:1: error: conflicting types for 'nv50_wndw_new_' due to enum/integer mismatch; have 'int(const struct nv50_wndw_func *, struct drm_device *, enum drm_plane_type, const char *, int, const u32 *, u32, enum nv50_disp_interlock_type, u32, struct nv50_wndw **)' + drivers/gpu/drm/nouveau/dispnv50/wndw.h:36:5: note: previous declaration of 'nv50_wndw_new_' with type 'int(const struct nv50_wndw_func *, struct drm_device *, enum drm_plane_type, const char *, int, const u32 *, enum nv50_disp_interlock_type, u32, u32, struct nv50_wndw **)' + +It can be barely visible, but the declaration says about the parameters +in the middle: + enum nv50_disp_interlock_type, + u32 interlock_data, + u32 heads, + +While the definition states differently: + u32 heads, + enum nv50_disp_interlock_type interlock_type, + u32 interlock_data, + +Unify/fix the declaration to match the definition. + +Fixes: 53e0a3e70de6 ("drm/nouveau/kms/nv50-: simplify tracking of channel interlocks") +Cc: Martin Liska +Cc: Ben Skeggs +Cc: Karol Herbst +Cc: Lyude Paul +Cc: David Airlie +Cc: Daniel Vetter +Cc: dri-devel@lists.freedesktop.org +Cc: nouveau@lists.freedesktop.org +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Jiri Slaby (SUSE) +Signed-off-by: Karol Herbst +Link: https://patchwork.freedesktop.org/patch/msgid/20221031114229.10289-1-jirislaby@kernel.org +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/nouveau/dispnv50/wndw.h | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h +index 8bed195ae098a..77bf124319fbd 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h ++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h +@@ -38,8 +38,9 @@ struct nv50_wndw { + + int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *, + enum drm_plane_type, const char *name, int index, +- const u32 *format, enum 
nv50_disp_interlock_type, +- u32 interlock_data, u32 heads, struct nv50_wndw **); ++ const u32 *format, u32 heads, ++ enum nv50_disp_interlock_type, u32 interlock_data, ++ struct nv50_wndw **); + void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock, + struct nv50_wndw_atom *); + void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush, +-- +2.39.2 + diff --git a/queue-5.10/drm-nouveau-kms-nv50-remove-unused-functions.patch b/queue-5.10/drm-nouveau-kms-nv50-remove-unused-functions.patch new file mode 100644 index 00000000000..8e7f2bb1e2c --- /dev/null +++ b/queue-5.10/drm-nouveau-kms-nv50-remove-unused-functions.patch @@ -0,0 +1,100 @@ +From 08dfe9bad732d3e0ae2dc55714376d34aa89b0bd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 1 Jun 2022 20:46:06 +1000 +Subject: drm/nouveau/kms/nv50-: remove unused functions + +From: Ben Skeggs + +[ Upstream commit 89ed996b888faaf11c69bb4cbc19f21475c9050e ] + +Signed-off-by: Ben Skeggs +Reviewed-by: Dave Airlie +Signed-off-by: Dave Airlie +Stable-dep-of: 3638a820c5c3 ("drm/nouveau/kms/nv50: fix nv50_wndw_new_ prototype") +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/nouveau/dispnv50/disp.c | 16 ---------------- + drivers/gpu/drm/nouveau/dispnv50/wndw.c | 12 ------------ + drivers/gpu/drm/nouveau/dispnv50/wndw.h | 2 -- + 3 files changed, 30 deletions(-) + +diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c +index c2d34c91e840c..804ea035fa46b 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c ++++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c +@@ -2555,14 +2555,6 @@ nv50_display_fini(struct drm_device *dev, bool runtime, bool suspend) + { + struct nouveau_drm *drm = nouveau_drm(dev); + struct drm_encoder *encoder; +- struct drm_plane *plane; +- +- drm_for_each_plane(plane, dev) { +- struct nv50_wndw *wndw = nv50_wndw(plane); +- if (plane->funcs != &nv50_wndw) +- continue; +- nv50_wndw_fini(wndw); +- } + + list_for_each_entry(encoder, 
&dev->mode_config.encoder_list, head) { + if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) +@@ -2578,7 +2570,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime) + { + struct nv50_core *core = nv50_disp(dev)->core; + struct drm_encoder *encoder; +- struct drm_plane *plane; + + if (resume || runtime) + core->func->init(core); +@@ -2591,13 +2582,6 @@ nv50_display_init(struct drm_device *dev, bool resume, bool runtime) + } + } + +- drm_for_each_plane(plane, dev) { +- struct nv50_wndw *wndw = nv50_wndw(plane); +- if (plane->funcs != &nv50_wndw) +- continue; +- nv50_wndw_init(wndw); +- } +- + return 0; + } + +diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c +index f07916ffe42cb..831125b4453df 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c ++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c +@@ -690,18 +690,6 @@ nv50_wndw_notify(struct nvif_notify *notify) + return NVIF_NOTIFY_KEEP; + } + +-void +-nv50_wndw_fini(struct nv50_wndw *wndw) +-{ +- nvif_notify_put(&wndw->notify); +-} +- +-void +-nv50_wndw_init(struct nv50_wndw *wndw) +-{ +- nvif_notify_get(&wndw->notify); +-} +- + static const u64 nv50_cursor_format_modifiers[] = { + DRM_FORMAT_MOD_LINEAR, + DRM_FORMAT_MOD_INVALID, +diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h +index 3278e28800343..8bed195ae098a 100644 +--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h ++++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h +@@ -40,8 +40,6 @@ int nv50_wndw_new_(const struct nv50_wndw_func *, struct drm_device *, + enum drm_plane_type, const char *name, int index, + const u32 *format, enum nv50_disp_interlock_type, + u32 interlock_data, u32 heads, struct nv50_wndw **); +-void nv50_wndw_init(struct nv50_wndw *); +-void nv50_wndw_fini(struct nv50_wndw *); + void nv50_wndw_flush_set(struct nv50_wndw *, u32 *interlock, + struct nv50_wndw_atom *); + void nv50_wndw_flush_clr(struct nv50_wndw *, u32 *interlock, bool flush, +-- 
+2.39.2 + diff --git a/queue-5.10/efi-earlycon-replace-open-coded-strnchrnul.patch b/queue-5.10/efi-earlycon-replace-open-coded-strnchrnul.patch new file mode 100644 index 00000000000..33a8c61278b --- /dev/null +++ b/queue-5.10/efi-earlycon-replace-open-coded-strnchrnul.patch @@ -0,0 +1,55 @@ +From 6e92a7ec121c7e23afadefc601975ace938df6db Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 Dec 2022 00:12:16 +0200 +Subject: efi/earlycon: Replace open coded strnchrnul() + +From: Andy Shevchenko + +[ Upstream commit b7a1cd243839cc1459fbc83a7a62e3b57f29f497 ] + +strnchrnul() can be called in the early stages. Replace +open coded variant in the EFI early console driver. + +Signed-off-by: Andy Shevchenko +Signed-off-by: Ard Biesheuvel +Stable-dep-of: 0e68b5517d37 ("arm64: efi: Make efi_rt_lock a raw_spinlock") +Signed-off-by: Sasha Levin +--- + drivers/firmware/efi/earlycon.c | 13 ++++--------- + 1 file changed, 4 insertions(+), 9 deletions(-) + +diff --git a/drivers/firmware/efi/earlycon.c b/drivers/firmware/efi/earlycon.c +index a52236e11e5f7..fc233b6f27cb2 100644 +--- a/drivers/firmware/efi/earlycon.c ++++ b/drivers/firmware/efi/earlycon.c +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + #include + +@@ -143,16 +144,10 @@ efi_earlycon_write(struct console *con, const char *str, unsigned int num) + len = si->lfb_linelength; + + while (num) { +- unsigned int linemax; +- unsigned int h, count = 0; ++ unsigned int linemax = (si->lfb_width - efi_x) / font->width; ++ unsigned int h, count; + +- for (s = str; *s && *s != '\n'; s++) { +- if (count == num) +- break; +- count++; +- } +- +- linemax = (si->lfb_width - efi_x) / font->width; ++ count = strnchrnul(str, num, '\n') - str; + if (count > linemax) + count = linemax; + +-- +2.39.2 + diff --git a/queue-5.10/ext4-fix-possible-corruption-when-moving-a-directory.patch b/queue-5.10/ext4-fix-possible-corruption-when-moving-a-directory.patch new file mode 100644 index 00000000000..ac09fa34aa9 --- /dev/null 
+++ b/queue-5.10/ext4-fix-possible-corruption-when-moving-a-directory.patch @@ -0,0 +1,59 @@ +From f3fd8dc76223f6f1f523e9da85d149c534d08103 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 26 Jan 2023 12:22:21 +0100 +Subject: ext4: Fix possible corruption when moving a directory + +From: Jan Kara + +[ Upstream commit 0813299c586b175d7edb25f56412c54b812d0379 ] + +When we are renaming a directory to a different directory, we need to +update '..' entry in the moved directory. However nothing prevents moved +directory from being modified and even converted from the inline format +to the normal format. When such race happens the rename code gets +confused and we crash. Fix the problem by locking the moved directory. + +CC: stable@vger.kernel.org +Fixes: 32f7f22c0b52 ("ext4: let ext4_rename handle inline dir") +Signed-off-by: Jan Kara +Link: https://lore.kernel.org/r/20230126112221.11866-1-jack@suse.cz +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/namei.c | 11 ++++++++++- + 1 file changed, 10 insertions(+), 1 deletion(-) + +diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c +index 6f335d58183ee..17590bb769147 100644 +--- a/fs/ext4/namei.c ++++ b/fs/ext4/namei.c +@@ -3923,9 +3923,16 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, + if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) + goto end_rename; + } ++ /* ++ * We need to protect against old.inode directory getting ++ * converted from inline directory format into a normal one. 
++ */ ++ inode_lock_nested(old.inode, I_MUTEX_NONDIR2); + retval = ext4_rename_dir_prepare(handle, &old); +- if (retval) ++ if (retval) { ++ inode_unlock(old.inode); + goto end_rename; ++ } + } + /* + * If we're renaming a file within an inline_data dir and adding or +@@ -4050,6 +4057,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, + } else { + ext4_journal_stop(handle); + } ++ if (old.dir_bh) ++ inode_unlock(old.inode); + release_bh: + brelse(old.dir_bh); + brelse(old.bh); +-- +2.39.2 + diff --git a/queue-5.10/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch b/queue-5.10/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch new file mode 100644 index 00000000000..f59bda9ee56 --- /dev/null +++ b/queue-5.10/ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch @@ -0,0 +1,113 @@ +From 1339c021f60dc9b4a3e13f93010a5ec6da10eddb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Feb 2023 15:30:24 +0000 +Subject: ila: do not generate empty messages in ila_xlat_nl_cmd_get_mapping() + +From: Eric Dumazet + +[ Upstream commit 693aa2c0d9b6d5b1f2745d31b6e70d09dbbaf06e ] + +ila_xlat_nl_cmd_get_mapping() generates an empty skb, +triggering a recent sanity check [1]. + +Instead, return an error code, so that user space +can get it. 
+ +[1] +skb_assert_len +WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 skb_assert_len include/linux/skbuff.h:2527 [inline] +WARNING: CPU: 0 PID: 5923 at include/linux/skbuff.h:2527 __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 +Modules linked in: +CPU: 0 PID: 5923 Comm: syz-executor269 Not tainted 6.2.0-syzkaller-18300-g2ebd1fbb946d #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/21/2023 +pstate: 60400005 (nZCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +pc : skb_assert_len include/linux/skbuff.h:2527 [inline] +pc : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 +lr : skb_assert_len include/linux/skbuff.h:2527 [inline] +lr : __dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 +sp : ffff80001e0d6c40 +x29: ffff80001e0d6e60 x28: dfff800000000000 x27: ffff0000c86328c0 +x26: dfff800000000000 x25: ffff0000c8632990 x24: ffff0000c8632a00 +x23: 0000000000000000 x22: 1fffe000190c6542 x21: ffff0000c8632a10 +x20: ffff0000c8632a00 x19: ffff80001856e000 x18: ffff80001e0d5fc0 +x17: 0000000000000000 x16: ffff80001235d16c x15: 0000000000000000 +x14: 0000000000000000 x13: 0000000000000001 x12: 0000000000000001 +x11: ff80800008353a30 x10: 0000000000000000 x9 : 21567eaf25bfb600 +x8 : 21567eaf25bfb600 x7 : 0000000000000001 x6 : 0000000000000001 +x5 : ffff80001e0d6558 x4 : ffff800015c74760 x3 : ffff800008596744 +x2 : 0000000000000001 x1 : 0000000100000000 x0 : 000000000000000e +Call trace: +skb_assert_len include/linux/skbuff.h:2527 [inline] +__dev_queue_xmit+0x1bc0/0x3488 net/core/dev.c:4156 +dev_queue_xmit include/linux/netdevice.h:3033 [inline] +__netlink_deliver_tap_skb net/netlink/af_netlink.c:307 [inline] +__netlink_deliver_tap+0x45c/0x6f8 net/netlink/af_netlink.c:325 +netlink_deliver_tap+0xf4/0x174 net/netlink/af_netlink.c:338 +__netlink_sendskb net/netlink/af_netlink.c:1283 [inline] +netlink_sendskb+0x6c/0x154 net/netlink/af_netlink.c:1292 +netlink_unicast+0x334/0x8d4 net/netlink/af_netlink.c:1380 +nlmsg_unicast 
include/net/netlink.h:1099 [inline] +genlmsg_unicast include/net/genetlink.h:433 [inline] +genlmsg_reply include/net/genetlink.h:443 [inline] +ila_xlat_nl_cmd_get_mapping+0x620/0x7d0 net/ipv6/ila/ila_xlat.c:493 +genl_family_rcv_msg_doit net/netlink/genetlink.c:968 [inline] +genl_family_rcv_msg net/netlink/genetlink.c:1048 [inline] +genl_rcv_msg+0x938/0xc1c net/netlink/genetlink.c:1065 +netlink_rcv_skb+0x214/0x3c4 net/netlink/af_netlink.c:2574 +genl_rcv+0x38/0x50 net/netlink/genetlink.c:1076 +netlink_unicast_kernel net/netlink/af_netlink.c:1339 [inline] +netlink_unicast+0x660/0x8d4 net/netlink/af_netlink.c:1365 +netlink_sendmsg+0x800/0xae0 net/netlink/af_netlink.c:1942 +sock_sendmsg_nosec net/socket.c:714 [inline] +sock_sendmsg net/socket.c:734 [inline] +____sys_sendmsg+0x558/0x844 net/socket.c:2479 +___sys_sendmsg net/socket.c:2533 [inline] +__sys_sendmsg+0x26c/0x33c net/socket.c:2562 +__do_sys_sendmsg net/socket.c:2571 [inline] +__se_sys_sendmsg net/socket.c:2569 [inline] +__arm64_sys_sendmsg+0x80/0x94 net/socket.c:2569 +__invoke_syscall arch/arm64/kernel/syscall.c:38 [inline] +invoke_syscall+0x98/0x2c0 arch/arm64/kernel/syscall.c:52 +el0_svc_common+0x138/0x258 arch/arm64/kernel/syscall.c:142 +do_el0_svc+0x64/0x198 arch/arm64/kernel/syscall.c:193 +el0_svc+0x58/0x168 arch/arm64/kernel/entry-common.c:637 +el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:655 +el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:591 +irq event stamp: 136484 +hardirqs last enabled at (136483): [] __up_console_sem+0x60/0xb4 kernel/printk/printk.c:345 +hardirqs last disabled at (136484): [] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:405 +softirqs last enabled at (136418): [] softirq_handle_end kernel/softirq.c:414 [inline] +softirqs last enabled at (136418): [] __do_softirq+0xd4c/0xfa4 kernel/softirq.c:600 +softirqs last disabled at (136371): [] ____do_softirq+0x14/0x20 arch/arm64/kernel/irq.c:80 +---[ end trace 0000000000000000 ]--- +skb len=0 headroom=0 headlen=0 
tailroom=192 +mac=(0,0) net=(0,-1) trans=-1 +shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0)) +csum(0x0 ip_summed=0 complete_sw=0 valid=0 level=0) +hash(0x0 sw=0 l4=0) proto=0x0010 pkttype=6 iif=0 +dev name=nlmon0 feat=0x0000000000005861 + +Fixes: 7f00feaf1076 ("ila: Add generic ILA translation facility") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv6/ila/ila_xlat.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c +index a1ac0e3d8c60c..163668531a57f 100644 +--- a/net/ipv6/ila/ila_xlat.c ++++ b/net/ipv6/ila/ila_xlat.c +@@ -477,6 +477,7 @@ int ila_xlat_nl_cmd_get_mapping(struct sk_buff *skb, struct genl_info *info) + + rcu_read_lock(); + ++ ret = -ESRCH; + ila = ila_lookup_by_params(&xp, ilan); + if (ila) { + ret = ila_dump_info(ila, +-- +2.39.2 + diff --git a/queue-5.10/iommu-vt-d-fix-lockdep-splat-in-intel_pasid_get_entr.patch b/queue-5.10/iommu-vt-d-fix-lockdep-splat-in-intel_pasid_get_entr.patch new file mode 100644 index 00000000000..9e4bc7052f9 --- /dev/null +++ b/queue-5.10/iommu-vt-d-fix-lockdep-splat-in-intel_pasid_get_entr.patch @@ -0,0 +1,101 @@ +From 6f729f8ef0d0d13b35f56334c130830099a64bbe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 20 Mar 2021 10:09:16 +0800 +Subject: iommu/vt-d: Fix lockdep splat in intel_pasid_get_entry() + +From: Lu Baolu + +[ Upstream commit 803766cbf85fb8edbf896729bbefc2d38dcf1e0a ] + +The pasid_lock is used to synchronize different threads from modifying a +same pasid directory entry at the same time. It causes below lockdep splat. 
+ +[ 83.296538] ======================================================== +[ 83.296538] WARNING: possible irq lock inversion dependency detected +[ 83.296539] 5.12.0-rc3+ #25 Tainted: G W +[ 83.296539] -------------------------------------------------------- +[ 83.296540] bash/780 just changed the state of lock: +[ 83.296540] ffffffff82b29c98 (device_domain_lock){..-.}-{2:2}, at: + iommu_flush_dev_iotlb.part.0+0x32/0x110 +[ 83.296547] but this lock took another, SOFTIRQ-unsafe lock in the past: +[ 83.296547] (pasid_lock){+.+.}-{2:2} +[ 83.296548] + + and interrupts could create inverse lock ordering between them. + +[ 83.296549] other info that might help us debug this: +[ 83.296549] Chain exists of: + device_domain_lock --> &iommu->lock --> pasid_lock +[ 83.296551] Possible interrupt unsafe locking scenario: + +[ 83.296551] CPU0 CPU1 +[ 83.296552] ---- ---- +[ 83.296552] lock(pasid_lock); +[ 83.296553] local_irq_disable(); +[ 83.296553] lock(device_domain_lock); +[ 83.296554] lock(&iommu->lock); +[ 83.296554] +[ 83.296554] lock(device_domain_lock); +[ 83.296555] + *** DEADLOCK *** + +Fix it by replacing the pasid_lock with an atomic exchange operation. 
+ +Reported-and-tested-by: Dave Jiang +Signed-off-by: Lu Baolu +Link: https://lore.kernel.org/r/20210320020916.640115-1-baolu.lu@linux.intel.com +Signed-off-by: Joerg Roedel +Stable-dep-of: 194b3348bdbb ("iommu/vt-d: Fix PASID directory pointer coherency") +Signed-off-by: Sasha Levin +--- + drivers/iommu/intel/pasid.c | 21 +++++++++++++-------- + 1 file changed, 13 insertions(+), 8 deletions(-) + +diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c +index 86fd49ae7f612..f821153390e53 100644 +--- a/drivers/iommu/intel/pasid.c ++++ b/drivers/iommu/intel/pasid.c +@@ -24,7 +24,6 @@ + /* + * Intel IOMMU system wide PASID name space: + */ +-static DEFINE_SPINLOCK(pasid_lock); + u32 intel_pasid_max_id = PASID_MAX; + + int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid) +@@ -259,19 +258,25 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) + dir_index = pasid >> PASID_PDE_SHIFT; + index = pasid & PASID_PTE_MASK; + +- spin_lock(&pasid_lock); ++retry: + entries = get_pasid_table_from_pde(&dir[dir_index]); + if (!entries) { + entries = alloc_pgtable_page(info->iommu->node); +- if (!entries) { +- spin_unlock(&pasid_lock); ++ if (!entries) + return NULL; +- } + +- WRITE_ONCE(dir[dir_index].val, +- (u64)virt_to_phys(entries) | PASID_PTE_PRESENT); ++ /* ++ * The pasid directory table entry won't be freed after ++ * allocation. No worry about the race with free and ++ * clear. However, this entry might be populated by others ++ * while we are preparing it. Use theirs with a retry. 
++ */ ++ if (cmpxchg64(&dir[dir_index].val, 0ULL, ++ (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { ++ free_pgtable_page(entries); ++ goto retry; ++ } + } +- spin_unlock(&pasid_lock); + + return &entries[index]; + } +-- +2.39.2 + diff --git a/queue-5.10/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch b/queue-5.10/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch new file mode 100644 index 00000000000..8f482b6309a --- /dev/null +++ b/queue-5.10/iommu-vt-d-fix-pasid-directory-pointer-coherency.patch @@ -0,0 +1,82 @@ +From c5ea5be059771ebbdbacad4dbe4a9958b7cd028b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Feb 2023 21:08:15 +0800 +Subject: iommu/vt-d: Fix PASID directory pointer coherency + +From: Jacob Pan + +[ Upstream commit 194b3348bdbb7db65375c72f3f774aee4cc6614e ] + +On platforms that do not support IOMMU Extended capability bit 0 +Page-walk Coherency, CPU caches are not snooped when IOMMU is accessing +any translation structures. IOMMU access goes only directly to +memory. Intel IOMMU code was missing a flush for the PASID table +directory that resulted in the unrecoverable fault as shown below. + +This patch adds clflush calls whenever allocating and updating +a PASID table directory to ensure cache coherency. + +On the reverse direction, there's no need to clflush the PASID directory +pointer when we deactivate a context entry in that IOMMU hardware will +not see the old PASID directory pointer after we clear the context entry. +PASID directory entries are also never freed once allocated. 
+ + DMAR: DRHD: handling fault status reg 3 + DMAR: [DMA Read NO_PASID] Request device [00:0d.2] fault addr 0x1026a4000 + [fault reason 0x51] SM: Present bit in Directory Entry is clear + DMAR: Dump dmar1 table entries for IOVA 0x1026a4000 + DMAR: scalable mode root entry: hi 0x0000000102448001, low 0x0000000101b3e001 + DMAR: context entry: hi 0x0000000000000000, low 0x0000000101b4d401 + DMAR: pasid dir entry: 0x0000000101b4e001 + DMAR: pasid table entry[0]: 0x0000000000000109 + DMAR: pasid table entry[1]: 0x0000000000000001 + DMAR: pasid table entry[2]: 0x0000000000000000 + DMAR: pasid table entry[3]: 0x0000000000000000 + DMAR: pasid table entry[4]: 0x0000000000000000 + DMAR: pasid table entry[5]: 0x0000000000000000 + DMAR: pasid table entry[6]: 0x0000000000000000 + DMAR: pasid table entry[7]: 0x0000000000000000 + DMAR: PTE not present at level 4 + +Cc: +Fixes: 0bbeb01a4faf ("iommu/vt-d: Manage scalalble mode PASID tables") +Reviewed-by: Kevin Tian +Reported-by: Sukumar Ghorai +Signed-off-by: Ashok Raj +Signed-off-by: Jacob Pan +Link: https://lore.kernel.org/r/20230209212843.1788125-1-jacob.jun.pan@linux.intel.com +Signed-off-by: Lu Baolu +Signed-off-by: Joerg Roedel +Signed-off-by: Sasha Levin +--- + drivers/iommu/intel/pasid.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c +index f821153390e53..80d6412e2c546 100644 +--- a/drivers/iommu/intel/pasid.c ++++ b/drivers/iommu/intel/pasid.c +@@ -186,6 +186,9 @@ int intel_pasid_alloc_table(struct device *dev) + attach_out: + device_attach_pasid_table(info, pasid_table); + ++ if (!ecap_coherent(info->iommu->ecap)) ++ clflush_cache_range(pasid_table->table, size); ++ + return 0; + } + +@@ -276,6 +279,10 @@ struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) + free_pgtable_page(entries); + goto retry; + } ++ if (!ecap_coherent(info->iommu->ecap)) { ++ clflush_cache_range(entries, VTD_PAGE_SIZE); ++ 
clflush_cache_range(&dir[dir_index].val, sizeof(*dir)); ++ } + } + + return &entries[index]; +-- +2.39.2 + diff --git a/queue-5.10/irq-fix-typos-in-comments.patch b/queue-5.10/irq-fix-typos-in-comments.patch new file mode 100644 index 00000000000..e85661b8fa8 --- /dev/null +++ b/queue-5.10/irq-fix-typos-in-comments.patch @@ -0,0 +1,465 @@ +From aeb29bf469c12a96239daf3b5055dbb7cc916184 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 22 Mar 2021 04:21:30 +0100 +Subject: irq: Fix typos in comments + +From: Ingo Molnar + +[ Upstream commit a359f757965aafd0f58570de95dc6bc06cf12a9c ] + +Fix ~36 single-word typos in the IRQ, irqchip and irqdomain code comments. + +Signed-off-by: Ingo Molnar +Cc: Thomas Gleixner +Cc: Marc Zyngier +Cc: Borislav Petkov +Cc: Peter Zijlstra +Cc: linux-kernel@vger.kernel.org +Signed-off-by: Ingo Molnar +Stable-dep-of: 6e6f75c9c98d ("irqdomain: Look for existing mapping only once") +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-aspeed-vic.c | 4 ++-- + drivers/irqchip/irq-bcm7120-l2.c | 2 +- + drivers/irqchip/irq-csky-apb-intc.c | 2 +- + drivers/irqchip/irq-gic-v2m.c | 2 +- + drivers/irqchip/irq-gic-v3-its.c | 10 +++++----- + drivers/irqchip/irq-gic-v3.c | 2 +- + drivers/irqchip/irq-loongson-pch-pic.c | 2 +- + drivers/irqchip/irq-meson-gpio.c | 2 +- + drivers/irqchip/irq-mtk-cirq.c | 2 +- + drivers/irqchip/irq-mxs.c | 4 ++-- + drivers/irqchip/irq-sun4i.c | 2 +- + drivers/irqchip/irq-ti-sci-inta.c | 2 +- + drivers/irqchip/irq-vic.c | 4 ++-- + drivers/irqchip/irq-xilinx-intc.c | 2 +- + include/linux/irq.h | 4 ++-- + include/linux/irqdesc.h | 2 +- + kernel/irq/chip.c | 2 +- + kernel/irq/dummychip.c | 2 +- + kernel/irq/irqdesc.c | 2 +- + kernel/irq/irqdomain.c | 8 ++++---- + kernel/irq/manage.c | 6 +++--- + kernel/irq/msi.c | 2 +- + kernel/irq/timings.c | 2 +- + 23 files changed, 36 insertions(+), 36 deletions(-) + +diff --git a/drivers/irqchip/irq-aspeed-vic.c b/drivers/irqchip/irq-aspeed-vic.c +index 6567ed782f82c..58717cd44f99f 
100644 +--- a/drivers/irqchip/irq-aspeed-vic.c ++++ b/drivers/irqchip/irq-aspeed-vic.c +@@ -71,7 +71,7 @@ static void vic_init_hw(struct aspeed_vic *vic) + writel(0, vic->base + AVIC_INT_SELECT); + writel(0, vic->base + AVIC_INT_SELECT + 4); + +- /* Some interrupts have a programable high/low level trigger ++ /* Some interrupts have a programmable high/low level trigger + * (4 GPIO direct inputs), for now we assume this was configured + * by firmware. We read which ones are edge now. + */ +@@ -203,7 +203,7 @@ static int __init avic_of_init(struct device_node *node, + } + vic->base = regs; + +- /* Initialize soures, all masked */ ++ /* Initialize sources, all masked */ + vic_init_hw(vic); + + /* Ready to receive interrupts */ +diff --git a/drivers/irqchip/irq-bcm7120-l2.c b/drivers/irqchip/irq-bcm7120-l2.c +index 7d776c905b7d2..1c2c5bd5a9fc1 100644 +--- a/drivers/irqchip/irq-bcm7120-l2.c ++++ b/drivers/irqchip/irq-bcm7120-l2.c +@@ -310,7 +310,7 @@ static int __init bcm7120_l2_intc_probe(struct device_node *dn, + + if (data->can_wake) { + /* This IRQ chip can wake the system, set all +- * relevant child interupts in wake_enabled mask ++ * relevant child interrupts in wake_enabled mask + */ + gc->wake_enabled = 0xffffffff; + gc->wake_enabled &= ~gc->unused; +diff --git a/drivers/irqchip/irq-csky-apb-intc.c b/drivers/irqchip/irq-csky-apb-intc.c +index 5a2ec43b7ddd4..ab91afa867557 100644 +--- a/drivers/irqchip/irq-csky-apb-intc.c ++++ b/drivers/irqchip/irq-csky-apb-intc.c +@@ -176,7 +176,7 @@ gx_intc_init(struct device_node *node, struct device_node *parent) + writel(0x0, reg_base + GX_INTC_NEN63_32); + + /* +- * Initial mask reg with all unmasked, because we only use enalbe reg ++ * Initial mask reg with all unmasked, because we only use enable reg + */ + writel(0x0, reg_base + GX_INTC_NMASK31_00); + writel(0x0, reg_base + GX_INTC_NMASK63_32); +diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c +index fbec07d634ad2..4116b48e60aff 100644 +--- 
a/drivers/irqchip/irq-gic-v2m.c ++++ b/drivers/irqchip/irq-gic-v2m.c +@@ -371,7 +371,7 @@ static int __init gicv2m_init_one(struct fwnode_handle *fwnode, + * the MSI data is the absolute value within the range from + * spi_start to (spi_start + num_spis). + * +- * Broadom NS2 GICv2m implementation has an erratum where the MSI data ++ * Broadcom NS2 GICv2m implementation has an erratum where the MSI data + * is 'spi_number - 32' + * + * Reading that register fails on the Graviton implementation +diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c +index d8cb5bcd6b10e..5ec091c64d47f 100644 +--- a/drivers/irqchip/irq-gic-v3-its.c ++++ b/drivers/irqchip/irq-gic-v3-its.c +@@ -1492,7 +1492,7 @@ static void its_vlpi_set_doorbell(struct irq_data *d, bool enable) + * + * Ideally, we'd issue a VMAPTI to set the doorbell to its LPI + * value or to 1023, depending on the enable bit. But that +- * would be issueing a mapping for an /existing/ DevID+EventID ++ * would be issuing a mapping for an /existing/ DevID+EventID + * pair, which is UNPREDICTABLE. Instead, let's issue a VMOVI + * to the /same/ vPE, using this opportunity to adjust the + * doorbell. Mouahahahaha. We loves it, Precious. +@@ -3122,7 +3122,7 @@ static void its_cpu_init_lpis(void) + + /* + * It's possible for CPU to receive VLPIs before it is +- * sheduled as a vPE, especially for the first CPU, and the ++ * scheduled as a vPE, especially for the first CPU, and the + * VLPI with INTID larger than 2^(IDbits+1) will be considered + * as out of range and dropped by GIC. + * So we initialize IDbits to known value to avoid VLPI drop. +@@ -3613,7 +3613,7 @@ static void its_irq_domain_free(struct irq_domain *domain, unsigned int virq, + + /* + * If all interrupts have been freed, start mopping the +- * floor. This is conditionned on the device not being shared. ++ * floor. This is conditioned on the device not being shared. 
+ */ + if (!its_dev->shared && + bitmap_empty(its_dev->event_map.lpi_map, +@@ -4187,7 +4187,7 @@ static int its_sgi_set_affinity(struct irq_data *d, + { + /* + * There is no notion of affinity for virtual SGIs, at least +- * not on the host (since they can only be targetting a vPE). ++ * not on the host (since they can only be targeting a vPE). + * Tell the kernel we've done whatever it asked for. + */ + irq_data_update_effective_affinity(d, mask_val); +@@ -4232,7 +4232,7 @@ static int its_sgi_get_irqchip_state(struct irq_data *d, + /* + * Locking galore! We can race against two different events: + * +- * - Concurent vPE affinity change: we must make sure it cannot ++ * - Concurrent vPE affinity change: we must make sure it cannot + * happen, or we'll talk to the wrong redistributor. This is + * identical to what happens with vLPIs. + * +diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c +index 4c8f18f0cecf8..2805969e4f15a 100644 +--- a/drivers/irqchip/irq-gic-v3.c ++++ b/drivers/irqchip/irq-gic-v3.c +@@ -1456,7 +1456,7 @@ static int gic_irq_domain_translate(struct irq_domain *d, + + /* + * Make it clear that broken DTs are... broken. +- * Partitionned PPIs are an unfortunate exception. ++ * Partitioned PPIs are an unfortunate exception. 
+ */ + WARN_ON(*type == IRQ_TYPE_NONE && + fwspec->param[0] != GIC_IRQ_TYPE_PARTITION); +diff --git a/drivers/irqchip/irq-loongson-pch-pic.c b/drivers/irqchip/irq-loongson-pch-pic.c +index 90e1ad6e36120..a4eb8a2181c7f 100644 +--- a/drivers/irqchip/irq-loongson-pch-pic.c ++++ b/drivers/irqchip/irq-loongson-pch-pic.c +@@ -180,7 +180,7 @@ static void pch_pic_reset(struct pch_pic *priv) + int i; + + for (i = 0; i < PIC_COUNT; i++) { +- /* Write vectore ID */ ++ /* Write vectored ID */ + writeb(priv->ht_vec_base + i, priv->base + PCH_INT_HTVEC(i)); + /* Hardcode route to HT0 Lo */ + writeb(1, priv->base + PCH_INT_ROUTE(i)); +diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c +index bc7aebcc96e9c..e50676ce2ec84 100644 +--- a/drivers/irqchip/irq-meson-gpio.c ++++ b/drivers/irqchip/irq-meson-gpio.c +@@ -227,7 +227,7 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, + + /* + * Get the hwirq number assigned to this channel through +- * a pointer the channel_irq table. The added benifit of this ++ * a pointer the channel_irq table. The added benefit of this + * method is that we can also retrieve the channel index with + * it, using the table base. 
+ */ +diff --git a/drivers/irqchip/irq-mtk-cirq.c b/drivers/irqchip/irq-mtk-cirq.c +index 69ba8ce3c1785..9bca0918078e8 100644 +--- a/drivers/irqchip/irq-mtk-cirq.c ++++ b/drivers/irqchip/irq-mtk-cirq.c +@@ -217,7 +217,7 @@ static void mtk_cirq_resume(void) + { + u32 value; + +- /* flush recored interrupts, will send signals to parent controller */ ++ /* flush recorded interrupts, will send signals to parent controller */ + value = readl_relaxed(cirq_data->base + CIRQ_CONTROL); + writel_relaxed(value | CIRQ_FLUSH, cirq_data->base + CIRQ_CONTROL); + +diff --git a/drivers/irqchip/irq-mxs.c b/drivers/irqchip/irq-mxs.c +index a671938fd97f6..d1f5740cd5755 100644 +--- a/drivers/irqchip/irq-mxs.c ++++ b/drivers/irqchip/irq-mxs.c +@@ -58,7 +58,7 @@ struct icoll_priv { + static struct icoll_priv icoll_priv; + static struct irq_domain *icoll_domain; + +-/* calculate bit offset depending on number of intterupt per register */ ++/* calculate bit offset depending on number of interrupt per register */ + static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit) + { + /* +@@ -68,7 +68,7 @@ static u32 icoll_intr_bitshift(struct irq_data *d, u32 bit) + return bit << ((d->hwirq & 3) << 3); + } + +-/* calculate mem offset depending on number of intterupt per register */ ++/* calculate mem offset depending on number of interrupt per register */ + static void __iomem *icoll_intr_reg(struct irq_data *d) + { + /* offset = hwirq / intr_per_reg * 0x10 */ +diff --git a/drivers/irqchip/irq-sun4i.c b/drivers/irqchip/irq-sun4i.c +index fb78d6623556c..9ea94456b178c 100644 +--- a/drivers/irqchip/irq-sun4i.c ++++ b/drivers/irqchip/irq-sun4i.c +@@ -189,7 +189,7 @@ static void __exception_irq_entry sun4i_handle_irq(struct pt_regs *regs) + * 3) spurious irq + * So if we immediately get a reading of 0, check the irq-pending reg + * to differentiate between 2 and 3. 
We only do this once to avoid +- * the extra check in the common case of 1 hapening after having ++ * the extra check in the common case of 1 happening after having + * read the vector-reg once. + */ + hwirq = readl(irq_ic_data->irq_base + SUN4I_IRQ_VECTOR_REG) >> 2; +diff --git a/drivers/irqchip/irq-ti-sci-inta.c b/drivers/irqchip/irq-ti-sci-inta.c +index 532d0ae172d9f..ca1f593f4d13a 100644 +--- a/drivers/irqchip/irq-ti-sci-inta.c ++++ b/drivers/irqchip/irq-ti-sci-inta.c +@@ -78,7 +78,7 @@ struct ti_sci_inta_vint_desc { + * struct ti_sci_inta_irq_domain - Structure representing a TISCI based + * Interrupt Aggregator IRQ domain. + * @sci: Pointer to TISCI handle +- * @vint: TISCI resource pointer representing IA inerrupts. ++ * @vint: TISCI resource pointer representing IA interrupts. + * @global_event: TISCI resource pointer representing global events. + * @vint_list: List of the vints active in the system + * @vint_mutex: Mutex to protect vint_list +diff --git a/drivers/irqchip/irq-vic.c b/drivers/irqchip/irq-vic.c +index e460363742272..62f3d29f90420 100644 +--- a/drivers/irqchip/irq-vic.c ++++ b/drivers/irqchip/irq-vic.c +@@ -163,7 +163,7 @@ static struct syscore_ops vic_syscore_ops = { + }; + + /** +- * vic_pm_init - initicall to register VIC pm ++ * vic_pm_init - initcall to register VIC pm + * + * This is called via late_initcall() to register + * the resources for the VICs due to the early +@@ -397,7 +397,7 @@ static void __init vic_clear_interrupts(void __iomem *base) + /* + * The PL190 cell from ARM has been modified by ST to handle 64 interrupts. + * The original cell has 32 interrupts, while the modified one has 64, +- * replocating two blocks 0x00..0x1f in 0x20..0x3f. In that case ++ * replicating two blocks 0x00..0x1f in 0x20..0x3f. In that case + * the probe function is called twice, with base set to offset 000 + * and 020 within the page. We call this "second block". 
+ */ +diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c +index 1d3d273309bd3..8cd1bfc730572 100644 +--- a/drivers/irqchip/irq-xilinx-intc.c ++++ b/drivers/irqchip/irq-xilinx-intc.c +@@ -210,7 +210,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, + + /* + * Disable all external interrupts until they are +- * explicity requested. ++ * explicitly requested. + */ + xintc_write(irqc, IER, 0); + +diff --git a/include/linux/irq.h b/include/linux/irq.h +index 607bee9271bd7..b89a8ac83d1bc 100644 +--- a/include/linux/irq.h ++++ b/include/linux/irq.h +@@ -116,7 +116,7 @@ enum { + * IRQ_SET_MASK_NOCPY - OK, chip did update irq_common_data.affinity + * IRQ_SET_MASK_OK_DONE - Same as IRQ_SET_MASK_OK for core. Special code to + * support stacked irqchips, which indicates skipping +- * all descendent irqchips. ++ * all descendant irqchips. + */ + enum { + IRQ_SET_MASK_OK = 0, +@@ -302,7 +302,7 @@ static inline bool irqd_is_level_type(struct irq_data *d) + + /* + * Must only be called of irqchip.irq_set_affinity() or low level +- * hieararchy domain allocation functions. ++ * hierarchy domain allocation functions. 
+ */ + static inline void irqd_set_single_target(struct irq_data *d) + { +diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h +index 5745491303e03..fdb22e0f9a91e 100644 +--- a/include/linux/irqdesc.h ++++ b/include/linux/irqdesc.h +@@ -32,7 +32,7 @@ struct pt_regs; + * @last_unhandled: aging timer for unhandled count + * @irqs_unhandled: stats field for spurious unhandled interrupts + * @threads_handled: stats field for deferred spurious detection of threaded handlers +- * @threads_handled_last: comparator field for deferred spurious detection of theraded handlers ++ * @threads_handled_last: comparator field for deferred spurious detection of threaded handlers + * @lock: locking for SMP + * @affinity_hint: hint to user space for preferred irq affinity + * @affinity_notify: context for notification of affinity changes +diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c +index 621d8dd157bc1..e7d284261d450 100644 +--- a/kernel/irq/chip.c ++++ b/kernel/irq/chip.c +@@ -811,7 +811,7 @@ void handle_edge_irq(struct irq_desc *desc) + /* + * When another irq arrived while we were handling + * one, we could have masked the irq. +- * Renable it, if it was not disabled in meantime. ++ * Reenable it, if it was not disabled in meantime. + */ + if (unlikely(desc->istate & IRQS_PENDING)) { + if (!irqd_irq_disabled(&desc->irq_data) && +diff --git a/kernel/irq/dummychip.c b/kernel/irq/dummychip.c +index 0b0cdf206dc44..7fe6cffe7d0df 100644 +--- a/kernel/irq/dummychip.c ++++ b/kernel/irq/dummychip.c +@@ -13,7 +13,7 @@ + + /* + * What should we do if we get a hw irq event on an illegal vector? +- * Each architecture has to answer this themself. ++ * Each architecture has to answer this themselves. 
+ */ + static void ack_bad(struct irq_data *data) + { +diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c +index 9b0914a063f90..6c009a033c73f 100644 +--- a/kernel/irq/irqdesc.c ++++ b/kernel/irq/irqdesc.c +@@ -31,7 +31,7 @@ static int __init irq_affinity_setup(char *str) + cpulist_parse(str, irq_default_affinity); + /* + * Set at least the boot cpu. We don't want to end up with +- * bugreports caused by random comandline masks ++ * bugreports caused by random commandline masks + */ + cpumask_set_cpu(smp_processor_id(), irq_default_affinity); + return 1; +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index 1720998933f8d..fe07888a7d96a 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -53,7 +53,7 @@ EXPORT_SYMBOL_GPL(irqchip_fwnode_ops); + * @name: Optional user provided domain name + * @pa: Optional user-provided physical address + * +- * Allocate a struct irqchip_fwid, and return a poiner to the embedded ++ * Allocate a struct irqchip_fwid, and return a pointer to the embedded + * fwnode_handle (or NULL on failure). + * + * Note: The types IRQCHIP_FWNODE_NAMED and IRQCHIP_FWNODE_NAMED_ID are +@@ -657,7 +657,7 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, + + pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); + +- /* Look for default domain if nececssary */ ++ /* Look for default domain if necessary */ + if (domain == NULL) + domain = irq_default_domain; + if (domain == NULL) { +@@ -893,7 +893,7 @@ unsigned int irq_find_mapping(struct irq_domain *domain, + { + struct irq_data *data; + +- /* Look for default domain if nececssary */ ++ /* Look for default domain if necessary */ + if (domain == NULL) + domain = irq_default_domain; + if (domain == NULL) +@@ -1423,7 +1423,7 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, + * The whole process to setup an IRQ has been split into two steps. 
+ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ + * descriptor and required hardware resources. The second step, +- * irq_domain_activate_irq(), is to program hardwares with preallocated ++ * irq_domain_activate_irq(), is to program the hardware with preallocated + * resources. In this way, it's easier to rollback when failing to + * allocate resources. + */ +diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c +index 437b073dc487e..0159925054faa 100644 +--- a/kernel/irq/manage.c ++++ b/kernel/irq/manage.c +@@ -341,7 +341,7 @@ static bool irq_set_affinity_deactivated(struct irq_data *data, + * If the interrupt is not yet activated, just store the affinity + * mask and do not call the chip driver at all. On activation the + * driver has to make sure anyway that the interrupt is in a +- * useable state so startup works. ++ * usable state so startup works. + */ + if (!IS_ENABLED(CONFIG_IRQ_DOMAIN_HIERARCHY) || + irqd_is_activated(data) || !irqd_affinity_on_activate(data)) +@@ -999,7 +999,7 @@ static void irq_finalize_oneshot(struct irq_desc *desc, + * to IRQS_INPROGRESS and the irq line is masked forever. + * + * This also serializes the state of shared oneshot handlers +- * versus "desc->threads_onehsot |= action->thread_mask;" in ++ * versus "desc->threads_oneshot |= action->thread_mask;" in + * irq_wake_thread(). See the comment there which explains the + * serialization. + */ +@@ -1877,7 +1877,7 @@ static struct irqaction *__free_irq(struct irq_desc *desc, void *dev_id) + /* Last action releases resources */ + if (!desc->action) { + /* +- * Reaquire bus lock as irq_release_resources() might ++ * Reacquire bus lock as irq_release_resources() might + * require it to deallocate resources over the slow bus. + */ + chip_bus_lock(desc); +diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c +index b47d95b68ac1a..4457f3e966d0e 100644 +--- a/kernel/irq/msi.c ++++ b/kernel/irq/msi.c +@@ -5,7 +5,7 @@ + * + * This file is licensed under GPLv2. 
+ * +- * This file contains common code to support Message Signalled Interrupt for ++ * This file contains common code to support Message Signaled Interrupts for + * PCI compatible and non PCI compatible devices. + */ + #include +diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c +index 1f981162648a3..00d45b6bd8f89 100644 +--- a/kernel/irq/timings.c ++++ b/kernel/irq/timings.c +@@ -490,7 +490,7 @@ static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) + + /* + * The interrupt triggered more than one second apart, that +- * ends the sequence as predictible for our purpose. In this ++ * ends the sequence as predictable for our purpose. In this + * case, assume we have the beginning of a sequence and the + * timestamp is the first value. As it is impossible to + * predict anything at this point, return. +-- +2.39.2 + diff --git a/queue-5.10/irqdomain-change-the-type-of-size-in-__irq_domain_ad.patch b/queue-5.10/irqdomain-change-the-type-of-size-in-__irq_domain_ad.patch new file mode 100644 index 00000000000..aaebef9d836 --- /dev/null +++ b/queue-5.10/irqdomain-change-the-type-of-size-in-__irq_domain_ad.patch @@ -0,0 +1,53 @@ +From bfa01f225d22fffdf704f267bcb38e4f9735562b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 16 Sep 2021 10:52:03 +0800 +Subject: irqdomain: Change the type of 'size' in __irq_domain_add() to be + consistent + +From: Bixuan Cui + +[ Upstream commit 20c36ce2164f1774b487d443ece99b754bc6ad43 ] + +The 'size' is used in struct_size(domain, revmap, size) and its input +parameter type is 'size_t' (unsigned int). +Change the type of 'size' to 'unsigned int' to make the type consistent. 
+ +Signed-off-by: Bixuan Cui +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20210916025203.44841-1-cuibixuan@huawei.com +Stable-dep-of: 8932c32c3053 ("irqdomain: Fix domain registration race") +Signed-off-by: Sasha Levin +--- + include/linux/irqdomain.h | 2 +- + kernel/irq/irqdomain.c | 2 +- + 2 files changed, 2 insertions(+), 2 deletions(-) + +diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h +index ea5a337e0f8b8..9b9743f7538c4 100644 +--- a/include/linux/irqdomain.h ++++ b/include/linux/irqdomain.h +@@ -256,7 +256,7 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) + } + + void irq_domain_free_fwnode(struct fwnode_handle *fwnode); +-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, ++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, + irq_hw_number_t hwirq_max, int direct_max, + const struct irq_domain_ops *ops, + void *host_data); +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index 245e317c72908..426242c8903d4 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -130,7 +130,7 @@ EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); + * Allocates and initializes an irq_domain structure. + * Returns pointer to IRQ domain, or NULL on failure. 
+ */ +-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, ++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, + irq_hw_number_t hwirq_max, int direct_max, + const struct irq_domain_ops *ops, + void *host_data) +-- +2.39.2 + diff --git a/queue-5.10/irqdomain-fix-domain-registration-race.patch b/queue-5.10/irqdomain-fix-domain-registration-race.patch new file mode 100644 index 00000000000..a2085b76469 --- /dev/null +++ b/queue-5.10/irqdomain-fix-domain-registration-race.patch @@ -0,0 +1,134 @@ +From 4ec2537dd6d7bd9da61dca871a019630766b8f9b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Feb 2023 11:42:49 +0100 +Subject: irqdomain: Fix domain registration race + +From: Marc Zyngier + +[ Upstream commit 8932c32c3053accd50702b36e944ac2016cd103c ] + +Hierarchical domains created using irq_domain_create_hierarchy() are +currently added to the domain list before having been fully initialised. + +This specifically means that a racing allocation request might fail to +allocate irq data for the inner domains of a hierarchy in case the +parent domain pointer has not yet been set up. + +Note that this is not really any issue for irqchip drivers that are +registered early (e.g. via IRQCHIP_DECLARE() or IRQCHIP_ACPI_DECLARE()) +but could potentially cause trouble with drivers that are registered +later (e.g. modular drivers using IRQCHIP_PLATFORM_DRIVER_BEGIN(), +gpiochip drivers, etc.). 
+ +Fixes: afb7da83b9f4 ("irqdomain: Introduce helper function irq_domain_add_hierarchy()") +Cc: stable@vger.kernel.org # 3.19 +Signed-off-by: Marc Zyngier +[ johan: add commit message ] +Signed-off-by: Johan Hovold +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20230213104302.17307-8-johan+linaro@kernel.org +Signed-off-by: Sasha Levin +--- + kernel/irq/irqdomain.c | 62 +++++++++++++++++++++++++++++------------- + 1 file changed, 43 insertions(+), 19 deletions(-) + +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index 426242c8903d4..fd3f7c16c299a 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -117,23 +117,12 @@ void irq_domain_free_fwnode(struct fwnode_handle *fwnode) + } + EXPORT_SYMBOL_GPL(irq_domain_free_fwnode); + +-/** +- * __irq_domain_add() - Allocate a new irq_domain data structure +- * @fwnode: firmware node for the interrupt controller +- * @size: Size of linear map; 0 for radix mapping only +- * @hwirq_max: Maximum number of interrupts supported by controller +- * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no +- * direct mapping +- * @ops: domain callbacks +- * @host_data: Controller private data pointer +- * +- * Allocates and initializes an irq_domain structure. +- * Returns pointer to IRQ domain, or NULL on failure. 
+- */ +-struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, +- irq_hw_number_t hwirq_max, int direct_max, +- const struct irq_domain_ops *ops, +- void *host_data) ++static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, ++ unsigned int size, ++ irq_hw_number_t hwirq_max, ++ int direct_max, ++ const struct irq_domain_ops *ops, ++ void *host_data) + { + struct irqchip_fwid *fwid; + struct irq_domain *domain; +@@ -210,12 +199,44 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int s + domain->revmap_direct_max_irq = direct_max; + irq_domain_check_hierarchy(domain); + ++ return domain; ++} ++ ++static void __irq_domain_publish(struct irq_domain *domain) ++{ + mutex_lock(&irq_domain_mutex); + debugfs_add_domain_dir(domain); + list_add(&domain->link, &irq_domain_list); + mutex_unlock(&irq_domain_mutex); + + pr_debug("Added domain %s\n", domain->name); ++} ++ ++/** ++ * __irq_domain_add() - Allocate a new irq_domain data structure ++ * @fwnode: firmware node for the interrupt controller ++ * @size: Size of linear map; 0 for radix mapping only ++ * @hwirq_max: Maximum number of interrupts supported by controller ++ * @direct_max: Maximum value of direct maps; Use ~0 for no limit; 0 for no ++ * direct mapping ++ * @ops: domain callbacks ++ * @host_data: Controller private data pointer ++ * ++ * Allocates and initializes an irq_domain structure. ++ * Returns pointer to IRQ domain, or NULL on failure. 
++ */ ++struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, unsigned int size, ++ irq_hw_number_t hwirq_max, int direct_max, ++ const struct irq_domain_ops *ops, ++ void *host_data) ++{ ++ struct irq_domain *domain; ++ ++ domain = __irq_domain_create(fwnode, size, hwirq_max, direct_max, ++ ops, host_data); ++ if (domain) ++ __irq_domain_publish(domain); ++ + return domain; + } + EXPORT_SYMBOL_GPL(__irq_domain_add); +@@ -1110,12 +1131,15 @@ struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, + struct irq_domain *domain; + + if (size) +- domain = irq_domain_create_linear(fwnode, size, ops, host_data); ++ domain = __irq_domain_create(fwnode, size, size, 0, ops, host_data); + else +- domain = irq_domain_create_tree(fwnode, ops, host_data); ++ domain = __irq_domain_create(fwnode, 0, ~0, 0, ops, host_data); ++ + if (domain) { + domain->parent = parent; + domain->flags |= flags; ++ ++ __irq_domain_publish(domain); + } + + return domain; +-- +2.39.2 + diff --git a/queue-5.10/irqdomain-fix-mapping-creation-race.patch b/queue-5.10/irqdomain-fix-mapping-creation-race.patch new file mode 100644 index 00000000000..81b538155e4 --- /dev/null +++ b/queue-5.10/irqdomain-fix-mapping-creation-race.patch @@ -0,0 +1,184 @@ +From c1c69cf22e1bc56cc9387569f82fa6bd417f7172 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Feb 2023 11:42:48 +0100 +Subject: irqdomain: Fix mapping-creation race + +From: Johan Hovold + +[ Upstream commit 601363cc08da25747feb87c55573dd54de91d66a ] + +Parallel probing of devices that share interrupts (e.g. when a driver +uses asynchronous probing) can currently result in two mappings for the +same hardware interrupt to be created due to missing serialisation. + +Make sure to hold the irq_domain_mutex when creating mappings so that +looking for an existing mapping before creating a new one is done +atomically. 
+ +Fixes: 765230b5f084 ("driver-core: add asynchronous probing support for drivers") +Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings") +Link: https://lore.kernel.org/r/YuJXMHoT4ijUxnRb@hovoldconsulting.com +Cc: stable@vger.kernel.org # 4.8 +Cc: Dmitry Torokhov +Cc: Jon Hunter +Tested-by: Hsin-Yi Wang +Tested-by: Mark-PK Tsai +Signed-off-by: Johan Hovold +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20230213104302.17307-7-johan+linaro@kernel.org +Signed-off-by: Sasha Levin +--- + kernel/irq/irqdomain.c | 64 ++++++++++++++++++++++++++++++------------ + 1 file changed, 46 insertions(+), 18 deletions(-) + +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index a1e1433a07754..245e317c72908 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -25,6 +25,9 @@ static DEFINE_MUTEX(irq_domain_mutex); + + static struct irq_domain *irq_default_domain; + ++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, ++ unsigned int nr_irqs, int node, void *arg, ++ bool realloc, const struct irq_affinity_desc *affinity); + static void irq_domain_check_hierarchy(struct irq_domain *domain); + + struct irqchip_fwid { +@@ -637,9 +640,9 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) + } + EXPORT_SYMBOL_GPL(irq_create_direct_mapping); + +-static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain, +- irq_hw_number_t hwirq, +- const struct irq_affinity_desc *affinity) ++static unsigned int irq_create_mapping_affinity_locked(struct irq_domain *domain, ++ irq_hw_number_t hwirq, ++ const struct irq_affinity_desc *affinity) + { + struct device_node *of_node = irq_domain_get_of_node(domain); + int virq; +@@ -654,7 +657,7 @@ static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain, + return 0; + } + +- if (irq_domain_associate(domain, virq, hwirq)) { ++ if (irq_domain_associate_locked(domain, virq, hwirq)) { + irq_free_desc(virq); + 
return 0; + } +@@ -690,14 +693,20 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, + return 0; + } + ++ mutex_lock(&irq_domain_mutex); ++ + /* Check if mapping already exists */ + virq = irq_find_mapping(domain, hwirq); + if (virq) { + pr_debug("existing mapping on virq %d\n", virq); +- return virq; ++ goto out; + } + +- return __irq_create_mapping_affinity(domain, hwirq, affinity); ++ virq = irq_create_mapping_affinity_locked(domain, hwirq, affinity); ++out: ++ mutex_unlock(&irq_domain_mutex); ++ ++ return virq; + } + EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); + +@@ -799,6 +808,8 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) + if (WARN_ON(type & ~IRQ_TYPE_SENSE_MASK)) + type &= IRQ_TYPE_SENSE_MASK; + ++ mutex_lock(&irq_domain_mutex); ++ + /* + * If we've already configured this interrupt, + * don't do it again, or hell will break loose. +@@ -811,7 +822,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) + * interrupt number. 
+ */ + if (type == IRQ_TYPE_NONE || type == irq_get_trigger_type(virq)) +- return virq; ++ goto out; + + /* + * If the trigger type has not been set yet, then set +@@ -819,35 +830,45 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) + */ + if (irq_get_trigger_type(virq) == IRQ_TYPE_NONE) { + irq_data = irq_get_irq_data(virq); +- if (!irq_data) +- return 0; ++ if (!irq_data) { ++ virq = 0; ++ goto out; ++ } + + irqd_set_trigger_type(irq_data, type); +- return virq; ++ goto out; + } + + pr_warn("type mismatch, failed to map hwirq-%lu for %s!\n", + hwirq, of_node_full_name(to_of_node(fwspec->fwnode))); +- return 0; ++ virq = 0; ++ goto out; + } + + if (irq_domain_is_hierarchy(domain)) { +- virq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, fwspec); +- if (virq <= 0) +- return 0; ++ virq = irq_domain_alloc_irqs_locked(domain, -1, 1, NUMA_NO_NODE, ++ fwspec, false, NULL); ++ if (virq <= 0) { ++ virq = 0; ++ goto out; ++ } + } else { + /* Create mapping */ +- virq = __irq_create_mapping_affinity(domain, hwirq, NULL); ++ virq = irq_create_mapping_affinity_locked(domain, hwirq, NULL); + if (!virq) +- return virq; ++ goto out; + } + + irq_data = irq_get_irq_data(virq); +- if (WARN_ON(!irq_data)) +- return 0; ++ if (WARN_ON(!irq_data)) { ++ virq = 0; ++ goto out; ++ } + + /* Store trigger type */ + irqd_set_trigger_type(irq_data, type); ++out: ++ mutex_unlock(&irq_domain_mutex); + + return virq; + } +@@ -1856,6 +1877,13 @@ void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, + irq_set_handler_data(virq, handler_data); + } + ++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, ++ unsigned int nr_irqs, int node, void *arg, ++ bool realloc, const struct irq_affinity_desc *affinity) ++{ ++ return -EINVAL; ++} ++ + static void irq_domain_check_hierarchy(struct irq_domain *domain) + { + } +-- +2.39.2 + diff --git a/queue-5.10/irqdomain-look-for-existing-mapping-only-once.patch 
b/queue-5.10/irqdomain-look-for-existing-mapping-only-once.patch new file mode 100644 index 00000000000..2d8e47ee542 --- /dev/null +++ b/queue-5.10/irqdomain-look-for-existing-mapping-only-once.patch @@ -0,0 +1,133 @@ +From 85884bb78e6116baa316676ba22c6ffc9cba7a43 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Feb 2023 11:42:46 +0100 +Subject: irqdomain: Look for existing mapping only once + +From: Johan Hovold + +[ Upstream commit 6e6f75c9c98d2d246d90411ff2b6f0cd271f4cba ] + +Avoid looking for an existing mapping twice when creating a new mapping +using irq_create_fwspec_mapping() by factoring out the actual allocation +which is shared with irq_create_mapping_affinity(). + +The new helper function will also be used to fix a shared-interrupt +mapping race, hence the Fixes tag. + +Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings") +Cc: stable@vger.kernel.org # 4.8 +Tested-by: Hsin-Yi Wang +Tested-by: Mark-PK Tsai +Signed-off-by: Johan Hovold +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20230213104302.17307-5-johan+linaro@kernel.org +Signed-off-by: Sasha Levin +--- + kernel/irq/irqdomain.c | 60 +++++++++++++++++++++++------------------- + 1 file changed, 33 insertions(+), 27 deletions(-) + +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index fe07888a7d96a..d18c25a41673f 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -637,6 +637,34 @@ unsigned int irq_create_direct_mapping(struct irq_domain *domain) + } + EXPORT_SYMBOL_GPL(irq_create_direct_mapping); + ++static unsigned int __irq_create_mapping_affinity(struct irq_domain *domain, ++ irq_hw_number_t hwirq, ++ const struct irq_affinity_desc *affinity) ++{ ++ struct device_node *of_node = irq_domain_get_of_node(domain); ++ int virq; ++ ++ pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); ++ ++ /* Allocate a virtual interrupt number */ ++ virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), 
++ affinity); ++ if (virq <= 0) { ++ pr_debug("-> virq allocation failed\n"); ++ return 0; ++ } ++ ++ if (irq_domain_associate(domain, virq, hwirq)) { ++ irq_free_desc(virq); ++ return 0; ++ } ++ ++ pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", ++ hwirq, of_node_full_name(of_node), virq); ++ ++ return virq; ++} ++ + /** + * irq_create_mapping_affinity() - Map a hardware interrupt into linux irq space + * @domain: domain owning this hardware interrupt or NULL for default domain +@@ -649,14 +677,11 @@ EXPORT_SYMBOL_GPL(irq_create_direct_mapping); + * on the number returned from that call. + */ + unsigned int irq_create_mapping_affinity(struct irq_domain *domain, +- irq_hw_number_t hwirq, +- const struct irq_affinity_desc *affinity) ++ irq_hw_number_t hwirq, ++ const struct irq_affinity_desc *affinity) + { +- struct device_node *of_node; + int virq; + +- pr_debug("irq_create_mapping(0x%p, 0x%lx)\n", domain, hwirq); +- + /* Look for default domain if necessary */ + if (domain == NULL) + domain = irq_default_domain; +@@ -664,34 +689,15 @@ unsigned int irq_create_mapping_affinity(struct irq_domain *domain, + WARN(1, "%s(, %lx) called with NULL domain\n", __func__, hwirq); + return 0; + } +- pr_debug("-> using domain @%p\n", domain); +- +- of_node = irq_domain_get_of_node(domain); + + /* Check if mapping already exists */ + virq = irq_find_mapping(domain, hwirq); + if (virq) { +- pr_debug("-> existing mapping on virq %d\n", virq); ++ pr_debug("existing mapping on virq %d\n", virq); + return virq; + } + +- /* Allocate a virtual interrupt number */ +- virq = irq_domain_alloc_descs(-1, 1, hwirq, of_node_to_nid(of_node), +- affinity); +- if (virq <= 0) { +- pr_debug("-> virq allocation failed\n"); +- return 0; +- } +- +- if (irq_domain_associate(domain, virq, hwirq)) { +- irq_free_desc(virq); +- return 0; +- } +- +- pr_debug("irq %lu on domain %s mapped to virtual irq %u\n", +- hwirq, of_node_full_name(of_node), virq); +- +- return virq; ++ return 
__irq_create_mapping_affinity(domain, hwirq, affinity); + } + EXPORT_SYMBOL_GPL(irq_create_mapping_affinity); + +@@ -831,7 +837,7 @@ unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec) + return 0; + } else { + /* Create mapping */ +- virq = irq_create_mapping(domain, hwirq); ++ virq = __irq_create_mapping_affinity(domain, hwirq, NULL); + if (!virq) + return virq; + } +-- +2.39.2 + diff --git a/queue-5.10/irqdomain-refactor-__irq_domain_alloc_irqs.patch b/queue-5.10/irqdomain-refactor-__irq_domain_alloc_irqs.patch new file mode 100644 index 00000000000..4045dd7da81 --- /dev/null +++ b/queue-5.10/irqdomain-refactor-__irq_domain_alloc_irqs.patch @@ -0,0 +1,155 @@ +From af649d9740bab8166d36ad7c818c2e53c22945f6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 13 Feb 2023 11:42:47 +0100 +Subject: irqdomain: Refactor __irq_domain_alloc_irqs() + +From: Johan Hovold + +[ Upstream commit d55f7f4c58c07beb5050a834bf57ae2ede599c7e ] + +Refactor __irq_domain_alloc_irqs() so that it can be called internally +while holding the irq_domain_mutex. + +This will be used to fix a shared-interrupt mapping race, hence the +Fixes tag. 
+ +Fixes: b62b2cf5759b ("irqdomain: Fix handling of type settings for existing mappings") +Cc: stable@vger.kernel.org # 4.8 +Tested-by: Hsin-Yi Wang +Tested-by: Mark-PK Tsai +Signed-off-by: Johan Hovold +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20230213104302.17307-6-johan+linaro@kernel.org +Signed-off-by: Sasha Levin +--- + kernel/irq/irqdomain.c | 88 +++++++++++++++++++++++------------------- + 1 file changed, 48 insertions(+), 40 deletions(-) + +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index d18c25a41673f..a1e1433a07754 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -1411,40 +1411,12 @@ int irq_domain_alloc_irqs_hierarchy(struct irq_domain *domain, + return domain->ops->alloc(domain, irq_base, nr_irqs, arg); + } + +-/** +- * __irq_domain_alloc_irqs - Allocate IRQs from domain +- * @domain: domain to allocate from +- * @irq_base: allocate specified IRQ number if irq_base >= 0 +- * @nr_irqs: number of IRQs to allocate +- * @node: NUMA node id for memory allocation +- * @arg: domain specific argument +- * @realloc: IRQ descriptors have already been allocated if true +- * @affinity: Optional irq affinity mask for multiqueue devices +- * +- * Allocate IRQ numbers and initialized all data structures to support +- * hierarchy IRQ domains. +- * Parameter @realloc is mainly to support legacy IRQs. +- * Returns error code or allocated IRQ number +- * +- * The whole process to setup an IRQ has been split into two steps. +- * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ +- * descriptor and required hardware resources. The second step, +- * irq_domain_activate_irq(), is to program the hardware with preallocated +- * resources. In this way, it's easier to rollback when failing to +- * allocate resources. 
+- */ +-int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, +- unsigned int nr_irqs, int node, void *arg, +- bool realloc, const struct irq_affinity_desc *affinity) ++static int irq_domain_alloc_irqs_locked(struct irq_domain *domain, int irq_base, ++ unsigned int nr_irqs, int node, void *arg, ++ bool realloc, const struct irq_affinity_desc *affinity) + { + int i, ret, virq; + +- if (domain == NULL) { +- domain = irq_default_domain; +- if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) +- return -EINVAL; +- } +- + if (realloc && irq_base >= 0) { + virq = irq_base; + } else { +@@ -1463,24 +1435,18 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, + goto out_free_desc; + } + +- mutex_lock(&irq_domain_mutex); + ret = irq_domain_alloc_irqs_hierarchy(domain, virq, nr_irqs, arg); +- if (ret < 0) { +- mutex_unlock(&irq_domain_mutex); ++ if (ret < 0) + goto out_free_irq_data; +- } + + for (i = 0; i < nr_irqs; i++) { + ret = irq_domain_trim_hierarchy(virq + i); +- if (ret) { +- mutex_unlock(&irq_domain_mutex); ++ if (ret) + goto out_free_irq_data; +- } + } +- ++ + for (i = 0; i < nr_irqs; i++) + irq_domain_insert_irq(virq + i); +- mutex_unlock(&irq_domain_mutex); + + return virq; + +@@ -1491,6 +1457,48 @@ int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, + return ret; + } + ++/** ++ * __irq_domain_alloc_irqs - Allocate IRQs from domain ++ * @domain: domain to allocate from ++ * @irq_base: allocate specified IRQ number if irq_base >= 0 ++ * @nr_irqs: number of IRQs to allocate ++ * @node: NUMA node id for memory allocation ++ * @arg: domain specific argument ++ * @realloc: IRQ descriptors have already been allocated if true ++ * @affinity: Optional irq affinity mask for multiqueue devices ++ * ++ * Allocate IRQ numbers and initialized all data structures to support ++ * hierarchy IRQ domains. ++ * Parameter @realloc is mainly to support legacy IRQs. 
++ * Returns error code or allocated IRQ number ++ * ++ * The whole process to setup an IRQ has been split into two steps. ++ * The first step, __irq_domain_alloc_irqs(), is to allocate IRQ ++ * descriptor and required hardware resources. The second step, ++ * irq_domain_activate_irq(), is to program the hardware with preallocated ++ * resources. In this way, it's easier to rollback when failing to ++ * allocate resources. ++ */ ++int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, ++ unsigned int nr_irqs, int node, void *arg, ++ bool realloc, const struct irq_affinity_desc *affinity) ++{ ++ int ret; ++ ++ if (domain == NULL) { ++ domain = irq_default_domain; ++ if (WARN(!domain, "domain is NULL; cannot allocate IRQ\n")) ++ return -EINVAL; ++ } ++ ++ mutex_lock(&irq_domain_mutex); ++ ret = irq_domain_alloc_irqs_locked(domain, irq_base, nr_irqs, node, arg, ++ realloc, affinity); ++ mutex_unlock(&irq_domain_mutex); ++ ++ return ret; ++} ++ + /* The irq_data was moved, fix the revmap to refer to the new location */ + static void irq_domain_fix_revmap(struct irq_data *d) + { +-- +2.39.2 + diff --git a/queue-5.10/landlock-add-object-management.patch b/queue-5.10/landlock-add-object-management.patch new file mode 100644 index 00000000000..3fa753b3eac --- /dev/null +++ b/queue-5.10/landlock-add-object-management.patch @@ -0,0 +1,318 @@ +From ab07529d96375f4117929020ea88db6ed07d8abf Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Apr 2021 17:41:11 +0200 +Subject: landlock: Add object management +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +[ Upstream commit 90945448e9830aa1b39d7acaa4e0724a001e2ff8 ] + +A Landlock object enables to identify a kernel object (e.g. an inode). +A Landlock rule is a set of access rights allowed on an object. Rules +are grouped in rulesets that may be tied to a set of processes (i.e. +subjects) to enforce a scoped access-control (i.e. a domain). 
+ +Because Landlock's goal is to empower any process (especially +unprivileged ones) to sandbox themselves, we cannot rely on a +system-wide object identification such as file extended attributes. +Indeed, we need innocuous, composable and modular access-controls. + +The main challenge with these constraints is to identify kernel objects +while this identification is useful (i.e. when a security policy makes +use of this object). But this identification data should be freed once +no policy is using it. This ephemeral tagging should not and may not be +written in the filesystem. We then need to manage the lifetime of a +rule according to the lifetime of its objects. To avoid a global lock, +this implementation makes use of RCU and counters to safely reference +objects. + +A following commit uses this generic object management for inodes. + +Cc: James Morris +Signed-off-by: Mickaël Salaün +Reviewed-by: Jann Horn +Acked-by: Serge Hallyn +Reviewed-by: Kees Cook +Link: https://lore.kernel.org/r/20210422154123.13086-2-mic@digikod.net +Signed-off-by: James Morris +Stable-dep-of: 366617a69e60 ("selftests/landlock: Skip overlayfs tests when not supported") +Signed-off-by: Sasha Levin +--- + MAINTAINERS | 10 +++++ + security/Kconfig | 1 + + security/Makefile | 2 + + security/landlock/Kconfig | 21 +++++++++ + security/landlock/Makefile | 3 ++ + security/landlock/object.c | 67 ++++++++++++++++++++++++++++ + security/landlock/object.h | 91 ++++++++++++++++++++++++++++++++++++++ + 7 files changed, 195 insertions(+) + create mode 100644 security/landlock/Kconfig + create mode 100644 security/landlock/Makefile + create mode 100644 security/landlock/object.c + create mode 100644 security/landlock/object.h + +diff --git a/MAINTAINERS b/MAINTAINERS +index 6c5efc4013ab5..72815c1a325eb 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -9836,6 +9836,16 @@ F: net/core/sock_map.c + F: net/ipv4/tcp_bpf.c + F: net/ipv4/udp_bpf.c + ++LANDLOCK SECURITY MODULE ++M: Mickaël Salaün ++L: 
linux-security-module@vger.kernel.org ++S: Supported ++W: https://landlock.io ++T: git https://github.com/landlock-lsm/linux.git ++F: security/landlock/ ++K: landlock ++K: LANDLOCK ++ + LANTIQ / INTEL Ethernet drivers + M: Hauke Mehrtens + L: netdev@vger.kernel.org +diff --git a/security/Kconfig b/security/Kconfig +index 9893c316da897..7cb5476306676 100644 +--- a/security/Kconfig ++++ b/security/Kconfig +@@ -230,6 +230,7 @@ source "security/loadpin/Kconfig" + source "security/yama/Kconfig" + source "security/safesetid/Kconfig" + source "security/lockdown/Kconfig" ++source "security/landlock/Kconfig" + + source "security/integrity/Kconfig" + +diff --git a/security/Makefile b/security/Makefile +index 3baf435de5411..47e432900e242 100644 +--- a/security/Makefile ++++ b/security/Makefile +@@ -13,6 +13,7 @@ subdir-$(CONFIG_SECURITY_LOADPIN) += loadpin + subdir-$(CONFIG_SECURITY_SAFESETID) += safesetid + subdir-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown + subdir-$(CONFIG_BPF_LSM) += bpf ++subdir-$(CONFIG_SECURITY_LANDLOCK) += landlock + + # always enable default capabilities + obj-y += commoncap.o +@@ -32,6 +33,7 @@ obj-$(CONFIG_SECURITY_SAFESETID) += safesetid/ + obj-$(CONFIG_SECURITY_LOCKDOWN_LSM) += lockdown/ + obj-$(CONFIG_CGROUPS) += device_cgroup.o + obj-$(CONFIG_BPF_LSM) += bpf/ ++obj-$(CONFIG_SECURITY_LANDLOCK) += landlock/ + + # Object integrity file lists + subdir-$(CONFIG_INTEGRITY) += integrity +diff --git a/security/landlock/Kconfig b/security/landlock/Kconfig +new file mode 100644 +index 0000000000000..c1e862a384107 +--- /dev/null ++++ b/security/landlock/Kconfig +@@ -0,0 +1,21 @@ ++# SPDX-License-Identifier: GPL-2.0-only ++ ++config SECURITY_LANDLOCK ++ bool "Landlock support" ++ depends on SECURITY ++ select SECURITY_PATH ++ help ++ Landlock is a sandboxing mechanism that enables processes to restrict ++ themselves (and their future children) by gradually enforcing ++ tailored access control policies. 
A Landlock security policy is a ++ set of access rights (e.g. open a file in read-only, make a ++ directory, etc.) tied to a file hierarchy. Such policy can be ++ configured and enforced by any processes for themselves using the ++ dedicated system calls: landlock_create_ruleset(), ++ landlock_add_rule(), and landlock_restrict_self(). ++ ++ See Documentation/userspace-api/landlock.rst for further information. ++ ++ If you are unsure how to answer this question, answer N. Otherwise, ++ you should also prepend "landlock," to the content of CONFIG_LSM to ++ enable Landlock at boot time. +diff --git a/security/landlock/Makefile b/security/landlock/Makefile +new file mode 100644 +index 0000000000000..cb6deefbf4c09 +--- /dev/null ++++ b/security/landlock/Makefile +@@ -0,0 +1,3 @@ ++obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o ++ ++landlock-y := object.o +diff --git a/security/landlock/object.c b/security/landlock/object.c +new file mode 100644 +index 0000000000000..d674fdf9ff04f +--- /dev/null ++++ b/security/landlock/object.c +@@ -0,0 +1,67 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Landlock LSM - Object management ++ * ++ * Copyright © 2016-2020 Mickaël Salaün ++ * Copyright © 2018-2020 ANSSI ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "object.h" ++ ++struct landlock_object *landlock_create_object( ++ const struct landlock_object_underops *const underops, ++ void *const underobj) ++{ ++ struct landlock_object *new_object; ++ ++ if (WARN_ON_ONCE(!underops || !underobj)) ++ return ERR_PTR(-ENOENT); ++ new_object = kzalloc(sizeof(*new_object), GFP_KERNEL_ACCOUNT); ++ if (!new_object) ++ return ERR_PTR(-ENOMEM); ++ refcount_set(&new_object->usage, 1); ++ spin_lock_init(&new_object->lock); ++ new_object->underops = underops; ++ new_object->underobj = underobj; ++ return new_object; ++} ++ ++/* ++ * The caller must own the object (i.e. thanks to object->usage) to safely put ++ * it. 
++ */ ++void landlock_put_object(struct landlock_object *const object) ++{ ++ /* ++ * The call to @object->underops->release(object) might sleep, e.g. ++ * because of iput(). ++ */ ++ might_sleep(); ++ if (!object) ++ return; ++ ++ /* ++ * If the @object's refcount cannot drop to zero, we can just decrement ++ * the refcount without holding a lock. Otherwise, the decrement must ++ * happen under @object->lock for synchronization with things like ++ * get_inode_object(). ++ */ ++ if (refcount_dec_and_lock(&object->usage, &object->lock)) { ++ __acquire(&object->lock); ++ /* ++ * With @object->lock initially held, remove the reference from ++ * @object->underobj to @object (if it still exists). ++ */ ++ object->underops->release(object); ++ kfree_rcu(object, rcu_free); ++ } ++} +diff --git a/security/landlock/object.h b/security/landlock/object.h +new file mode 100644 +index 0000000000000..3f80674c6c8d3 +--- /dev/null ++++ b/security/landlock/object.h +@@ -0,0 +1,91 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++/* ++ * Landlock LSM - Object management ++ * ++ * Copyright © 2016-2020 Mickaël Salaün ++ * Copyright © 2018-2020 ANSSI ++ */ ++ ++#ifndef _SECURITY_LANDLOCK_OBJECT_H ++#define _SECURITY_LANDLOCK_OBJECT_H ++ ++#include ++#include ++#include ++ ++struct landlock_object; ++ ++/** ++ * struct landlock_object_underops - Operations on an underlying object ++ */ ++struct landlock_object_underops { ++ /** ++ * @release: Releases the underlying object (e.g. iput() for an inode). ++ */ ++ void (*release)(struct landlock_object *const object) ++ __releases(object->lock); ++}; ++ ++/** ++ * struct landlock_object - Security blob tied to a kernel object ++ * ++ * The goal of this structure is to enable to tie a set of ephemeral access ++ * rights (pertaining to different domains) to a kernel object (e.g an inode) ++ * in a safe way. This implies to handle concurrent use and modification. 
++ * ++ * The lifetime of a &struct landlock_object depends on the rules referring to ++ * it. ++ */ ++struct landlock_object { ++ /** ++ * @usage: This counter is used to tie an object to the rules matching ++ * it or to keep it alive while adding a new rule. If this counter ++ * reaches zero, this struct must not be modified, but this counter can ++ * still be read from within an RCU read-side critical section. When ++ * adding a new rule to an object with a usage counter of zero, we must ++ * wait until the pointer to this object is set to NULL (or recycled). ++ */ ++ refcount_t usage; ++ /** ++ * @lock: Protects against concurrent modifications. This lock must be ++ * held from the time @usage drops to zero until any weak references ++ * from @underobj to this object have been cleaned up. ++ * ++ * Lock ordering: inode->i_lock nests inside this. ++ */ ++ spinlock_t lock; ++ /** ++ * @underobj: Used when cleaning up an object and to mark an object as ++ * tied to its underlying kernel structure. This pointer is protected ++ * by @lock. Cf. landlock_release_inodes() and release_inode(). ++ */ ++ void *underobj; ++ union { ++ /** ++ * @rcu_free: Enables lockless use of @usage, @lock and ++ * @underobj from within an RCU read-side critical section. ++ * @rcu_free and @underops are only used by ++ * landlock_put_object(). ++ */ ++ struct rcu_head rcu_free; ++ /** ++ * @underops: Enables landlock_put_object() to release the ++ * underlying object (e.g. inode). 
++ */ ++ const struct landlock_object_underops *underops; ++ }; ++}; ++ ++struct landlock_object *landlock_create_object( ++ const struct landlock_object_underops *const underops, ++ void *const underobj); ++ ++void landlock_put_object(struct landlock_object *const object); ++ ++static inline void landlock_get_object(struct landlock_object *const object) ++{ ++ if (object) ++ refcount_inc(&object->usage); ++} ++ ++#endif /* _SECURITY_LANDLOCK_OBJECT_H */ +-- +2.39.2 + diff --git a/queue-5.10/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch b/queue-5.10/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch new file mode 100644 index 00000000000..c75eb303943 --- /dev/null +++ b/queue-5.10/net-caif-fix-use-after-free-in-cfusbl_device_notify.patch @@ -0,0 +1,86 @@ +From 9a4a568b921a12aa558adab24b2201df4e5b2419 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Mar 2023 01:39:13 +0900 +Subject: net: caif: Fix use-after-free in cfusbl_device_notify() + +From: Shigeru Yoshida + +[ Upstream commit 9781e98a97110f5e76999058368b4be76a788484 ] + +syzbot reported use-after-free in cfusbl_device_notify() [1]. 
This +causes a stack trace like below: + +BUG: KASAN: use-after-free in cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 +Read of size 8 at addr ffff88807ac4e6f0 by task kworker/u4:6/1214 + +CPU: 0 PID: 1214 Comm: kworker/u4:6 Not tainted 5.19.0-rc3-syzkaller-00146-g92f20ff72066 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +Workqueue: netns cleanup_net +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 + print_address_description.constprop.0.cold+0xeb/0x467 mm/kasan/report.c:313 + print_report mm/kasan/report.c:429 [inline] + kasan_report.cold+0xf4/0x1c6 mm/kasan/report.c:491 + cfusbl_device_notify+0x7c9/0x870 net/caif/caif_usb.c:138 + notifier_call_chain+0xb5/0x200 kernel/notifier.c:87 + call_netdevice_notifiers_info+0xb5/0x130 net/core/dev.c:1945 + call_netdevice_notifiers_extack net/core/dev.c:1983 [inline] + call_netdevice_notifiers net/core/dev.c:1997 [inline] + netdev_wait_allrefs_any net/core/dev.c:10227 [inline] + netdev_run_todo+0xbc0/0x10f0 net/core/dev.c:10341 + default_device_exit_batch+0x44e/0x590 net/core/dev.c:11334 + ops_exit_list+0x125/0x170 net/core/net_namespace.c:167 + cleanup_net+0x4ea/0xb00 net/core/net_namespace.c:594 + process_one_work+0x996/0x1610 kernel/workqueue.c:2289 + worker_thread+0x665/0x1080 kernel/workqueue.c:2436 + kthread+0x2e9/0x3a0 kernel/kthread.c:376 + ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:302 + + +When unregistering a net device, unregister_netdevice_many_notify() +sets the device's reg_state to NETREG_UNREGISTERING, calls notifiers +with NETDEV_UNREGISTER, and adds the device to the todo list. + +Later on, devices in the todo list are processed by netdev_run_todo(). +netdev_run_todo() waits devices' reference count become 1 while +rebroadcasting NETDEV_UNREGISTER notification. + +When cfusbl_device_notify() is called with NETDEV_UNREGISTER multiple +times, the parent device might be freed.
This could cause UAF. +Processing NETDEV_UNREGISTER multiple times also causes imbalance of +reference count for the module. + +This patch fixes the issue by accepting only first NETDEV_UNREGISTER +notification. + +Fixes: 7ad65bf68d70 ("caif: Add support for CAIF over CDC NCM USB interface") +CC: sjur.brandeland@stericsson.com +Reported-by: syzbot+b563d33852b893653a9e@syzkaller.appspotmail.com +Link: https://syzkaller.appspot.com/bug?id=c3bfd8e2450adab3bffe4d80821fbbced600407f [1] +Signed-off-by: Shigeru Yoshida +Link: https://lore.kernel.org/r/20230301163913.391304-1-syoshida@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/caif/caif_usb.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/caif/caif_usb.c b/net/caif/caif_usb.c +index b02e1292f7f19..24488a4e2d26e 100644 +--- a/net/caif/caif_usb.c ++++ b/net/caif/caif_usb.c +@@ -134,6 +134,9 @@ static int cfusbl_device_notify(struct notifier_block *me, unsigned long what, + struct usb_device *usbdev; + int res; + ++ if (what == NETDEV_UNREGISTER && dev->reg_state >= NETREG_UNREGISTERED) ++ return 0; ++ + /* Check whether we have a NCM device, and find its VID/PID.
*/ + if (!(dev->dev.parent && dev->dev.parent->driver && + strcmp(dev->dev.parent->driver->name, "cdc_ncm") == 0)) +-- +2.39.2 + diff --git a/queue-5.10/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch b/queue-5.10/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch new file mode 100644 index 00000000000..a4dbce32d01 --- /dev/null +++ b/queue-5.10/net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch @@ -0,0 +1,71 @@ +From 087a540b3deaabe61acf91b3ef7446bd9a2519ee Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 4 Mar 2023 13:43:20 +0000 +Subject: net: ethernet: mtk_eth_soc: fix RX data corruption issue +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Daniel Golle + +[ Upstream commit 193250ace270fecd586dd2d0dfbd9cbd2ade977f ] + +Fix data corruption issue with SerDes connected PHYs operating at 1.25 +Gbps speed where we could previously observe about 30% packet loss while +the bad packet counter was increasing. + +As almost all boards with MediaTek MT7622 or MT7986 use either the MT7531 +switch IC operating at 3.125Gbps SerDes rate or single-port PHYs using +rate-adaptation to 2500Base-X mode, this issue only got exposed now when +we started trying to use SFP modules operating with 1.25 Gbps with the +BananaPi R3 board. 
+ +The fix is to set bit 12 which disables the RX FIFO clear function when +setting up MAC MCR, MediaTek SDK did the same change stating: +"If without this patch, kernel might receive invalid packets that are +corrupted by GMAC."[1] + +[1]: https://git01.mediatek.com/plugins/gitiles/openwrt/feeds/mtk-openwrt-feeds/+/d8a2975939a12686c4a95c40db21efdc3f821f63 + +Fixes: 42c03844e93d ("net-next: mediatek: add support for MediaTek MT7622 SoC") +Tested-by: Bjørn Mork +Signed-off-by: Daniel Golle +Reviewed-by: Vladimir Oltean +Reviewed-by: Florian Fainelli +Link: https://lore.kernel.org/r/138da2735f92c8b6f8578ec2e5a794ee515b665f.1677937317.git.daniel@makrotopia.org +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 ++- + drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +index 217dc67c48fa2..a8319295f1ab2 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c +@@ -354,7 +354,8 @@ static void mtk_mac_config(struct phylink_config *config, unsigned int mode, + mcr_cur = mtk_r32(mac->hw, MTK_MAC_MCR(mac->id)); + mcr_new = mcr_cur; + mcr_new |= MAC_MCR_MAX_RX_1536 | MAC_MCR_IPG_CFG | MAC_MCR_FORCE_MODE | +- MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK; ++ MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN | MAC_MCR_FORCE_LINK | ++ MAC_MCR_RX_FIFO_CLR_DIS; + + /* Only update control register when needed! 
*/ + if (mcr_new != mcr_cur) +diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +index 54a7cd93cc0fe..0ca3223ad5457 100644 +--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h ++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h +@@ -339,6 +339,7 @@ + #define MAC_MCR_FORCE_MODE BIT(15) + #define MAC_MCR_TX_EN BIT(14) + #define MAC_MCR_RX_EN BIT(13) ++#define MAC_MCR_RX_FIFO_CLR_DIS BIT(12) + #define MAC_MCR_BACKOFF_EN BIT(9) + #define MAC_MCR_BACKPR_EN BIT(8) + #define MAC_MCR_FORCE_RX_FC BIT(5) +-- +2.39.2 + diff --git a/queue-5.10/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch b/queue-5.10/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch new file mode 100644 index 00000000000..14265f7cc43 --- /dev/null +++ b/queue-5.10/net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch @@ -0,0 +1,126 @@ +From 3ca18a6f22982efdd0b4321d6431dacd3483658b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 1 Mar 2023 08:43:07 -0700 +Subject: net: lan78xx: fix accessing the LAN7800's internal phy specific + registers from the MAC driver + +From: Yuiko Oshino + +[ Upstream commit e57cf3639c323eeed05d3725fd82f91b349adca8 ] + +Move the LAN7800 internal phy (phy ID 0x0007c132) specific register +accesses to the phy driver (microchip.c). + +Fix the error reported by Enguerrand de Ribaucourt in December 2022, +"Some operations during the cable switch workaround modify the register +LAN88XX_INT_MASK of the PHY. However, this register is specific to the +LAN8835 PHY. For instance, if a DP8322I PHY is connected to the LAN7801, +that register (0x19), corresponds to the LED and MAC address +configuration, resulting in unapropriate behavior." + +I did not test with the DP8322I PHY, but I tested with an EVB-LAN7800 +with the internal PHY. 
+ +Fixes: 14437e3fa284 ("lan78xx: workaround of forced 100 Full/Half duplex mode error") +Signed-off-by: Yuiko Oshino +Reviewed-by: Andrew Lunn +Link: https://lore.kernel.org/r/20230301154307.30438-1-yuiko.oshino@microchip.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/phy/microchip.c | 32 ++++++++++++++++++++++++++++++++ + drivers/net/usb/lan78xx.c | 27 +-------------------------- + 2 files changed, 33 insertions(+), 26 deletions(-) + +diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c +index a644e8e5071c3..375bbd60b38af 100644 +--- a/drivers/net/phy/microchip.c ++++ b/drivers/net/phy/microchip.c +@@ -326,6 +326,37 @@ static int lan88xx_config_aneg(struct phy_device *phydev) + return genphy_config_aneg(phydev); + } + ++static void lan88xx_link_change_notify(struct phy_device *phydev) ++{ ++ int temp; ++ ++ /* At forced 100 F/H mode, chip may fail to set mode correctly ++ * when cable is switched between long(~50+m) and short one. ++ * As workaround, set to 10 before setting to 100 ++ * at forced 100 F/H mode. 
++ */ ++ if (!phydev->autoneg && phydev->speed == 100) { ++ /* disable phy interrupt */ ++ temp = phy_read(phydev, LAN88XX_INT_MASK); ++ temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; ++ phy_write(phydev, LAN88XX_INT_MASK, temp); ++ ++ temp = phy_read(phydev, MII_BMCR); ++ temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); ++ phy_write(phydev, MII_BMCR, temp); /* set to 10 first */ ++ temp |= BMCR_SPEED100; ++ phy_write(phydev, MII_BMCR, temp); /* set to 100 later */ ++ ++ /* clear pending interrupt generated while workaround */ ++ temp = phy_read(phydev, LAN88XX_INT_STS); ++ ++ /* enable phy interrupt back */ ++ temp = phy_read(phydev, LAN88XX_INT_MASK); ++ temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; ++ phy_write(phydev, LAN88XX_INT_MASK, temp); ++ } ++} ++ + static struct phy_driver microchip_phy_driver[] = { + { + .phy_id = 0x0007c130, +@@ -339,6 +370,7 @@ static struct phy_driver microchip_phy_driver[] = { + + .config_init = lan88xx_config_init, + .config_aneg = lan88xx_config_aneg, ++ .link_change_notify = lan88xx_link_change_notify, + + .ack_interrupt = lan88xx_phy_ack_interrupt, + .config_intr = lan88xx_phy_config_intr, +diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c +index 0b5b4f9c7c5b9..667984efeb3be 100644 +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -1843,33 +1843,8 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev) + static void lan78xx_link_status_change(struct net_device *net) + { + struct phy_device *phydev = net->phydev; +- int temp; +- +- /* At forced 100 F/H mode, chip may fail to set mode correctly +- * when cable is switched between long(~50+m) and short one. +- * As workaround, set to 10 before setting to 100 +- * at forced 100 F/H mode. 
+- */ +- if (!phydev->autoneg && (phydev->speed == 100)) { +- /* disable phy interrupt */ +- temp = phy_read(phydev, LAN88XX_INT_MASK); +- temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; +- phy_write(phydev, LAN88XX_INT_MASK, temp); + +- temp = phy_read(phydev, MII_BMCR); +- temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); +- phy_write(phydev, MII_BMCR, temp); /* set to 10 first */ +- temp |= BMCR_SPEED100; +- phy_write(phydev, MII_BMCR, temp); /* set to 100 later */ +- +- /* clear pending interrupt generated while workaround */ +- temp = phy_read(phydev, LAN88XX_INT_STS); +- +- /* enable phy interrupt back */ +- temp = phy_read(phydev, LAN88XX_INT_MASK); +- temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; +- phy_write(phydev, LAN88XX_INT_MASK, temp); +- } ++ phy_print_status(phydev); + } + + static int irq_map(struct irq_domain *d, unsigned int irq, +-- +2.39.2 + diff --git a/queue-5.10/net-phylib-get-rid-of-unnecessary-locking.patch b/queue-5.10/net-phylib-get-rid-of-unnecessary-locking.patch new file mode 100644 index 00000000000..26d1bd47ed8 --- /dev/null +++ b/queue-5.10/net-phylib-get-rid-of-unnecessary-locking.patch @@ -0,0 +1,174 @@ +From d6acee209596f0b12703c102f93bb74f3a892cdd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 16:37:54 +0000 +Subject: net: phylib: get rid of unnecessary locking + +From: Russell King (Oracle) + +[ Upstream commit f4b47a2e9463950df3e7c8b70e017877c1d4eb11 ] + +The locking in phy_probe() and phy_remove() does very little to prevent +any races with e.g. phy_attach_direct(), but instead causes lockdep ABBA +warnings. Remove it. 
+ +====================================================== +WARNING: possible circular locking dependency detected +6.2.0-dirty #1108 Tainted: G W E +------------------------------------------------------ +ip/415 is trying to acquire lock: +ffff5c268f81ef50 (&dev->lock){+.+.}-{3:3}, at: phy_attach_direct+0x17c/0x3a0 [libphy] + +but task is already holding lock: +ffffaef6496cb518 (rtnl_mutex){+.+.}-{3:3}, at: rtnetlink_rcv_msg+0x154/0x560 + +which lock already depends on the new lock. + +the existing dependency chain (in reverse order) is: + +-> #1 (rtnl_mutex){+.+.}-{3:3}: + __lock_acquire+0x35c/0x6c0 + lock_acquire.part.0+0xcc/0x220 + lock_acquire+0x68/0x84 + __mutex_lock+0x8c/0x414 + mutex_lock_nested+0x34/0x40 + rtnl_lock+0x24/0x30 + sfp_bus_add_upstream+0x34/0x150 + phy_sfp_probe+0x4c/0x94 [libphy] + mv3310_probe+0x148/0x184 [marvell10g] + phy_probe+0x8c/0x200 [libphy] + call_driver_probe+0xbc/0x15c + really_probe+0xc0/0x320 + __driver_probe_device+0x84/0x120 + driver_probe_device+0x44/0x120 + __device_attach_driver+0xc4/0x160 + bus_for_each_drv+0x80/0xe0 + __device_attach+0xb0/0x1f0 + device_initial_probe+0x1c/0x2c + bus_probe_device+0xa4/0xb0 + device_add+0x360/0x53c + phy_device_register+0x60/0xa4 [libphy] + fwnode_mdiobus_phy_device_register+0xc0/0x190 [fwnode_mdio] + fwnode_mdiobus_register_phy+0x160/0xd80 [fwnode_mdio] + of_mdiobus_register+0x140/0x340 [of_mdio] + orion_mdio_probe+0x298/0x3c0 [mvmdio] + platform_probe+0x70/0xe0 + call_driver_probe+0x34/0x15c + really_probe+0xc0/0x320 + __driver_probe_device+0x84/0x120 + driver_probe_device+0x44/0x120 + __driver_attach+0x104/0x210 + bus_for_each_dev+0x78/0xdc + driver_attach+0x2c/0x3c + bus_add_driver+0x184/0x240 + driver_register+0x80/0x13c + __platform_driver_register+0x30/0x3c + xt_compat_calc_jump+0x28/0xa4 [x_tables] + do_one_initcall+0x50/0x1b0 + do_init_module+0x50/0x1fc + load_module+0x684/0x744 + __do_sys_finit_module+0xc4/0x140 + __arm64_sys_finit_module+0x28/0x34 + invoke_syscall+0x50/0x120 + 
el0_svc_common.constprop.0+0x6c/0x1b0 + do_el0_svc+0x34/0x44 + el0_svc+0x48/0xf0 + el0t_64_sync_handler+0xb8/0xc0 + el0t_64_sync+0x1a0/0x1a4 + +-> #0 (&dev->lock){+.+.}-{3:3}: + check_prev_add+0xb4/0xc80 + validate_chain+0x414/0x47c + __lock_acquire+0x35c/0x6c0 + lock_acquire.part.0+0xcc/0x220 + lock_acquire+0x68/0x84 + __mutex_lock+0x8c/0x414 + mutex_lock_nested+0x34/0x40 + phy_attach_direct+0x17c/0x3a0 [libphy] + phylink_fwnode_phy_connect.part.0+0x70/0xe4 [phylink] + phylink_fwnode_phy_connect+0x48/0x60 [phylink] + mvpp2_open+0xec/0x2e0 [mvpp2] + __dev_open+0x104/0x214 + __dev_change_flags+0x1d4/0x254 + dev_change_flags+0x2c/0x7c + do_setlink+0x254/0xa50 + __rtnl_newlink+0x430/0x514 + rtnl_newlink+0x58/0x8c + rtnetlink_rcv_msg+0x17c/0x560 + netlink_rcv_skb+0x64/0x150 + rtnetlink_rcv+0x20/0x30 + netlink_unicast+0x1d4/0x2b4 + netlink_sendmsg+0x1a4/0x400 + ____sys_sendmsg+0x228/0x290 + ___sys_sendmsg+0x88/0xec + __sys_sendmsg+0x70/0xd0 + __arm64_sys_sendmsg+0x2c/0x40 + invoke_syscall+0x50/0x120 + el0_svc_common.constprop.0+0x6c/0x1b0 + do_el0_svc+0x34/0x44 + el0_svc+0x48/0xf0 + el0t_64_sync_handler+0xb8/0xc0 + el0t_64_sync+0x1a0/0x1a4 + +other info that might help us debug this: + + Possible unsafe locking scenario: + + CPU0 CPU1 + ---- ---- + lock(rtnl_mutex); + lock(&dev->lock); + lock(rtnl_mutex); + lock(&dev->lock); + + *** DEADLOCK *** + +Fixes: 298e54fa810e ("net: phy: add core phylib sfp support") +Reported-by: Marc Zyngier +Signed-off-by: Russell King (Oracle) +Reviewed-by: Andrew Lunn +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/phy/phy_device.c | 8 +------- + 1 file changed, 1 insertion(+), 7 deletions(-) + +diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c +index 3ef5aa6b72a7e..e771e0e8a9bc6 100644 +--- a/drivers/net/phy/phy_device.c ++++ b/drivers/net/phy/phy_device.c +@@ -2833,8 +2833,6 @@ static int phy_probe(struct device *dev) + if (phydrv->flags & PHY_IS_INTERNAL) + phydev->is_internal = true; + +- mutex_lock(&phydev->lock); +- + /* Deassert the reset signal */ + phy_device_reset(phydev, 0); + +@@ -2903,12 +2901,10 @@ static int phy_probe(struct device *dev) + phydev->state = PHY_READY; + + out: +- /* Assert the reset signal */ ++ /* Re-assert the reset signal on error */ + if (err) + phy_device_reset(phydev, 1); + +- mutex_unlock(&phydev->lock); +- + return err; + } + +@@ -2918,9 +2914,7 @@ static int phy_remove(struct device *dev) + + cancel_delayed_work_sync(&phydev->state_queue); + +- mutex_lock(&phydev->lock); + phydev->state = PHY_DOWN; +- mutex_unlock(&phydev->lock); + + sfp_bus_del_upstream(phydev->sfp_bus); + phydev->sfp_bus = NULL; +-- +2.39.2 + diff --git a/queue-5.10/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch b/queue-5.10/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch new file mode 100644 index 00000000000..dad9b69d1b1 --- /dev/null +++ b/queue-5.10/net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch @@ -0,0 +1,74 @@ +From 15737a362a18c35944df6ef005f6b797cc011146 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 Mar 2023 11:23:46 +0800 +Subject: net/smc: fix fallback failed while sendmsg with fastopen + +From: D. Wythe + +[ Upstream commit ce7ca794712f186da99719e8b4e97bd5ddbb04c3 ] + +Before determining whether the msg has unsupported options, it has been +prematurely terminated by the wrong status check. + +For the application, the general usages of MSG_FASTOPEN likes + +fd = socket(...) 
+/* rather than connect */ +sendto(fd, data, len, MSG_FASTOPEN) + +Hence, We need to check the flag before state check, because the sock +state here is always SMC_INIT when applications tries MSG_FASTOPEN. +Once we found unsupported options, fallback it to TCP. + +Fixes: ee9dfbef02d1 ("net/smc: handle sockopts forcing fallback") +Signed-off-by: D. Wythe +Signed-off-by: Simon Horman + +v2 -> v1: Optimize code style +Reviewed-by: Tony Lu + +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/smc/af_smc.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c +index 41cbc7c89c9d2..8ab84926816f6 100644 +--- a/net/smc/af_smc.c ++++ b/net/smc/af_smc.c +@@ -1988,16 +1988,14 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + { + struct sock *sk = sock->sk; + struct smc_sock *smc; +- int rc = -EPIPE; ++ int rc; + + smc = smc_sk(sk); + lock_sock(sk); +- if ((sk->sk_state != SMC_ACTIVE) && +- (sk->sk_state != SMC_APPCLOSEWAIT1) && +- (sk->sk_state != SMC_INIT)) +- goto out; + ++ /* SMC does not support connect with fastopen */ + if (msg->msg_flags & MSG_FASTOPEN) { ++ /* not connected yet, fallback */ + if (sk->sk_state == SMC_INIT && !smc->connect_nonblock) { + smc_switch_to_fallback(smc); + smc->fallback_rsn = SMC_CLC_DECL_OPTUNSUPP; +@@ -2005,6 +2003,11 @@ static int smc_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) + rc = -EINVAL; + goto out; + } ++ } else if ((sk->sk_state != SMC_ACTIVE) && ++ (sk->sk_state != SMC_APPCLOSEWAIT1) && ++ (sk->sk_state != SMC_INIT)) { ++ rc = -EPIPE; ++ goto out; + } + + if (smc->use_fallback) +-- +2.39.2 + diff --git a/queue-5.10/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch b/queue-5.10/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch new file mode 100644 index 00000000000..c77dc8d8ece --- /dev/null +++ b/queue-5.10/net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch @@ -0,0 +1,50 @@ 
+From ff5576cb0372afdabad429f5c9fc87d6ffb29972 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Mar 2023 14:21:43 +0800 +Subject: net: stmmac: add to set device wake up flag when stmmac init phy + +From: Rongguang Wei + +[ Upstream commit a9334b702a03b693f54ebd3b98f67bf722b74870 ] + +When MAC is not support PMT, driver will check PHY's WoL capability +and set device wakeup capability in stmmac_init_phy(). We can enable +the WoL through ethtool, the driver would enable the device wake up +flag. Now the device_may_wakeup() return true. + +But if there is a way which enable the PHY's WoL capability directly, +like in BIOS. The driver would not know the enable thing and would not +set the device wake up flag. The phy_suspend may failed like this: + +[ 32.409063] PM: dpm_run_callback(): mdio_bus_phy_suspend+0x0/0x50 returns -16 +[ 32.409065] PM: Device stmmac-1:00 failed to suspend: error -16 +[ 32.409067] PM: Some devices failed to suspend, or early wake event detected + +Add to set the device wakeup enable flag according to the get_wol +function result in PHY can fix the error in this scene. + +v2: add a Fixes tag. + +Fixes: 1d8e5b0f3f2c ("net: stmmac: Support WOL with phy") +Signed-off-by: Rongguang Wei +Signed-off-by: David S.
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +index 1ec000d4c7705..04c59102a2863 100644 +--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c ++++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +@@ -1145,6 +1145,7 @@ static int stmmac_init_phy(struct net_device *dev) + + phylink_ethtool_get_wol(priv->phylink, &wol); + device_set_wakeup_capable(priv->device, !!wol.supported); ++ device_set_wakeup_enable(priv->device, !!wol.wolopts); + } + + return ret; +-- +2.39.2 + diff --git a/queue-5.10/net-usb-lan78xx-remove-lots-of-set-but-unused-ret-va.patch b/queue-5.10/net-usb-lan78xx-remove-lots-of-set-but-unused-ret-va.patch new file mode 100644 index 00000000000..a59e2ac5ef0 --- /dev/null +++ b/queue-5.10/net-usb-lan78xx-remove-lots-of-set-but-unused-ret-va.patch @@ -0,0 +1,540 @@ +From 5bd38ea4f8a413bd78444f4001fd1ca2b70e9193 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 2 Nov 2020 11:45:06 +0000 +Subject: net: usb: lan78xx: Remove lots of set but unused 'ret' variables +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Lee Jones + +[ Upstream commit 06cd7c46b3ab3f2252c61bf85b191236cf0254e1 ] + +Fixes the following W=1 kernel build warning(s): + + drivers/net/usb/lan78xx.c: In function ‘lan78xx_read_raw_otp’: + drivers/net/usb/lan78xx.c:825:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_write_raw_otp’: + drivers/net/usb/lan78xx.c:879:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_deferred_multicast_write’: + drivers/net/usb/lan78xx.c:1041:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function 
‘lan78xx_update_flowcontrol’: + drivers/net/usb/lan78xx.c:1127:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_init_mac_address’: + drivers/net/usb/lan78xx.c:1666:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_link_status_change’: + drivers/net/usb/lan78xx.c:1841:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_irq_bus_sync_unlock’: + drivers/net/usb/lan78xx.c:1920:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan8835_fixup’: + drivers/net/usb/lan78xx.c:1994:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_rx_max_frame_length’: + drivers/net/usb/lan78xx.c:2192:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_change_mtu’: + drivers/net/usb/lan78xx.c:2270:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_mac_addr’: + drivers/net/usb/lan78xx.c:2299:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_features’: + drivers/net/usb/lan78xx.c:2333:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + drivers/net/usb/lan78xx.c: In function ‘lan78xx_set_suspend’: + drivers/net/usb/lan78xx.c:3807:6: warning: variable ‘ret’ set but not used [-Wunused-but-set-variable] + +Signed-off-by: Lee Jones +Link: https://lore.kernel.org/r/20201102114512.1062724-25-lee.jones@linaro.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: e57cf3639c32 ("net: lan78xx: fix accessing the LAN7800's internal phy specific registers from the MAC driver") +Signed-off-by: Sasha Levin +--- + 
drivers/net/usb/lan78xx.c | 168 ++++++++++++++++++-------------------- + 1 file changed, 78 insertions(+), 90 deletions(-) + +diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c +index 6f7b70522d926..0b5b4f9c7c5b9 100644 +--- a/drivers/net/usb/lan78xx.c ++++ b/drivers/net/usb/lan78xx.c +@@ -824,20 +824,19 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, + u32 length, u8 *data) + { + int i; +- int ret; + u32 buf; + unsigned long timeout; + +- ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); ++ lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + + if (buf & OTP_PWR_DN_PWRDN_N_) { + /* clear it and wait to be cleared */ +- ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0); ++ lan78xx_write_reg(dev, OTP_PWR_DN, 0); + + timeout = jiffies + HZ; + do { + usleep_range(1, 10); +- ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); ++ lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + if (time_after(jiffies, timeout)) { + netdev_warn(dev->net, + "timeout on OTP_PWR_DN"); +@@ -847,18 +846,18 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, + } + + for (i = 0; i < length; i++) { +- ret = lan78xx_write_reg(dev, OTP_ADDR1, ++ lan78xx_write_reg(dev, OTP_ADDR1, + ((offset + i) >> 8) & OTP_ADDR1_15_11); +- ret = lan78xx_write_reg(dev, OTP_ADDR2, ++ lan78xx_write_reg(dev, OTP_ADDR2, + ((offset + i) & OTP_ADDR2_10_3)); + +- ret = lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); +- ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); ++ lan78xx_write_reg(dev, OTP_FUNC_CMD, OTP_FUNC_CMD_READ_); ++ lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); + + timeout = jiffies + HZ; + do { + udelay(1); +- ret = lan78xx_read_reg(dev, OTP_STATUS, &buf); ++ lan78xx_read_reg(dev, OTP_STATUS, &buf); + if (time_after(jiffies, timeout)) { + netdev_warn(dev->net, + "timeout on OTP_STATUS"); +@@ -866,7 +865,7 @@ static int lan78xx_read_raw_otp(struct lan78xx_net *dev, u32 offset, + } + } while (buf & OTP_STATUS_BUSY_); + +- ret = lan78xx_read_reg(dev, 
OTP_RD_DATA, &buf); ++ lan78xx_read_reg(dev, OTP_RD_DATA, &buf); + + data[i] = (u8)(buf & 0xFF); + } +@@ -878,20 +877,19 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset, + u32 length, u8 *data) + { + int i; +- int ret; + u32 buf; + unsigned long timeout; + +- ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); ++ lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + + if (buf & OTP_PWR_DN_PWRDN_N_) { + /* clear it and wait to be cleared */ +- ret = lan78xx_write_reg(dev, OTP_PWR_DN, 0); ++ lan78xx_write_reg(dev, OTP_PWR_DN, 0); + + timeout = jiffies + HZ; + do { + udelay(1); +- ret = lan78xx_read_reg(dev, OTP_PWR_DN, &buf); ++ lan78xx_read_reg(dev, OTP_PWR_DN, &buf); + if (time_after(jiffies, timeout)) { + netdev_warn(dev->net, + "timeout on OTP_PWR_DN completion"); +@@ -901,21 +899,21 @@ static int lan78xx_write_raw_otp(struct lan78xx_net *dev, u32 offset, + } + + /* set to BYTE program mode */ +- ret = lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_); ++ lan78xx_write_reg(dev, OTP_PRGM_MODE, OTP_PRGM_MODE_BYTE_); + + for (i = 0; i < length; i++) { +- ret = lan78xx_write_reg(dev, OTP_ADDR1, ++ lan78xx_write_reg(dev, OTP_ADDR1, + ((offset + i) >> 8) & OTP_ADDR1_15_11); +- ret = lan78xx_write_reg(dev, OTP_ADDR2, ++ lan78xx_write_reg(dev, OTP_ADDR2, + ((offset + i) & OTP_ADDR2_10_3)); +- ret = lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]); +- ret = lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_); +- ret = lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); ++ lan78xx_write_reg(dev, OTP_PRGM_DATA, data[i]); ++ lan78xx_write_reg(dev, OTP_TST_CMD, OTP_TST_CMD_PRGVRFY_); ++ lan78xx_write_reg(dev, OTP_CMD_GO, OTP_CMD_GO_GO_); + + timeout = jiffies + HZ; + do { + udelay(1); +- ret = lan78xx_read_reg(dev, OTP_STATUS, &buf); ++ lan78xx_read_reg(dev, OTP_STATUS, &buf); + if (time_after(jiffies, timeout)) { + netdev_warn(dev->net, + "Timeout on OTP_STATUS completion"); +@@ -1040,7 +1038,6 @@ static void lan78xx_deferred_multicast_write(struct 
work_struct *param) + container_of(param, struct lan78xx_priv, set_multicast); + struct lan78xx_net *dev = pdata->dev; + int i; +- int ret; + + netif_dbg(dev, drv, dev->net, "deferred multicast write 0x%08x\n", + pdata->rfe_ctl); +@@ -1049,14 +1046,14 @@ static void lan78xx_deferred_multicast_write(struct work_struct *param) + DP_SEL_VHF_HASH_LEN, pdata->mchash_table); + + for (i = 1; i < NUM_OF_MAF; i++) { +- ret = lan78xx_write_reg(dev, MAF_HI(i), 0); +- ret = lan78xx_write_reg(dev, MAF_LO(i), ++ lan78xx_write_reg(dev, MAF_HI(i), 0); ++ lan78xx_write_reg(dev, MAF_LO(i), + pdata->pfilter_table[i][1]); +- ret = lan78xx_write_reg(dev, MAF_HI(i), ++ lan78xx_write_reg(dev, MAF_HI(i), + pdata->pfilter_table[i][0]); + } + +- ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); ++ lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); + } + + static void lan78xx_set_multicast(struct net_device *netdev) +@@ -1126,7 +1123,6 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex, + u16 lcladv, u16 rmtadv) + { + u32 flow = 0, fct_flow = 0; +- int ret; + u8 cap; + + if (dev->fc_autoneg) +@@ -1149,10 +1145,10 @@ static int lan78xx_update_flowcontrol(struct lan78xx_net *dev, u8 duplex, + (cap & FLOW_CTRL_RX ? "enabled" : "disabled"), + (cap & FLOW_CTRL_TX ? 
"enabled" : "disabled")); + +- ret = lan78xx_write_reg(dev, FCT_FLOW, fct_flow); ++ lan78xx_write_reg(dev, FCT_FLOW, fct_flow); + + /* threshold value should be set before enabling flow */ +- ret = lan78xx_write_reg(dev, FLOW, flow); ++ lan78xx_write_reg(dev, FLOW, flow); + + return 0; + } +@@ -1673,11 +1669,10 @@ static const struct ethtool_ops lan78xx_ethtool_ops = { + static void lan78xx_init_mac_address(struct lan78xx_net *dev) + { + u32 addr_lo, addr_hi; +- int ret; + u8 addr[6]; + +- ret = lan78xx_read_reg(dev, RX_ADDRL, &addr_lo); +- ret = lan78xx_read_reg(dev, RX_ADDRH, &addr_hi); ++ lan78xx_read_reg(dev, RX_ADDRL, &addr_lo); ++ lan78xx_read_reg(dev, RX_ADDRH, &addr_hi); + + addr[0] = addr_lo & 0xFF; + addr[1] = (addr_lo >> 8) & 0xFF; +@@ -1710,12 +1705,12 @@ static void lan78xx_init_mac_address(struct lan78xx_net *dev) + (addr[2] << 16) | (addr[3] << 24); + addr_hi = addr[4] | (addr[5] << 8); + +- ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); +- ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); ++ lan78xx_write_reg(dev, RX_ADDRL, addr_lo); ++ lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + } + +- ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); +- ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); ++ lan78xx_write_reg(dev, MAF_LO(0), addr_lo); ++ lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); + + ether_addr_copy(dev->net->dev_addr, addr); + } +@@ -1848,7 +1843,7 @@ static void lan78xx_remove_mdio(struct lan78xx_net *dev) + static void lan78xx_link_status_change(struct net_device *net) + { + struct phy_device *phydev = net->phydev; +- int ret, temp; ++ int temp; + + /* At forced 100 F/H mode, chip may fail to set mode correctly + * when cable is switched between long(~50+m) and short one. 
+@@ -1859,7 +1854,7 @@ static void lan78xx_link_status_change(struct net_device *net) + /* disable phy interrupt */ + temp = phy_read(phydev, LAN88XX_INT_MASK); + temp &= ~LAN88XX_INT_MASK_MDINTPIN_EN_; +- ret = phy_write(phydev, LAN88XX_INT_MASK, temp); ++ phy_write(phydev, LAN88XX_INT_MASK, temp); + + temp = phy_read(phydev, MII_BMCR); + temp &= ~(BMCR_SPEED100 | BMCR_SPEED1000); +@@ -1873,7 +1868,7 @@ static void lan78xx_link_status_change(struct net_device *net) + /* enable phy interrupt back */ + temp = phy_read(phydev, LAN88XX_INT_MASK); + temp |= LAN88XX_INT_MASK_MDINTPIN_EN_; +- ret = phy_write(phydev, LAN88XX_INT_MASK, temp); ++ phy_write(phydev, LAN88XX_INT_MASK, temp); + } + } + +@@ -1927,14 +1922,13 @@ static void lan78xx_irq_bus_sync_unlock(struct irq_data *irqd) + struct lan78xx_net *dev = + container_of(data, struct lan78xx_net, domain_data); + u32 buf; +- int ret; + + /* call register access here because irq_bus_lock & irq_bus_sync_unlock + * are only two callbacks executed in non-atomic contex. 
+ */ +- ret = lan78xx_read_reg(dev, INT_EP_CTL, &buf); ++ lan78xx_read_reg(dev, INT_EP_CTL, &buf); + if (buf != data->irqenable) +- ret = lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable); ++ lan78xx_write_reg(dev, INT_EP_CTL, data->irqenable); + + mutex_unlock(&data->irq_lock); + } +@@ -2001,7 +1995,6 @@ static void lan78xx_remove_irq_domain(struct lan78xx_net *dev) + static int lan8835_fixup(struct phy_device *phydev) + { + int buf; +- int ret; + struct lan78xx_net *dev = netdev_priv(phydev->attached_dev); + + /* LED2/PME_N/IRQ_N/RGMII_ID pin to IRQ_N mode */ +@@ -2011,11 +2004,11 @@ static int lan8835_fixup(struct phy_device *phydev) + phy_write_mmd(phydev, MDIO_MMD_PCS, 0x8010, buf); + + /* RGMII MAC TXC Delay Enable */ +- ret = lan78xx_write_reg(dev, MAC_RGMII_ID, ++ lan78xx_write_reg(dev, MAC_RGMII_ID, + MAC_RGMII_ID_TXC_DELAY_EN_); + + /* RGMII TX DLL Tune Adjust */ +- ret = lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00); ++ lan78xx_write_reg(dev, RGMII_TX_BYP_DLL, 0x3D00); + + dev->interface = PHY_INTERFACE_MODE_RGMII_TXID; + +@@ -2199,28 +2192,27 @@ static int lan78xx_phy_init(struct lan78xx_net *dev) + + static int lan78xx_set_rx_max_frame_length(struct lan78xx_net *dev, int size) + { +- int ret = 0; + u32 buf; + bool rxenabled; + +- ret = lan78xx_read_reg(dev, MAC_RX, &buf); ++ lan78xx_read_reg(dev, MAC_RX, &buf); + + rxenabled = ((buf & MAC_RX_RXEN_) != 0); + + if (rxenabled) { + buf &= ~MAC_RX_RXEN_; +- ret = lan78xx_write_reg(dev, MAC_RX, buf); ++ lan78xx_write_reg(dev, MAC_RX, buf); + } + + /* add 4 to size for FCS */ + buf &= ~MAC_RX_MAX_SIZE_MASK_; + buf |= (((size + 4) << MAC_RX_MAX_SIZE_SHIFT_) & MAC_RX_MAX_SIZE_MASK_); + +- ret = lan78xx_write_reg(dev, MAC_RX, buf); ++ lan78xx_write_reg(dev, MAC_RX, buf); + + if (rxenabled) { + buf |= MAC_RX_RXEN_; +- ret = lan78xx_write_reg(dev, MAC_RX, buf); ++ lan78xx_write_reg(dev, MAC_RX, buf); + } + + return 0; +@@ -2277,13 +2269,12 @@ static int lan78xx_change_mtu(struct net_device *netdev, int 
new_mtu) + int ll_mtu = new_mtu + netdev->hard_header_len; + int old_hard_mtu = dev->hard_mtu; + int old_rx_urb_size = dev->rx_urb_size; +- int ret; + + /* no second zero-length packet read wanted after mtu-sized packets */ + if ((ll_mtu % dev->maxpacket) == 0) + return -EDOM; + +- ret = lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN); ++ lan78xx_set_rx_max_frame_length(dev, new_mtu + VLAN_ETH_HLEN); + + netdev->mtu = new_mtu; + +@@ -2306,7 +2297,6 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) + struct lan78xx_net *dev = netdev_priv(netdev); + struct sockaddr *addr = p; + u32 addr_lo, addr_hi; +- int ret; + + if (netif_running(netdev)) + return -EBUSY; +@@ -2323,12 +2313,12 @@ static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) + addr_hi = netdev->dev_addr[4] | + netdev->dev_addr[5] << 8; + +- ret = lan78xx_write_reg(dev, RX_ADDRL, addr_lo); +- ret = lan78xx_write_reg(dev, RX_ADDRH, addr_hi); ++ lan78xx_write_reg(dev, RX_ADDRL, addr_lo); ++ lan78xx_write_reg(dev, RX_ADDRH, addr_hi); + + /* Added to support MAC address changes */ +- ret = lan78xx_write_reg(dev, MAF_LO(0), addr_lo); +- ret = lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); ++ lan78xx_write_reg(dev, MAF_LO(0), addr_lo); ++ lan78xx_write_reg(dev, MAF_HI(0), addr_hi | MAF_HI_VALID_); + + return 0; + } +@@ -2340,7 +2330,6 @@ static int lan78xx_set_features(struct net_device *netdev, + struct lan78xx_net *dev = netdev_priv(netdev); + struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); + unsigned long flags; +- int ret; + + spin_lock_irqsave(&pdata->rfe_ctl_lock, flags); + +@@ -2364,7 +2353,7 @@ static int lan78xx_set_features(struct net_device *netdev, + + spin_unlock_irqrestore(&pdata->rfe_ctl_lock, flags); + +- ret = lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); ++ lan78xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); + + return 0; + } +@@ -3820,7 +3809,6 @@ static u16 lan78xx_wakeframe_crc16(const u8 *buf, int len) + static 
int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) + { + u32 buf; +- int ret; + int mask_index; + u16 crc; + u32 temp_wucsr; +@@ -3829,26 +3817,26 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) + const u8 ipv6_multicast[3] = { 0x33, 0x33 }; + const u8 arp_type[2] = { 0x08, 0x06 }; + +- ret = lan78xx_read_reg(dev, MAC_TX, &buf); ++ lan78xx_read_reg(dev, MAC_TX, &buf); + buf &= ~MAC_TX_TXEN_; +- ret = lan78xx_write_reg(dev, MAC_TX, buf); +- ret = lan78xx_read_reg(dev, MAC_RX, &buf); ++ lan78xx_write_reg(dev, MAC_TX, buf); ++ lan78xx_read_reg(dev, MAC_RX, &buf); + buf &= ~MAC_RX_RXEN_; +- ret = lan78xx_write_reg(dev, MAC_RX, buf); ++ lan78xx_write_reg(dev, MAC_RX, buf); + +- ret = lan78xx_write_reg(dev, WUCSR, 0); +- ret = lan78xx_write_reg(dev, WUCSR2, 0); +- ret = lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); ++ lan78xx_write_reg(dev, WUCSR, 0); ++ lan78xx_write_reg(dev, WUCSR2, 0); ++ lan78xx_write_reg(dev, WK_SRC, 0xFFF1FF1FUL); + + temp_wucsr = 0; + + temp_pmt_ctl = 0; +- ret = lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl); ++ lan78xx_read_reg(dev, PMT_CTL, &temp_pmt_ctl); + temp_pmt_ctl &= ~PMT_CTL_RES_CLR_WKP_EN_; + temp_pmt_ctl |= PMT_CTL_RES_CLR_WKP_STS_; + + for (mask_index = 0; mask_index < NUM_OF_WUF_CFG; mask_index++) +- ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_CFG(mask_index), 0); + + mask_index = 0; + if (wol & WAKE_PHY) { +@@ -3877,30 +3865,30 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) + + /* set WUF_CFG & WUF_MASK for IPv4 Multicast */ + crc = lan78xx_wakeframe_crc16(ipv4_multicast, 3); +- ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), ++ lan78xx_write_reg(dev, WUF_CFG(mask_index), + WUF_CFGX_EN_ | + WUF_CFGX_TYPE_MCAST_ | + (0 << WUF_CFGX_OFFSET_SHIFT_) | + (crc & WUF_CFGX_CRC16_MASK_)); + +- ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7); +- ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); +- ret = lan78xx_write_reg(dev, 
WUF_MASK2(mask_index), 0); +- ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_MASK0(mask_index), 7); ++ lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); + mask_index++; + + /* for IPv6 Multicast */ + crc = lan78xx_wakeframe_crc16(ipv6_multicast, 2); +- ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), ++ lan78xx_write_reg(dev, WUF_CFG(mask_index), + WUF_CFGX_EN_ | + WUF_CFGX_TYPE_MCAST_ | + (0 << WUF_CFGX_OFFSET_SHIFT_) | + (crc & WUF_CFGX_CRC16_MASK_)); + +- ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3); +- ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); +- ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); +- ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_MASK0(mask_index), 3); ++ lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); + mask_index++; + + temp_pmt_ctl |= PMT_CTL_WOL_EN_; +@@ -3921,16 +3909,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) + * for packettype (offset 12,13) = ARP (0x0806) + */ + crc = lan78xx_wakeframe_crc16(arp_type, 2); +- ret = lan78xx_write_reg(dev, WUF_CFG(mask_index), ++ lan78xx_write_reg(dev, WUF_CFG(mask_index), + WUF_CFGX_EN_ | + WUF_CFGX_TYPE_ALL_ | + (0 << WUF_CFGX_OFFSET_SHIFT_) | + (crc & WUF_CFGX_CRC16_MASK_)); + +- ret = lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000); +- ret = lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); +- ret = lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); +- ret = lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_MASK0(mask_index), 0x3000); ++ lan78xx_write_reg(dev, WUF_MASK1(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_MASK2(mask_index), 0); ++ lan78xx_write_reg(dev, WUF_MASK3(mask_index), 0); + mask_index++; + + temp_pmt_ctl |= 
PMT_CTL_WOL_EN_; +@@ -3938,7 +3926,7 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) + temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_; + } + +- ret = lan78xx_write_reg(dev, WUCSR, temp_wucsr); ++ lan78xx_write_reg(dev, WUCSR, temp_wucsr); + + /* when multiple WOL bits are set */ + if (hweight_long((unsigned long)wol) > 1) { +@@ -3946,16 +3934,16 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) + temp_pmt_ctl &= ~PMT_CTL_SUS_MODE_MASK_; + temp_pmt_ctl |= PMT_CTL_SUS_MODE_0_; + } +- ret = lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl); ++ lan78xx_write_reg(dev, PMT_CTL, temp_pmt_ctl); + + /* clear WUPS */ +- ret = lan78xx_read_reg(dev, PMT_CTL, &buf); ++ lan78xx_read_reg(dev, PMT_CTL, &buf); + buf |= PMT_CTL_WUPS_MASK_; +- ret = lan78xx_write_reg(dev, PMT_CTL, buf); ++ lan78xx_write_reg(dev, PMT_CTL, buf); + +- ret = lan78xx_read_reg(dev, MAC_RX, &buf); ++ lan78xx_read_reg(dev, MAC_RX, &buf); + buf |= MAC_RX_RXEN_; +- ret = lan78xx_write_reg(dev, MAC_RX, buf); ++ lan78xx_write_reg(dev, MAC_RX, buf); + + return 0; + } +-- +2.39.2 + diff --git a/queue-5.10/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch b/queue-5.10/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch new file mode 100644 index 00000000000..36912afe3a9 --- /dev/null +++ b/queue-5.10/netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch @@ -0,0 +1,80 @@ +From 6cb1137d72bb5310fa2ef6f663fc7b7adf26ee20 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Mar 2023 17:48:31 -0800 +Subject: netfilter: ctnetlink: revert to dumping mark regardless of event type + +From: Ivan Delalande + +[ Upstream commit 9f7dd42f0db1dc6915a52d4a8a96ca18dd8cc34e ] + +It seems that change was unintentional, we have userspace code that +needs the mark while listening for events like REPLY, DESTROY, etc. +Also include 0-marks in requested dumps, as they were before that fix. 
+ +Fixes: 1feeae071507 ("netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark") +Signed-off-by: Ivan Delalande +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nf_conntrack_netlink.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c +index f8ba3bc25cf34..c9ca857f1068d 100644 +--- a/net/netfilter/nf_conntrack_netlink.c ++++ b/net/netfilter/nf_conntrack_netlink.c +@@ -317,11 +317,12 @@ ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) + } + + #ifdef CONFIG_NF_CONNTRACK_MARK +-static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) ++static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct, ++ bool dump) + { + u32 mark = READ_ONCE(ct->mark); + +- if (!mark) ++ if (!mark && !dump) + return 0; + + if (nla_put_be32(skb, CTA_MARK, htonl(mark))) +@@ -332,7 +333,7 @@ static int ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct) + return -1; + } + #else +-#define ctnetlink_dump_mark(a, b) (0) ++#define ctnetlink_dump_mark(a, b, c) (0) + #endif + + #ifdef CONFIG_NF_CONNTRACK_SECMARK +@@ -537,7 +538,7 @@ static int ctnetlink_dump_extinfo(struct sk_buff *skb, + static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct) + { + if (ctnetlink_dump_status(skb, ct) < 0 || +- ctnetlink_dump_mark(skb, ct) < 0 || ++ ctnetlink_dump_mark(skb, ct, true) < 0 || + ctnetlink_dump_secctx(skb, ct) < 0 || + ctnetlink_dump_id(skb, ct) < 0 || + ctnetlink_dump_use(skb, ct) < 0 || +@@ -816,8 +817,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) + } + + #ifdef CONFIG_NF_CONNTRACK_MARK +- if (events & (1 << IPCT_MARK) && +- ctnetlink_dump_mark(skb, ct) < 0) ++ if (ctnetlink_dump_mark(skb, ct, events & (1 << IPCT_MARK))) + goto nla_put_failure; + #endif + nlmsg_end(skb, nlh); +@@ -2734,7 +2734,7 @@ static int 
__ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) + goto nla_put_failure; + + #ifdef CONFIG_NF_CONNTRACK_MARK +- if (ctnetlink_dump_mark(skb, ct) < 0) ++ if (ctnetlink_dump_mark(skb, ct, true) < 0) + goto nla_put_failure; + #endif + if (ctnetlink_dump_labels(skb, ct) < 0) +-- +2.39.2 + diff --git a/queue-5.10/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch b/queue-5.10/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch new file mode 100644 index 00000000000..da6c0f28c1c --- /dev/null +++ b/queue-5.10/netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch @@ -0,0 +1,83 @@ +From 6381d75ba1ecd932217d7f7d64942ff0f5b7445f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 10:58:56 +0100 +Subject: netfilter: tproxy: fix deadlock due to missing BH disable +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Florian Westphal + +[ Upstream commit 4a02426787bf024dafdb79b362285ee325de3f5e ] + +The xtables packet traverser performs an unconditional local_bh_disable(), +but the nf_tables evaluation loop does not. + +Functions that are called from either xtables or nftables must assume +that they can be called in process context. + +inet_twsk_deschedule_put() assumes that no softirq interrupt can occur. +If tproxy is used from nf_tables it's possible that we'll deadlock +trying to acquire a lock already held in process context. + +Add a small helper that takes care of this and use it. 
+ +Link: https://lore.kernel.org/netfilter-devel/401bd6ed-314a-a196-1cdc-e13c720cc8f2@balasys.hu/ +Fixes: 4ed8eb6570a4 ("netfilter: nf_tables: Add native tproxy support") +Reported-and-tested-by: Major Dávid +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + include/net/netfilter/nf_tproxy.h | 7 +++++++ + net/ipv4/netfilter/nf_tproxy_ipv4.c | 2 +- + net/ipv6/netfilter/nf_tproxy_ipv6.c | 2 +- + 3 files changed, 9 insertions(+), 2 deletions(-) + +diff --git a/include/net/netfilter/nf_tproxy.h b/include/net/netfilter/nf_tproxy.h +index 82d0e41b76f22..faa108b1ba675 100644 +--- a/include/net/netfilter/nf_tproxy.h ++++ b/include/net/netfilter/nf_tproxy.h +@@ -17,6 +17,13 @@ static inline bool nf_tproxy_sk_is_transparent(struct sock *sk) + return false; + } + ++static inline void nf_tproxy_twsk_deschedule_put(struct inet_timewait_sock *tw) ++{ ++ local_bh_disable(); ++ inet_twsk_deschedule_put(tw); ++ local_bh_enable(); ++} ++ + /* assign a socket to the skb -- consumes sk */ + static inline void nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) + { +diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c +index b2bae0b0e42a1..61cb2341f50fe 100644 +--- a/net/ipv4/netfilter/nf_tproxy_ipv4.c ++++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c +@@ -38,7 +38,7 @@ nf_tproxy_handle_time_wait4(struct net *net, struct sk_buff *skb, + hp->source, lport ? lport : hp->dest, + skb->dev, NF_TPROXY_LOOKUP_LISTENER); + if (sk2) { +- inet_twsk_deschedule_put(inet_twsk(sk)); ++ nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); + sk = sk2; + } + } +diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c +index 6bac68fb27a39..3fe4f15e01dc8 100644 +--- a/net/ipv6/netfilter/nf_tproxy_ipv6.c ++++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c +@@ -63,7 +63,7 @@ nf_tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff, + lport ? 
lport : hp->dest, + skb->dev, NF_TPROXY_LOOKUP_LISTENER); + if (sk2) { +- inet_twsk_deschedule_put(inet_twsk(sk)); ++ nf_tproxy_twsk_deschedule_put(inet_twsk(sk)); + sk = sk2; + } + } +-- +2.39.2 + diff --git a/queue-5.10/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch b/queue-5.10/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch new file mode 100644 index 00000000000..469c096f677 --- /dev/null +++ b/queue-5.10/nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch @@ -0,0 +1,49 @@ +From ad95ef4d6e4c52faf8606c661e9cd450d5edca1d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Feb 2023 17:30:37 +0800 +Subject: nfc: fdp: add null check of devm_kmalloc_array in + fdp_nci_i2c_read_device_properties + +From: Kang Chen + +[ Upstream commit 11f180a5d62a51b484e9648f9b310e1bd50b1a57 ] + +devm_kmalloc_array may fail, *fw_vsc_cfg might be null and cause +out-of-bounds write in device_property_read_u8_array later. + +Fixes: a06347c04c13 ("NFC: Add Intel Fields Peak NFC solution driver") +Signed-off-by: Kang Chen +Reviewed-by: Krzysztof Kozlowski +Reviewed-by: Simon Horman +Link: https://lore.kernel.org/r/20230227093037.907654-1-void0red@gmail.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + drivers/nfc/fdp/i2c.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/nfc/fdp/i2c.c b/drivers/nfc/fdp/i2c.c +index 5e300788be525..808d73050afd0 100644 +--- a/drivers/nfc/fdp/i2c.c ++++ b/drivers/nfc/fdp/i2c.c +@@ -249,6 +249,9 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, + len, sizeof(**fw_vsc_cfg), + GFP_KERNEL); + ++ if (!*fw_vsc_cfg) ++ goto alloc_err; ++ + r = device_property_read_u8_array(dev, FDP_DP_FW_VSC_CFG_NAME, + *fw_vsc_cfg, len); + +@@ -262,6 +265,7 @@ static void fdp_nci_i2c_read_device_properties(struct device *dev, + *fw_vsc_cfg = NULL; + } + ++alloc_err: + dev_dbg(dev, "Clock type: %d, clock frequency: %d, VSC: %s", + *clock_type, *clock_freq, *fw_vsc_cfg != NULL ? 
"yes" : "no"); + } +-- +2.39.2 + diff --git a/queue-5.10/platform-x86-mlx_platform-select-regmap-instead-of-d.patch b/queue-5.10/platform-x86-mlx_platform-select-regmap-instead-of-d.patch new file mode 100644 index 00000000000..bdb420303ae --- /dev/null +++ b/queue-5.10/platform-x86-mlx_platform-select-regmap-instead-of-d.patch @@ -0,0 +1,50 @@ +From 695d62e869960ab3f07236095ed9f3f2bb757344 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 25 Feb 2023 21:39:51 -0800 +Subject: platform: x86: MLX_PLATFORM: select REGMAP instead of depending on it + +From: Randy Dunlap + +[ Upstream commit 7e7e1541c91615e9950d0b96bcd1806d297e970e ] + +REGMAP is a hidden (not user visible) symbol. Users cannot set it +directly thru "make *config", so drivers should select it instead of +depending on it if they need it. + +Consistently using "select" or "depends on" can also help reduce +Kconfig circular dependency issues. + +Therefore, change the use of "depends on REGMAP" to "select REGMAP". + +Fixes: ef0f62264b2a ("platform/x86: mlx-platform: Add physical bus number auto detection") +Signed-off-by: Randy Dunlap +Cc: Vadim Pasternak +Cc: Darren Hart +Cc: Hans de Goede +Cc: Mark Gross +Cc: platform-driver-x86@vger.kernel.org +Link: https://lore.kernel.org/r/20230226053953.4681-7-rdunlap@infradead.org +Signed-off-by: Hans de Goede +Reviewed-by: Hans de Goede +Signed-off-by: Sasha Levin +--- + drivers/platform/x86/Kconfig | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig +index a1858689d6e10..84c5b922f245e 100644 +--- a/drivers/platform/x86/Kconfig ++++ b/drivers/platform/x86/Kconfig +@@ -1195,7 +1195,8 @@ config I2C_MULTI_INSTANTIATE + + config MLX_PLATFORM + tristate "Mellanox Technologies platform support" +- depends on I2C && REGMAP ++ depends on I2C ++ select REGMAP + help + This option enables system support for the Mellanox Technologies + platform. 
The Mellanox systems provide data center networking +-- +2.39.2 + diff --git a/queue-5.10/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch b/queue-5.10/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch new file mode 100644 index 00000000000..66d551e1e6e --- /dev/null +++ b/queue-5.10/powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch @@ -0,0 +1,36 @@ +From 66894d71b0beadb5b792a7b62763eab3a7798844 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 24 Feb 2023 17:59:39 +0200 +Subject: powerpc: dts: t1040rdb: fix compatible string for Rev A boards + +From: Vladimir Oltean + +[ Upstream commit ae44f1c9d1fc54aeceb335fedb1e73b2c3ee4561 ] + +It looks like U-Boot fails to start the kernel properly when the +compatible string of the board isn't fsl,T1040RDB, so stop overriding it +from the rev-a.dts. + +Fixes: 5ebb74749202 ("powerpc: dts: t1040rdb: fix ports names for Seville Ethernet switch") +Signed-off-by: Vladimir Oltean +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts +index 73f8c998c64df..d4f5f159d6f23 100644 +--- a/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts ++++ b/arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dts +@@ -10,7 +10,6 @@ + + / { + model = "fsl,T1040RDB-REV-A"; +- compatible = "fsl,T1040RDB-REV-A"; + }; + + &seville_port0 { +-- +2.39.2 + diff --git a/queue-5.10/risc-v-avoid-dereferening-null-regs-in-die.patch b/queue-5.10/risc-v-avoid-dereferening-null-regs-in-die.patch new file mode 100644 index 00000000000..ad019a5e10b --- /dev/null +++ b/queue-5.10/risc-v-avoid-dereferening-null-regs-in-die.patch @@ -0,0 +1,57 @@ +From f0d5977c98155a0bed9f1a9093ca9a7e9f2d0f0e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 20 Sep 2022 13:00:37 -0700 +Subject: RISC-V: Avoid dereferening NULL regs in die() + +From: Palmer Dabbelt + 
+[ Upstream commit f2913d006fcdb61719635e093d1b5dd0dafecac7 ] + +I don't think we can actually die() without a regs pointer, but the +compiler was warning about a NULL check after a dereference. It seems +prudent to just avoid the possibly-NULL dereference, given that when +die()ing the system is already toast so who knows how we got there. + +Reported-by: kernel test robot +Reported-by: Dan Carpenter +Reviewed-by: Conor Dooley +Link: https://lore.kernel.org/r/20220920200037.6727-1-palmer@rivosinc.com +Signed-off-by: Palmer Dabbelt +Stable-dep-of: 130aee3fd998 ("riscv: Avoid enabling interrupts in die()") +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/traps.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c +index 23fe03ca7ec7b..bc6b30f3add83 100644 +--- a/arch/riscv/kernel/traps.c ++++ b/arch/riscv/kernel/traps.c +@@ -31,6 +31,7 @@ void die(struct pt_regs *regs, const char *str) + { + static int die_counter; + int ret; ++ long cause; + + oops_enter(); + +@@ -40,11 +41,13 @@ void die(struct pt_regs *regs, const char *str) + + pr_emerg("%s [#%d]\n", str, ++die_counter); + print_modules(); +- show_regs(regs); ++ if (regs) ++ show_regs(regs); + +- ret = notify_die(DIE_OOPS, str, regs, 0, regs->cause, SIGSEGV); ++ cause = regs ? 
regs->cause : -1; ++ ret = notify_die(DIE_OOPS, str, regs, 0, cause, SIGSEGV); + +- if (regs && kexec_should_crash(current)) ++ if (kexec_should_crash(current)) + crash_kexec(regs); + + bust_spinlocks(0); +-- +2.39.2 + diff --git a/queue-5.10/risc-v-don-t-check-text_mutex-during-stop_machine.patch b/queue-5.10/risc-v-don-t-check-text_mutex-during-stop_machine.patch new file mode 100644 index 00000000000..2ce5412171f --- /dev/null +++ b/queue-5.10/risc-v-don-t-check-text_mutex-during-stop_machine.patch @@ -0,0 +1,163 @@ +From 08ee349fb46fb31600fbf34244e65b4378ea2d65 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 14:37:55 +0000 +Subject: RISC-V: Don't check text_mutex during stop_machine + +From: Conor Dooley + +[ Upstream commit 2a8db5ec4a28a0fce822d10224db9471a44b6925 ] + +We're currently using stop_machine() to update ftrace & kprobes, which +means that the thread that takes text_mutex during may not be the same +as the thread that eventually patches the code. This isn't actually a +race because the lock is still held (preventing any other concurrent +accesses) and there is only one thread running during stop_machine(), +but it does trigger a lockdep failure. + +This patch just elides the lockdep check during stop_machine. 
+ +Fixes: c15ac4fd60d5 ("riscv/ftrace: Add dynamic function tracer support") +Suggested-by: Steven Rostedt +Reported-by: Changbin Du +Signed-off-by: Palmer Dabbelt +Signed-off-by: Conor Dooley +Link: https://lore.kernel.org/r/20230303143754.4005217-1-conor.dooley@microchip.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/ftrace.h | 2 +- + arch/riscv/include/asm/patch.h | 2 ++ + arch/riscv/kernel/ftrace.c | 14 ++++++++++++-- + arch/riscv/kernel/patch.c | 28 +++++++++++++++++++++++++--- + 4 files changed, 40 insertions(+), 6 deletions(-) + +diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h +index 9e73922e1e2e5..d47d87c2d7e3d 100644 +--- a/arch/riscv/include/asm/ftrace.h ++++ b/arch/riscv/include/asm/ftrace.h +@@ -109,6 +109,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); + #define ftrace_init_nop ftrace_init_nop + #endif + +-#endif ++#endif /* CONFIG_DYNAMIC_FTRACE */ + + #endif /* _ASM_RISCV_FTRACE_H */ +diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h +index 9a7d7346001ee..98d9de07cba17 100644 +--- a/arch/riscv/include/asm/patch.h ++++ b/arch/riscv/include/asm/patch.h +@@ -9,4 +9,6 @@ + int patch_text_nosync(void *addr, const void *insns, size_t len); + int patch_text(void *addr, u32 insn); + ++extern int riscv_patch_in_stop_machine; ++ + #endif /* _ASM_RISCV_PATCH_H */ +diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c +index 47b43d8ee9a6c..1bf92cfa6764e 100644 +--- a/arch/riscv/kernel/ftrace.c ++++ b/arch/riscv/kernel/ftrace.c +@@ -15,11 +15,21 @@ + int ftrace_arch_code_modify_prepare(void) __acquires(&text_mutex) + { + mutex_lock(&text_mutex); ++ ++ /* ++ * The code sequences we use for ftrace can't be patched while the ++ * kernel is running, so we need to use stop_machine() to modify them ++ * for now. This doesn't play nice with text_mutex, we use this flag ++ * to elide the check. 
++ */ ++ riscv_patch_in_stop_machine = true; ++ + return 0; + } + + int ftrace_arch_code_modify_post_process(void) __releases(&text_mutex) + { ++ riscv_patch_in_stop_machine = false; + mutex_unlock(&text_mutex); + return 0; + } +@@ -109,9 +119,9 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) + { + int out; + +- ftrace_arch_code_modify_prepare(); ++ mutex_lock(&text_mutex); + out = ftrace_make_nop(mod, rec, MCOUNT_ADDR); +- ftrace_arch_code_modify_post_process(); ++ mutex_unlock(&text_mutex); + + return out; + } +diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c +index 1612e11f7bf6d..c3fced410e742 100644 +--- a/arch/riscv/kernel/patch.c ++++ b/arch/riscv/kernel/patch.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + + struct patch_insn { +@@ -19,6 +20,8 @@ struct patch_insn { + atomic_t cpu_count; + }; + ++int riscv_patch_in_stop_machine = false; ++ + #ifdef CONFIG_MMU + static void *patch_map(void *addr, int fixmap) + { +@@ -55,8 +58,15 @@ static int patch_insn_write(void *addr, const void *insn, size_t len) + * Before reaching here, it was expected to lock the text_mutex + * already, so we don't need to give another lock here and could + * ensure that it was safe between each cores. ++ * ++ * We're currently using stop_machine() for ftrace & kprobes, and while ++ * that ensures text_mutex is held before installing the mappings it ++ * does not ensure text_mutex is held by the calling thread. That's ++ * safe but triggers a lockdep failure, so just elide it for that ++ * specific case. 
+ */ +- lockdep_assert_held(&text_mutex); ++ if (!riscv_patch_in_stop_machine) ++ lockdep_assert_held(&text_mutex); + + if (across_pages) + patch_map(addr + len, FIX_TEXT_POKE1); +@@ -117,13 +127,25 @@ NOKPROBE_SYMBOL(patch_text_cb); + + int patch_text(void *addr, u32 insn) + { ++ int ret; + struct patch_insn patch = { + .addr = addr, + .insn = insn, + .cpu_count = ATOMIC_INIT(0), + }; + +- return stop_machine_cpuslocked(patch_text_cb, +- &patch, cpu_online_mask); ++ /* ++ * kprobes takes text_mutex, before calling patch_text(), but as we call ++ * calls stop_machine(), the lockdep assertion in patch_insn_write() ++ * gets confused by the context in which the lock is taken. ++ * Instead, ensure the lock is held before calling stop_machine(), and ++ * set riscv_patch_in_stop_machine to skip the check in ++ * patch_insn_write(). ++ */ ++ lockdep_assert_held(&text_mutex); ++ riscv_patch_in_stop_machine = true; ++ ret = stop_machine_cpuslocked(patch_text_cb, &patch, cpu_online_mask); ++ riscv_patch_in_stop_machine = false; ++ return ret; + } + NOKPROBE_SYMBOL(patch_text); +-- +2.39.2 + diff --git a/queue-5.10/riscv-add-header-include-guards-to-insn.h.patch b/queue-5.10/riscv-add-header-include-guards-to-insn.h.patch new file mode 100644 index 00000000000..1c32e5dfe90 --- /dev/null +++ b/queue-5.10/riscv-add-header-include-guards-to-insn.h.patch @@ -0,0 +1,48 @@ +From 02ddbaf7ea1c9a770160c69a31629798b39c46c9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 29 Jan 2023 17:42:42 +0800 +Subject: riscv: Add header include guards to insn.h + +From: Liao Chang + +[ Upstream commit 8ac6e619d9d51b3eb5bae817db8aa94e780a0db4 ] + +Add header include guards to insn.h to prevent repeating declaration of +any identifiers in insn.h. 
+ +Fixes: edde5584c7ab ("riscv: Add SW single-step support for KDB") +Signed-off-by: Liao Chang +Reviewed-by: Andrew Jones +Fixes: c9c1af3f186a ("RISC-V: rename parse_asm.h to insn.h") +Reviewed-by: Conor Dooley +Link: https://lore.kernel.org/r/20230129094242.282620-1-liaochang1@huawei.com +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/parse_asm.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/riscv/include/asm/parse_asm.h b/arch/riscv/include/asm/parse_asm.h +index 7fee806805c1b..ad254da85e615 100644 +--- a/arch/riscv/include/asm/parse_asm.h ++++ b/arch/riscv/include/asm/parse_asm.h +@@ -3,6 +3,9 @@ + * Copyright (C) 2020 SiFive + */ + ++#ifndef _ASM_RISCV_INSN_H ++#define _ASM_RISCV_INSN_H ++ + #include + + /* The bit field of immediate value in I-type instruction */ +@@ -217,3 +220,5 @@ static inline bool is_ ## INSN_NAME ## _insn(long insn) \ + (RVC_X(x_, RVC_B_IMM_5_OPOFF, RVC_B_IMM_5_MASK) << RVC_B_IMM_5_OFF) | \ + (RVC_X(x_, RVC_B_IMM_7_6_OPOFF, RVC_B_IMM_7_6_MASK) << RVC_B_IMM_7_6_OFF) | \ + (RVC_IMM_SIGN(x_) << RVC_B_IMM_SIGN_OFF); }) ++ ++#endif /* _ASM_RISCV_INSN_H */ +-- +2.39.2 + diff --git a/queue-5.10/riscv-avoid-enabling-interrupts-in-die.patch b/queue-5.10/riscv-avoid-enabling-interrupts-in-die.patch new file mode 100644 index 00000000000..5828d9b5eff --- /dev/null +++ b/queue-5.10/riscv-avoid-enabling-interrupts-in-die.patch @@ -0,0 +1,60 @@ +From a3ec8c84210a077e35f797833016d5e0e8b27e62 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Feb 2023 14:48:28 +0000 +Subject: riscv: Avoid enabling interrupts in die() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mattias Nissler + +[ Upstream commit 130aee3fd9981297ff9354e5d5609cd59aafbbea ] + +While working on something else, I noticed that the kernel would start +accepting interrupts again after crashing in an interrupt handler. 
Since +the kernel is already in inconsistent state, enabling interrupts is +dangerous and opens up risk of kernel state deteriorating further. +Interrupts do get enabled via what looks like an unintended side effect of +spin_unlock_irq, so switch to the more cautious +spin_lock_irqsave/spin_unlock_irqrestore instead. + +Fixes: 76d2a0493a17 ("RISC-V: Init and Halt Code") +Signed-off-by: Mattias Nissler +Reviewed-by: Björn Töpel +Link: https://lore.kernel.org/r/20230215144828.3370316-1-mnissler@rivosinc.com +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/traps.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c +index bc6b30f3add83..227253fde33c4 100644 +--- a/arch/riscv/kernel/traps.c ++++ b/arch/riscv/kernel/traps.c +@@ -32,10 +32,11 @@ void die(struct pt_regs *regs, const char *str) + static int die_counter; + int ret; + long cause; ++ unsigned long flags; + + oops_enter(); + +- spin_lock_irq(&die_lock); ++ spin_lock_irqsave(&die_lock, flags); + console_verbose(); + bust_spinlocks(1); + +@@ -52,7 +53,7 @@ void die(struct pt_regs *regs, const char *str) + + bust_spinlocks(0); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); +- spin_unlock_irq(&die_lock); ++ spin_unlock_irqrestore(&die_lock, flags); + oops_exit(); + + if (in_interrupt()) +-- +2.39.2 + diff --git a/queue-5.10/riscv-ftrace-reduce-the-detour-code-size-to-half.patch b/queue-5.10/riscv-ftrace-reduce-the-detour-code-size-to-half.patch new file mode 100644 index 00000000000..a334c946b16 --- /dev/null +++ b/queue-5.10/riscv-ftrace-reduce-the-detour-code-size-to-half.patch @@ -0,0 +1,447 @@ +From 584f80a2b4caa34c1809e9356be1b8300aa2923e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Jan 2023 04:05:59 -0500 +Subject: riscv: ftrace: Reduce the detour code size to half + +From: Guo Ren + +[ Upstream commit 6724a76cff85ee271bbbff42ac527e4643b2ec52 ] + +Use a 
temporary register to reduce the size of detour code from 16 bytes to +8 bytes. The previous implementation is from 'commit afc76b8b8011 ("riscv: +Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT")'. + +Before the patch: +<func_prolog>: + 0: REG_S ra, -SZREG(sp) + 4: auipc ra, ? + 8: jalr ?(ra) +12: REG_L ra, -SZREG(sp) + (func_body) + +After the patch: +<func_prolog>: + 0: auipc t0, ? + 4: jalr t0, ?(t0) + (func_body) + +This patch not only reduces the size of detour code, but also fixes an +important issue: + +An Ftrace callback registered with FTRACE_OPS_FL_IPMODIFY flag can +actually change the instruction pointer, e.g. to "replace" the given +kernel function with a new one, which is needed for livepatching, etc. + +In this case, the trampoline (ftrace_regs_caller) would not return to +<func_prolog+12> but would rather jump to the new function. So, "REG_L +ra, -SZREG(sp)" would not run and the original return address would not +be restored. The kernel is likely to hang or crash as a result. + +This can be easily demonstrated if one tries to "replace", say, +cmdline_proc_show() with a new function with the same signature using +instruction_pointer_set(&fregs->regs, new_func_addr) in the Ftrace +callback.
+ +Link: https://lore.kernel.org/linux-riscv/20221122075440.1165172-1-suagrfillet@gmail.com/ +Link: https://lore.kernel.org/linux-riscv/d7d5730b-ebef-68e5-5046-e763e1ee6164@yadro.com/ +Co-developed-by: Song Shuai +Signed-off-by: Song Shuai +Signed-off-by: Guo Ren +Signed-off-by: Guo Ren +Cc: Evgenii Shatokhin +Reviewed-by: Evgenii Shatokhin +Link: https://lore.kernel.org/r/20230112090603.1295340-4-guoren@kernel.org +Cc: stable@vger.kernel.org +Fixes: 10626c32e382 ("riscv/ftrace: Add basic support") +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/Makefile | 4 +- + arch/riscv/include/asm/ftrace.h | 50 +++++++++++++++++++------ + arch/riscv/kernel/ftrace.c | 65 ++++++++++----------------------- + arch/riscv/kernel/mcount-dyn.S | 42 ++++++++------------- + 4 files changed, 75 insertions(+), 86 deletions(-) + +diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile +index 3e3467dbbf73f..8ceb667e4f38c 100644 +--- a/arch/riscv/Makefile ++++ b/arch/riscv/Makefile +@@ -14,9 +14,9 @@ ifeq ($(CONFIG_DYNAMIC_FTRACE),y) + LDFLAGS_vmlinux := --no-relax + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY + ifeq ($(CONFIG_RISCV_ISA_C),y) +- CC_FLAGS_FTRACE := -fpatchable-function-entry=8 +-else + CC_FLAGS_FTRACE := -fpatchable-function-entry=4 ++else ++ CC_FLAGS_FTRACE := -fpatchable-function-entry=2 + endif + endif + +diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h +index 04dad33800418..9e73922e1e2e5 100644 +--- a/arch/riscv/include/asm/ftrace.h ++++ b/arch/riscv/include/asm/ftrace.h +@@ -42,6 +42,14 @@ struct dyn_arch_ftrace { + * 2) jalr: setting low-12 offset to ra, jump to ra, and set ra to + * return address (original pc + 4) + * ++ *: ++ * 0: auipc t0/ra, 0x? ++ * 4: jalr t0/ra, ?(t0/ra) ++ * ++ *: ++ * 0: nop ++ * 4: nop ++ * + * Dynamic ftrace generates probes to call sites, so we must deal with + * both auipc and jalr at the same time. 
+ */ +@@ -52,25 +60,43 @@ struct dyn_arch_ftrace { + #define AUIPC_OFFSET_MASK (0xfffff000) + #define AUIPC_PAD (0x00001000) + #define JALR_SHIFT 20 +-#define JALR_BASIC (0x000080e7) +-#define AUIPC_BASIC (0x00000097) ++#define JALR_RA (0x000080e7) ++#define AUIPC_RA (0x00000097) ++#define JALR_T0 (0x000282e7) ++#define AUIPC_T0 (0x00000297) + #define NOP4 (0x00000013) + +-#define make_call(caller, callee, call) \ ++#define to_jalr_t0(offset) \ ++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_T0) ++ ++#define to_auipc_t0(offset) \ ++ ((offset & JALR_SIGN_MASK) ? \ ++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_T0) : \ ++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_T0)) ++ ++#define make_call_t0(caller, callee, call) \ + do { \ +- call[0] = to_auipc_insn((unsigned int)((unsigned long)callee - \ +- (unsigned long)caller)); \ +- call[1] = to_jalr_insn((unsigned int)((unsigned long)callee - \ +- (unsigned long)caller)); \ ++ unsigned int offset = \ ++ (unsigned long) callee - (unsigned long) caller; \ ++ call[0] = to_auipc_t0(offset); \ ++ call[1] = to_jalr_t0(offset); \ + } while (0) + +-#define to_jalr_insn(offset) \ +- (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_BASIC) ++#define to_jalr_ra(offset) \ ++ (((offset & JALR_OFFSET_MASK) << JALR_SHIFT) | JALR_RA) + +-#define to_auipc_insn(offset) \ ++#define to_auipc_ra(offset) \ + ((offset & JALR_SIGN_MASK) ? \ +- (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_BASIC) : \ +- ((offset & AUIPC_OFFSET_MASK) | AUIPC_BASIC)) ++ (((offset & AUIPC_OFFSET_MASK) + AUIPC_PAD) | AUIPC_RA) : \ ++ ((offset & AUIPC_OFFSET_MASK) | AUIPC_RA)) ++ ++#define make_call_ra(caller, callee, call) \ ++do { \ ++ unsigned int offset = \ ++ (unsigned long) callee - (unsigned long) caller; \ ++ call[0] = to_auipc_ra(offset); \ ++ call[1] = to_jalr_ra(offset); \ ++} while (0) + + /* + * Let auipc+jalr be the basic *mcount unit*, so we make it 8 bytes here. 
+diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c +index 7f1e5203de886..47b43d8ee9a6c 100644 +--- a/arch/riscv/kernel/ftrace.c ++++ b/arch/riscv/kernel/ftrace.c +@@ -57,12 +57,15 @@ static int ftrace_check_current_call(unsigned long hook_pos, + } + + static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, +- bool enable) ++ bool enable, bool ra) + { + unsigned int call[2]; + unsigned int nops[2] = {NOP4, NOP4}; + +- make_call(hook_pos, target, call); ++ if (ra) ++ make_call_ra(hook_pos, target, call); ++ else ++ make_call_t0(hook_pos, target, call); + + /* Replace the auipc-jalr pair at once. Return -EPERM on write error. */ + if (patch_text_nosync +@@ -72,42 +75,13 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, + return 0; + } + +-/* +- * Put 5 instructions with 16 bytes at the front of function within +- * patchable function entry nops' area. +- * +- * 0: REG_S ra, -SZREG(sp) +- * 1: auipc ra, 0x? +- * 2: jalr -?(ra) +- * 3: REG_L ra, -SZREG(sp) +- * +- * So the opcodes is: +- * 0: 0xfe113c23 (sd)/0xfe112e23 (sw) +- * 1: 0x???????? -> auipc +- * 2: 0x???????? 
-> jalr +- * 3: 0xff813083 (ld)/0xffc12083 (lw) +- */ +-#if __riscv_xlen == 64 +-#define INSN0 0xfe113c23 +-#define INSN3 0xff813083 +-#elif __riscv_xlen == 32 +-#define INSN0 0xfe112e23 +-#define INSN3 0xffc12083 +-#endif +- +-#define FUNC_ENTRY_SIZE 16 +-#define FUNC_ENTRY_JMP 4 +- + int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) + { +- unsigned int call[4] = {INSN0, 0, 0, INSN3}; +- unsigned long target = addr; +- unsigned long caller = rec->ip + FUNC_ENTRY_JMP; ++ unsigned int call[2]; + +- call[1] = to_auipc_insn((unsigned int)(target - caller)); +- call[2] = to_jalr_insn((unsigned int)(target - caller)); ++ make_call_t0(rec->ip, addr, call); + +- if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE)) ++ if (patch_text_nosync((void *)rec->ip, call, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; +@@ -116,15 +90,14 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) + { +- unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4}; ++ unsigned int nops[2] = {NOP4, NOP4}; + +- if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE)) ++ if (patch_text_nosync((void *)rec->ip, nops, MCOUNT_INSN_SIZE)) + return -EPERM; + + return 0; + } + +- + /* + * This is called early on, and isn't wrapped by + * ftrace_arch_code_modify_{prepare,post_process}() and therefor doesn't hold +@@ -146,10 +119,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) + int ftrace_update_ftrace_func(ftrace_func_t func) + { + int ret = __ftrace_modify_call((unsigned long)&ftrace_call, +- (unsigned long)func, true); ++ (unsigned long)func, true, true); + if (!ret) { + ret = __ftrace_modify_call((unsigned long)&ftrace_regs_call, +- (unsigned long)func, true); ++ (unsigned long)func, true, true); + } + + return ret; +@@ -166,16 +139,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) + { + unsigned int 
call[2]; +- unsigned long caller = rec->ip + FUNC_ENTRY_JMP; ++ unsigned long caller = rec->ip; + int ret; + +- make_call(caller, old_addr, call); ++ make_call_t0(caller, old_addr, call); + ret = ftrace_check_current_call(caller, call); + + if (ret) + return ret; + +- return __ftrace_modify_call(caller, addr, true); ++ return __ftrace_modify_call(caller, addr, true, false); + } + #endif + +@@ -210,12 +183,12 @@ int ftrace_enable_ftrace_graph_caller(void) + int ret; + + ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, +- (unsigned long)&prepare_ftrace_return, true); ++ (unsigned long)&prepare_ftrace_return, true, true); + if (ret) + return ret; + + return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, +- (unsigned long)&prepare_ftrace_return, true); ++ (unsigned long)&prepare_ftrace_return, true, true); + } + + int ftrace_disable_ftrace_graph_caller(void) +@@ -223,12 +196,12 @@ int ftrace_disable_ftrace_graph_caller(void) + int ret; + + ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, +- (unsigned long)&prepare_ftrace_return, false); ++ (unsigned long)&prepare_ftrace_return, false, true); + if (ret) + return ret; + + return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, +- (unsigned long)&prepare_ftrace_return, false); ++ (unsigned long)&prepare_ftrace_return, false, true); + } + #endif /* CONFIG_DYNAMIC_FTRACE */ + #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S +index d171eca623b6f..125de818d1bab 100644 +--- a/arch/riscv/kernel/mcount-dyn.S ++++ b/arch/riscv/kernel/mcount-dyn.S +@@ -13,8 +13,8 @@ + + .text + +-#define FENTRY_RA_OFFSET 12 +-#define ABI_SIZE_ON_STACK 72 ++#define FENTRY_RA_OFFSET 8 ++#define ABI_SIZE_ON_STACK 80 + #define ABI_A0 0 + #define ABI_A1 8 + #define ABI_A2 16 +@@ -23,10 +23,10 @@ + #define ABI_A5 40 + #define ABI_A6 48 + #define ABI_A7 56 +-#define ABI_RA 64 ++#define ABI_T0 64 ++#define ABI_RA 72 + + .macro 
SAVE_ABI +- addi sp, sp, -SZREG + addi sp, sp, -ABI_SIZE_ON_STACK + + REG_S a0, ABI_A0(sp) +@@ -37,6 +37,7 @@ + REG_S a5, ABI_A5(sp) + REG_S a6, ABI_A6(sp) + REG_S a7, ABI_A7(sp) ++ REG_S t0, ABI_T0(sp) + REG_S ra, ABI_RA(sp) + .endm + +@@ -49,24 +50,18 @@ + REG_L a5, ABI_A5(sp) + REG_L a6, ABI_A6(sp) + REG_L a7, ABI_A7(sp) ++ REG_L t0, ABI_T0(sp) + REG_L ra, ABI_RA(sp) + + addi sp, sp, ABI_SIZE_ON_STACK +- addi sp, sp, SZREG + .endm + + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS + .macro SAVE_ALL +- addi sp, sp, -SZREG + addi sp, sp, -PT_SIZE_ON_STACK + +- REG_S x1, PT_EPC(sp) +- addi sp, sp, PT_SIZE_ON_STACK +- REG_L x1, (sp) +- addi sp, sp, -PT_SIZE_ON_STACK ++ REG_S t0, PT_EPC(sp) + REG_S x1, PT_RA(sp) +- REG_L x1, PT_EPC(sp) +- + REG_S x2, PT_SP(sp) + REG_S x3, PT_GP(sp) + REG_S x4, PT_TP(sp) +@@ -100,15 +95,11 @@ + .endm + + .macro RESTORE_ALL ++ REG_L t0, PT_EPC(sp) + REG_L x1, PT_RA(sp) +- addi sp, sp, PT_SIZE_ON_STACK +- REG_S x1, (sp) +- addi sp, sp, -PT_SIZE_ON_STACK +- REG_L x1, PT_EPC(sp) + REG_L x2, PT_SP(sp) + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) +- REG_L x5, PT_T0(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) +@@ -137,17 +128,16 @@ + REG_L x31, PT_T6(sp) + + addi sp, sp, PT_SIZE_ON_STACK +- addi sp, sp, SZREG + .endm + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + + ENTRY(ftrace_caller) + SAVE_ABI + +- addi a0, ra, -FENTRY_RA_OFFSET ++ addi a0, t0, -FENTRY_RA_OFFSET + la a1, function_trace_op + REG_L a2, 0(a1) +- REG_L a1, ABI_SIZE_ON_STACK(sp) ++ mv a1, ra + mv a3, sp + + ftrace_call: +@@ -155,8 +145,8 @@ ftrace_call: + call ftrace_stub + + #ifdef CONFIG_FUNCTION_GRAPH_TRACER +- addi a0, sp, ABI_SIZE_ON_STACK +- REG_L a1, ABI_RA(sp) ++ addi a0, sp, ABI_RA ++ REG_L a1, ABI_T0(sp) + addi a1, a1, -FENTRY_RA_OFFSET + #ifdef HAVE_FUNCTION_GRAPH_FP_TEST + mv a2, s0 +@@ -166,17 +156,17 @@ ftrace_graph_call: + call ftrace_stub + #endif + RESTORE_ABI +- ret ++ jr t0 + ENDPROC(ftrace_caller) + + #ifdef 
CONFIG_DYNAMIC_FTRACE_WITH_REGS + ENTRY(ftrace_regs_caller) + SAVE_ALL + +- addi a0, ra, -FENTRY_RA_OFFSET ++ addi a0, t0, -FENTRY_RA_OFFSET + la a1, function_trace_op + REG_L a2, 0(a1) +- REG_L a1, PT_SIZE_ON_STACK(sp) ++ mv a1, ra + mv a3, sp + + ftrace_regs_call: +@@ -196,6 +186,6 @@ ftrace_graph_regs_call: + #endif + + RESTORE_ALL +- ret ++ jr t0 + ENDPROC(ftrace_regs_caller) + #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +-- +2.39.2 + diff --git a/queue-5.10/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch b/queue-5.10/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch new file mode 100644 index 00000000000..44201fe0cf6 --- /dev/null +++ b/queue-5.10/riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch @@ -0,0 +1,60 @@ +From 008af635bed0d4f0d83564b2b3986ed14472119b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 12 Jan 2023 04:05:58 -0500 +Subject: riscv: ftrace: Remove wasted nops for !RISCV_ISA_C + +From: Guo Ren + +[ Upstream commit 409c8fb20c66df7150e592747412438c04aeb11f ] + +When CONFIG_RISCV_ISA_C=n, -fpatchable-function-entry=8 would generate +more nops than we expect. Because it treat nop opcode as 0x00000013 +instead of 0x0001. 
+ +Dump of assembler code for function dw_pcie_free_msi: + 0xffffffff806fce94 <+0>: sd ra,-8(sp) + 0xffffffff806fce98 <+4>: auipc ra,0xff90f + 0xffffffff806fce9c <+8>: jalr -684(ra) # 0xffffffff8000bbec + + 0xffffffff806fcea0 <+12>: ld ra,-8(sp) + 0xffffffff806fcea4 <+16>: nop /* wasted */ + 0xffffffff806fcea8 <+20>: nop /* wasted */ + 0xffffffff806fceac <+24>: nop /* wasted */ + 0xffffffff806fceb0 <+28>: nop /* wasted */ + 0xffffffff806fceb4 <+0>: addi sp,sp,-48 + 0xffffffff806fceb8 <+4>: sd s0,32(sp) + 0xffffffff806fcebc <+8>: sd s1,24(sp) + 0xffffffff806fcec0 <+12>: sd s2,16(sp) + 0xffffffff806fcec4 <+16>: sd s3,8(sp) + 0xffffffff806fcec8 <+20>: sd ra,40(sp) + 0xffffffff806fcecc <+24>: addi s0,sp,48 + +Signed-off-by: Guo Ren +Signed-off-by: Guo Ren +Link: https://lore.kernel.org/r/20230112090603.1295340-3-guoren@kernel.org +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/Makefile | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile +index 6c1ef42d5a0df..3e3467dbbf73f 100644 +--- a/arch/riscv/Makefile ++++ b/arch/riscv/Makefile +@@ -13,7 +13,11 @@ LDFLAGS_vmlinux := + ifeq ($(CONFIG_DYNAMIC_FTRACE),y) + LDFLAGS_vmlinux := --no-relax + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY ++ifeq ($(CONFIG_RISCV_ISA_C),y) + CC_FLAGS_FTRACE := -fpatchable-function-entry=8 ++else ++ CC_FLAGS_FTRACE := -fpatchable-function-entry=4 ++endif + endif + + ifeq ($(CONFIG_CMODEL_MEDLOW),y) +-- +2.39.2 + diff --git a/queue-5.10/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch b/queue-5.10/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch new file mode 100644 index 00000000000..ebb2eb0bed8 --- /dev/null +++ b/queue-5.10/riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch @@ -0,0 +1,99 @@ +From 0675e74ed638f0e92597d9b136e83ee75cf74541 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 8 Mar 2023 10:16:39 +0100 +Subject: riscv: Use 
READ_ONCE_NOCHECK in imprecise unwinding stack mode + +From: Alexandre Ghiti + +[ Upstream commit 76950340cf03b149412fe0d5f0810e52ac1df8cb ] + +When CONFIG_FRAME_POINTER is unset, the stack unwinding function +walk_stackframe randomly reads the stack and then, when KASAN is enabled, +it can lead to the following backtrace: + +[ 0.000000] ================================================================== +[ 0.000000] BUG: KASAN: stack-out-of-bounds in walk_stackframe+0xa6/0x11a +[ 0.000000] Read of size 8 at addr ffffffff81807c40 by task swapper/0 +[ 0.000000] +[ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 6.2.0-12919-g24203e6db61f #43 +[ 0.000000] Hardware name: riscv-virtio,qemu (DT) +[ 0.000000] Call Trace: +[ 0.000000] [] walk_stackframe+0x0/0x11a +[ 0.000000] [] init_param_lock+0x26/0x2a +[ 0.000000] [] walk_stackframe+0xa2/0x11a +[ 0.000000] [] dump_stack_lvl+0x22/0x36 +[ 0.000000] [] print_report+0x198/0x4a8 +[ 0.000000] [] init_param_lock+0x26/0x2a +[ 0.000000] [] walk_stackframe+0xa2/0x11a +[ 0.000000] [] kasan_report+0x9a/0xc8 +[ 0.000000] [] walk_stackframe+0xa2/0x11a +[ 0.000000] [] walk_stackframe+0xa2/0x11a +[ 0.000000] [] desc_make_final+0x80/0x84 +[ 0.000000] [] stack_trace_save+0x88/0xa6 +[ 0.000000] [] filter_irq_stacks+0x72/0x76 +[ 0.000000] [] devkmsg_read+0x32a/0x32e +[ 0.000000] [] kasan_save_stack+0x28/0x52 +[ 0.000000] [] desc_make_final+0x7c/0x84 +[ 0.000000] [] stack_trace_save+0x84/0xa6 +[ 0.000000] [] kasan_set_track+0x12/0x20 +[ 0.000000] [] __kasan_slab_alloc+0x58/0x5e +[ 0.000000] [] __kmem_cache_create+0x21e/0x39a +[ 0.000000] [] create_boot_cache+0x70/0x9c +[ 0.000000] [] kmem_cache_init+0x6c/0x11e +[ 0.000000] [] mm_init+0xd8/0xfe +[ 0.000000] [] start_kernel+0x190/0x3ca +[ 0.000000] +[ 0.000000] The buggy address belongs to stack of task swapper/0 +[ 0.000000] and is located at offset 0 in frame: +[ 0.000000] stack_trace_save+0x0/0xa6 +[ 0.000000] +[ 0.000000] This frame has 1 object: +[ 0.000000] [32, 56) 'c' +[ 0.000000] +[ 
0.000000] The buggy address belongs to the physical page: +[ 0.000000] page:(____ptrval____) refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x81a07 +[ 0.000000] flags: 0x1000(reserved|zone=0) +[ 0.000000] raw: 0000000000001000 ff600003f1e3d150 ff600003f1e3d150 0000000000000000 +[ 0.000000] raw: 0000000000000000 0000000000000000 00000001ffffffff +[ 0.000000] page dumped because: kasan: bad access detected +[ 0.000000] +[ 0.000000] Memory state around the buggy address: +[ 0.000000] ffffffff81807b00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 0.000000] ffffffff81807b80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 0.000000] >ffffffff81807c00: 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00 00 f3 +[ 0.000000] ^ +[ 0.000000] ffffffff81807c80: f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 +[ 0.000000] ffffffff81807d00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +[ 0.000000] ================================================================== + +Fix that by using READ_ONCE_NOCHECK when reading the stack in imprecise +mode. 
+ +Fixes: 5d8544e2d007 ("RISC-V: Generic library routines and assembly") +Reported-by: Chathura Rajapaksha +Link: https://lore.kernel.org/all/CAD7mqryDQCYyJ1gAmtMm8SASMWAQ4i103ptTb0f6Oda=tPY2=A@mail.gmail.com/ +Suggested-by: Dmitry Vyukov +Signed-off-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20230308091639.602024-1-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/kernel/stacktrace.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c +index 1e53fbe5eb783..9c34735c1e771 100644 +--- a/arch/riscv/kernel/stacktrace.c ++++ b/arch/riscv/kernel/stacktrace.c +@@ -96,7 +96,7 @@ void notrace walk_stackframe(struct task_struct *task, + while (!kstack_end(ksp)) { + if (__kernel_text_address(pc) && unlikely(fn(pc, arg))) + break; +- pc = (*ksp++) - 0x4; ++ pc = READ_ONCE_NOCHECK(*ksp++) - 0x4; + } + } + +-- +2.39.2 + diff --git a/queue-5.10/riscv-using-patchable_function_entry-instead-of-mcou.patch b/queue-5.10/riscv-using-patchable_function_entry-instead-of-mcou.patch new file mode 100644 index 00000000000..ee0f5fe2b11 --- /dev/null +++ b/queue-5.10/riscv-using-patchable_function_entry-instead-of-mcou.patch @@ -0,0 +1,600 @@ +From abb0542c5e210048f5905f02935615dc315a5240 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 17 Dec 2020 16:01:41 +0000 +Subject: riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT + +From: Guo Ren + +[ Upstream commit afc76b8b80112189b6f11e67e19cf58301944814 ] + +This patch changes the current detour mechanism of dynamic ftrace +which has been discussed during LPC 2020 RISCV-MC [1]. + +Before the patch, we used mcount for detour: +: + addi sp,sp,-16 + sd ra,8(sp) + sd s0,0(sp) + addi s0,sp,16 + mv a5,ra + mv a0,a5 + auipc ra,0x0 -> nop + jalr -296(ra) <_mcount@plt> ->nop + ... + +After the patch, we use nop call site area for detour: +: + nop -> REG_S ra, -SZREG(sp) + nop -> auipc ra, 0x? 
+ nop -> jalr ?(ra) + nop -> REG_L ra, -SZREG(sp) + ... + +The mcount mechanism is mixed with gcc function prologue which is +not very clear. The patchable function entry just put 16 bytes nop +before the front of the function prologue which could be filled +with a separated detour mechanism. + +[1] https://www.linuxplumbersconf.org/event/7/contributions/807/ + +Signed-off-by: Guo Ren +Signed-off-by: Palmer Dabbelt +Stable-dep-of: 409c8fb20c66 ("riscv: ftrace: Remove wasted nops for !RISCV_ISA_C") +Signed-off-by: Sasha Levin +--- + arch/riscv/Makefile | 2 + + arch/riscv/kernel/ftrace.c | 95 ++++----- + arch/riscv/kernel/mcount-dyn.S | 342 +++++++++++++++------------------ + 3 files changed, 204 insertions(+), 235 deletions(-) + +diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile +index 9446282b52bab..6c1ef42d5a0df 100644 +--- a/arch/riscv/Makefile ++++ b/arch/riscv/Makefile +@@ -12,6 +12,8 @@ OBJCOPYFLAGS := -O binary + LDFLAGS_vmlinux := + ifeq ($(CONFIG_DYNAMIC_FTRACE),y) + LDFLAGS_vmlinux := --no-relax ++ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY ++ CC_FLAGS_FTRACE := -fpatchable-function-entry=8 + endif + + ifeq ($(CONFIG_CMODEL_MEDLOW),y) +diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c +index 765b62434f303..7f1e5203de886 100644 +--- a/arch/riscv/kernel/ftrace.c ++++ b/arch/riscv/kernel/ftrace.c +@@ -72,29 +72,56 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, + return 0; + } + ++/* ++ * Put 5 instructions with 16 bytes at the front of function within ++ * patchable function entry nops' area. ++ * ++ * 0: REG_S ra, -SZREG(sp) ++ * 1: auipc ra, 0x? ++ * 2: jalr -?(ra) ++ * 3: REG_L ra, -SZREG(sp) ++ * ++ * So the opcodes is: ++ * 0: 0xfe113c23 (sd)/0xfe112e23 (sw) ++ * 1: 0x???????? -> auipc ++ * 2: 0x???????? 
-> jalr ++ * 3: 0xff813083 (ld)/0xffc12083 (lw) ++ */ ++#if __riscv_xlen == 64 ++#define INSN0 0xfe113c23 ++#define INSN3 0xff813083 ++#elif __riscv_xlen == 32 ++#define INSN0 0xfe112e23 ++#define INSN3 0xffc12083 ++#endif ++ ++#define FUNC_ENTRY_SIZE 16 ++#define FUNC_ENTRY_JMP 4 ++ + int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) + { +- int ret = ftrace_check_current_call(rec->ip, NULL); ++ unsigned int call[4] = {INSN0, 0, 0, INSN3}; ++ unsigned long target = addr; ++ unsigned long caller = rec->ip + FUNC_ENTRY_JMP; + +- if (ret) +- return ret; ++ call[1] = to_auipc_insn((unsigned int)(target - caller)); ++ call[2] = to_jalr_insn((unsigned int)(target - caller)); + +- return __ftrace_modify_call(rec->ip, addr, true); ++ if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE)) ++ return -EPERM; ++ ++ return 0; + } + + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) + { +- unsigned int call[2]; +- int ret; ++ unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4}; + +- make_call(rec->ip, addr, call); +- ret = ftrace_check_current_call(rec->ip, call); +- +- if (ret) +- return ret; ++ if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE)) ++ return -EPERM; + +- return __ftrace_modify_call(rec->ip, addr, false); ++ return 0; + } + + +@@ -139,15 +166,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) + { + unsigned int call[2]; ++ unsigned long caller = rec->ip + FUNC_ENTRY_JMP; + int ret; + +- make_call(rec->ip, old_addr, call); +- ret = ftrace_check_current_call(rec->ip, call); ++ make_call(caller, old_addr, call); ++ ret = ftrace_check_current_call(caller, call); + + if (ret) + return ret; + +- return __ftrace_modify_call(rec->ip, addr, true); ++ return __ftrace_modify_call(caller, addr, true); + } + #endif + +@@ -176,53 +204,30 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + + #ifdef CONFIG_DYNAMIC_FTRACE + extern void 
ftrace_graph_call(void); ++extern void ftrace_graph_regs_call(void); + int ftrace_enable_ftrace_graph_caller(void) + { +- unsigned int call[2]; +- static int init_graph = 1; + int ret; + +- make_call(&ftrace_graph_call, &ftrace_stub, call); +- +- /* +- * When enabling graph tracer for the first time, ftrace_graph_call +- * should contains a call to ftrace_stub. Once it has been disabled, +- * the 8-bytes at the position becomes NOPs. +- */ +- if (init_graph) { +- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call, +- call); +- init_graph = 0; +- } else { +- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call, +- NULL); +- } +- ++ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, ++ (unsigned long)&prepare_ftrace_return, true); + if (ret) + return ret; + +- return __ftrace_modify_call((unsigned long)&ftrace_graph_call, ++ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + (unsigned long)&prepare_ftrace_return, true); + } + + int ftrace_disable_ftrace_graph_caller(void) + { +- unsigned int call[2]; + int ret; + +- make_call(&ftrace_graph_call, &prepare_ftrace_return, call); +- +- /* +- * This is to make sure that ftrace_enable_ftrace_graph_caller +- * did the right thing. 
+- */ +- ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call, +- call); +- ++ ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call, ++ (unsigned long)&prepare_ftrace_return, false); + if (ret) + return ret; + +- return __ftrace_modify_call((unsigned long)&ftrace_graph_call, ++ return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call, + (unsigned long)&prepare_ftrace_return, false); + } + #endif /* CONFIG_DYNAMIC_FTRACE */ +diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S +index 35a6ed76cb8b7..d171eca623b6f 100644 +--- a/arch/riscv/kernel/mcount-dyn.S ++++ b/arch/riscv/kernel/mcount-dyn.S +@@ -13,224 +13,186 @@ + + .text + +- .macro SAVE_ABI_STATE +-#ifdef CONFIG_FUNCTION_GRAPH_TRACER +- addi sp, sp, -48 +- sd s0, 32(sp) +- sd ra, 40(sp) +- addi s0, sp, 48 +- sd t0, 24(sp) +- sd t1, 16(sp) +-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST +- sd t2, 8(sp) +-#endif +-#else +- addi sp, sp, -16 +- sd s0, 0(sp) +- sd ra, 8(sp) +- addi s0, sp, 16 +-#endif ++#define FENTRY_RA_OFFSET 12 ++#define ABI_SIZE_ON_STACK 72 ++#define ABI_A0 0 ++#define ABI_A1 8 ++#define ABI_A2 16 ++#define ABI_A3 24 ++#define ABI_A4 32 ++#define ABI_A5 40 ++#define ABI_A6 48 ++#define ABI_A7 56 ++#define ABI_RA 64 ++ ++ .macro SAVE_ABI ++ addi sp, sp, -SZREG ++ addi sp, sp, -ABI_SIZE_ON_STACK ++ ++ REG_S a0, ABI_A0(sp) ++ REG_S a1, ABI_A1(sp) ++ REG_S a2, ABI_A2(sp) ++ REG_S a3, ABI_A3(sp) ++ REG_S a4, ABI_A4(sp) ++ REG_S a5, ABI_A5(sp) ++ REG_S a6, ABI_A6(sp) ++ REG_S a7, ABI_A7(sp) ++ REG_S ra, ABI_RA(sp) + .endm + +- .macro RESTORE_ABI_STATE +-#ifdef CONFIG_FUNCTION_GRAPH_TRACER +- ld s0, 32(sp) +- ld ra, 40(sp) +- addi sp, sp, 48 +-#else +- ld ra, 8(sp) +- ld s0, 0(sp) +- addi sp, sp, 16 +-#endif ++ .macro RESTORE_ABI ++ REG_L a0, ABI_A0(sp) ++ REG_L a1, ABI_A1(sp) ++ REG_L a2, ABI_A2(sp) ++ REG_L a3, ABI_A3(sp) ++ REG_L a4, ABI_A4(sp) ++ REG_L a5, ABI_A5(sp) ++ REG_L a6, ABI_A6(sp) ++ REG_L a7, ABI_A7(sp) ++ REG_L ra, ABI_RA(sp) ++ ++ addi sp, 
sp, ABI_SIZE_ON_STACK ++ addi sp, sp, SZREG + .endm + +- .macro RESTORE_GRAPH_ARGS +- ld a0, 24(sp) +- ld a1, 16(sp) +-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST +- ld a2, 8(sp) +-#endif ++#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS ++ .macro SAVE_ALL ++ addi sp, sp, -SZREG ++ addi sp, sp, -PT_SIZE_ON_STACK ++ ++ REG_S x1, PT_EPC(sp) ++ addi sp, sp, PT_SIZE_ON_STACK ++ REG_L x1, (sp) ++ addi sp, sp, -PT_SIZE_ON_STACK ++ REG_S x1, PT_RA(sp) ++ REG_L x1, PT_EPC(sp) ++ ++ REG_S x2, PT_SP(sp) ++ REG_S x3, PT_GP(sp) ++ REG_S x4, PT_TP(sp) ++ REG_S x5, PT_T0(sp) ++ REG_S x6, PT_T1(sp) ++ REG_S x7, PT_T2(sp) ++ REG_S x8, PT_S0(sp) ++ REG_S x9, PT_S1(sp) ++ REG_S x10, PT_A0(sp) ++ REG_S x11, PT_A1(sp) ++ REG_S x12, PT_A2(sp) ++ REG_S x13, PT_A3(sp) ++ REG_S x14, PT_A4(sp) ++ REG_S x15, PT_A5(sp) ++ REG_S x16, PT_A6(sp) ++ REG_S x17, PT_A7(sp) ++ REG_S x18, PT_S2(sp) ++ REG_S x19, PT_S3(sp) ++ REG_S x20, PT_S4(sp) ++ REG_S x21, PT_S5(sp) ++ REG_S x22, PT_S6(sp) ++ REG_S x23, PT_S7(sp) ++ REG_S x24, PT_S8(sp) ++ REG_S x25, PT_S9(sp) ++ REG_S x26, PT_S10(sp) ++ REG_S x27, PT_S11(sp) ++ REG_S x28, PT_T3(sp) ++ REG_S x29, PT_T4(sp) ++ REG_S x30, PT_T5(sp) ++ REG_S x31, PT_T6(sp) + .endm + +-ENTRY(ftrace_graph_caller) +- addi sp, sp, -16 +- sd s0, 0(sp) +- sd ra, 8(sp) +- addi s0, sp, 16 +-ftrace_graph_call: +- .global ftrace_graph_call +- /* +- * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the +- * call below. Check ftrace_modify_all_code for details. 
+- */ +- call ftrace_stub +- ld ra, 8(sp) +- ld s0, 0(sp) +- addi sp, sp, 16 +- ret +-ENDPROC(ftrace_graph_caller) ++ .macro RESTORE_ALL ++ REG_L x1, PT_RA(sp) ++ addi sp, sp, PT_SIZE_ON_STACK ++ REG_S x1, (sp) ++ addi sp, sp, -PT_SIZE_ON_STACK ++ REG_L x1, PT_EPC(sp) ++ REG_L x2, PT_SP(sp) ++ REG_L x3, PT_GP(sp) ++ REG_L x4, PT_TP(sp) ++ REG_L x5, PT_T0(sp) ++ REG_L x6, PT_T1(sp) ++ REG_L x7, PT_T2(sp) ++ REG_L x8, PT_S0(sp) ++ REG_L x9, PT_S1(sp) ++ REG_L x10, PT_A0(sp) ++ REG_L x11, PT_A1(sp) ++ REG_L x12, PT_A2(sp) ++ REG_L x13, PT_A3(sp) ++ REG_L x14, PT_A4(sp) ++ REG_L x15, PT_A5(sp) ++ REG_L x16, PT_A6(sp) ++ REG_L x17, PT_A7(sp) ++ REG_L x18, PT_S2(sp) ++ REG_L x19, PT_S3(sp) ++ REG_L x20, PT_S4(sp) ++ REG_L x21, PT_S5(sp) ++ REG_L x22, PT_S6(sp) ++ REG_L x23, PT_S7(sp) ++ REG_L x24, PT_S8(sp) ++ REG_L x25, PT_S9(sp) ++ REG_L x26, PT_S10(sp) ++ REG_L x27, PT_S11(sp) ++ REG_L x28, PT_T3(sp) ++ REG_L x29, PT_T4(sp) ++ REG_L x30, PT_T5(sp) ++ REG_L x31, PT_T6(sp) ++ ++ addi sp, sp, PT_SIZE_ON_STACK ++ addi sp, sp, SZREG ++ .endm ++#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + + ENTRY(ftrace_caller) +- /* +- * a0: the address in the caller when calling ftrace_caller +- * a1: the caller's return address +- * a2: the address of global variable function_trace_op +- */ +- ld a1, -8(s0) +- addi a0, ra, -MCOUNT_INSN_SIZE +- la t5, function_trace_op +- ld a2, 0(t5) ++ SAVE_ABI + +-#ifdef CONFIG_FUNCTION_GRAPH_TRACER +- /* +- * the graph tracer (specifically, prepare_ftrace_return) needs these +- * arguments but for now the function tracer occupies the regs, so we +- * save them in temporary regs to recover later. 
+- */ +- addi t0, s0, -8 +- mv t1, a0 +-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST +- ld t2, -16(s0) +-#endif +-#endif ++ addi a0, ra, -FENTRY_RA_OFFSET ++ la a1, function_trace_op ++ REG_L a2, 0(a1) ++ REG_L a1, ABI_SIZE_ON_STACK(sp) ++ mv a3, sp + +- SAVE_ABI_STATE + ftrace_call: + .global ftrace_call +- /* +- * For the dynamic ftrace to work, here we should reserve at least +- * 8 bytes for a functional auipc-jalr pair. The following call +- * serves this purpose. +- * +- * Calling ftrace_update_ftrace_func would overwrite the nops below. +- * Check ftrace_modify_all_code for details. +- */ + call ftrace_stub + + #ifdef CONFIG_FUNCTION_GRAPH_TRACER +- RESTORE_GRAPH_ARGS +- call ftrace_graph_caller ++ addi a0, sp, ABI_SIZE_ON_STACK ++ REG_L a1, ABI_RA(sp) ++ addi a1, a1, -FENTRY_RA_OFFSET ++#ifdef HAVE_FUNCTION_GRAPH_FP_TEST ++ mv a2, s0 + #endif +- +- RESTORE_ABI_STATE ++ftrace_graph_call: ++ .global ftrace_graph_call ++ call ftrace_stub ++#endif ++ RESTORE_ABI + ret + ENDPROC(ftrace_caller) + + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +- .macro SAVE_ALL +- addi sp, sp, -(PT_SIZE_ON_STACK+16) +- sd s0, (PT_SIZE_ON_STACK)(sp) +- sd ra, (PT_SIZE_ON_STACK+8)(sp) +- addi s0, sp, (PT_SIZE_ON_STACK+16) +- +- sd x1, PT_RA(sp) +- sd x2, PT_SP(sp) +- sd x3, PT_GP(sp) +- sd x4, PT_TP(sp) +- sd x5, PT_T0(sp) +- sd x6, PT_T1(sp) +- sd x7, PT_T2(sp) +- sd x8, PT_S0(sp) +- sd x9, PT_S1(sp) +- sd x10, PT_A0(sp) +- sd x11, PT_A1(sp) +- sd x12, PT_A2(sp) +- sd x13, PT_A3(sp) +- sd x14, PT_A4(sp) +- sd x15, PT_A5(sp) +- sd x16, PT_A6(sp) +- sd x17, PT_A7(sp) +- sd x18, PT_S2(sp) +- sd x19, PT_S3(sp) +- sd x20, PT_S4(sp) +- sd x21, PT_S5(sp) +- sd x22, PT_S6(sp) +- sd x23, PT_S7(sp) +- sd x24, PT_S8(sp) +- sd x25, PT_S9(sp) +- sd x26, PT_S10(sp) +- sd x27, PT_S11(sp) +- sd x28, PT_T3(sp) +- sd x29, PT_T4(sp) +- sd x30, PT_T5(sp) +- sd x31, PT_T6(sp) +- .endm +- +- .macro RESTORE_ALL +- ld x1, PT_RA(sp) +- ld x2, PT_SP(sp) +- ld x3, PT_GP(sp) +- ld x4, PT_TP(sp) +- ld x5, PT_T0(sp) +- ld x6, 
PT_T1(sp) +- ld x7, PT_T2(sp) +- ld x8, PT_S0(sp) +- ld x9, PT_S1(sp) +- ld x10, PT_A0(sp) +- ld x11, PT_A1(sp) +- ld x12, PT_A2(sp) +- ld x13, PT_A3(sp) +- ld x14, PT_A4(sp) +- ld x15, PT_A5(sp) +- ld x16, PT_A6(sp) +- ld x17, PT_A7(sp) +- ld x18, PT_S2(sp) +- ld x19, PT_S3(sp) +- ld x20, PT_S4(sp) +- ld x21, PT_S5(sp) +- ld x22, PT_S6(sp) +- ld x23, PT_S7(sp) +- ld x24, PT_S8(sp) +- ld x25, PT_S9(sp) +- ld x26, PT_S10(sp) +- ld x27, PT_S11(sp) +- ld x28, PT_T3(sp) +- ld x29, PT_T4(sp) +- ld x30, PT_T5(sp) +- ld x31, PT_T6(sp) +- +- ld s0, (PT_SIZE_ON_STACK)(sp) +- ld ra, (PT_SIZE_ON_STACK+8)(sp) +- addi sp, sp, (PT_SIZE_ON_STACK+16) +- .endm +- +- .macro RESTORE_GRAPH_REG_ARGS +- ld a0, PT_T0(sp) +- ld a1, PT_T1(sp) +-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST +- ld a2, PT_T2(sp) +-#endif +- .endm +- +-/* +- * Most of the contents are the same as ftrace_caller. +- */ + ENTRY(ftrace_regs_caller) +- /* +- * a3: the address of all registers in the stack +- */ +- ld a1, -8(s0) +- addi a0, ra, -MCOUNT_INSN_SIZE +- la t5, function_trace_op +- ld a2, 0(t5) +- addi a3, sp, -(PT_SIZE_ON_STACK+16) +- +-#ifdef CONFIG_FUNCTION_GRAPH_TRACER +- addi t0, s0, -8 +- mv t1, a0 +-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST +- ld t2, -16(s0) +-#endif +-#endif + SAVE_ALL + ++ addi a0, ra, -FENTRY_RA_OFFSET ++ la a1, function_trace_op ++ REG_L a2, 0(a1) ++ REG_L a1, PT_SIZE_ON_STACK(sp) ++ mv a3, sp ++ + ftrace_regs_call: + .global ftrace_regs_call + call ftrace_stub + + #ifdef CONFIG_FUNCTION_GRAPH_TRACER +- RESTORE_GRAPH_REG_ARGS +- call ftrace_graph_caller ++ addi a0, sp, PT_RA ++ REG_L a1, PT_EPC(sp) ++ addi a1, a1, -FENTRY_RA_OFFSET ++#ifdef HAVE_FUNCTION_GRAPH_FP_TEST ++ mv a2, s0 ++#endif ++ftrace_graph_regs_call: ++ .global ftrace_graph_regs_call ++ call ftrace_stub + #endif + + RESTORE_ALL +-- +2.39.2 + diff --git a/queue-5.10/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch b/queue-5.10/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch new file mode 100644 index 
00000000000..7e966f296c4 --- /dev/null +++ b/queue-5.10/scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch @@ -0,0 +1,79 @@ +From f8a58485c0ab2ebd34421c68b42f82b6751ab879 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 10 Feb 2023 12:52:00 -0800 +Subject: scsi: core: Remove the /proc/scsi/${proc_name} directory earlier + +From: Bart Van Assche + +[ Upstream commit fc663711b94468f4e1427ebe289c9f05669699c9 ] + +Remove the /proc/scsi/${proc_name} directory earlier to fix a race +condition between unloading and reloading kernel modules. This fixes a bug +introduced in 2009 by commit 77c019768f06 ("[SCSI] fix /proc memory leak in +the SCSI core"). + +Fix the following kernel warning: + +proc_dir_entry 'scsi/scsi_debug' already registered +WARNING: CPU: 19 PID: 27986 at fs/proc/generic.c:376 proc_register+0x27d/0x2e0 +Call Trace: + proc_mkdir+0xb5/0xe0 + scsi_proc_hostdir_add+0xb5/0x170 + scsi_host_alloc+0x683/0x6c0 + sdebug_driver_probe+0x6b/0x2d0 [scsi_debug] + really_probe+0x159/0x540 + __driver_probe_device+0xdc/0x230 + driver_probe_device+0x4f/0x120 + __device_attach_driver+0xef/0x180 + bus_for_each_drv+0xe5/0x130 + __device_attach+0x127/0x290 + device_initial_probe+0x17/0x20 + bus_probe_device+0x110/0x130 + device_add+0x673/0xc80 + device_register+0x1e/0x30 + sdebug_add_host_helper+0x1a7/0x3b0 [scsi_debug] + scsi_debug_init+0x64f/0x1000 [scsi_debug] + do_one_initcall+0xd7/0x470 + do_init_module+0xe7/0x330 + load_module+0x122a/0x12c0 + __do_sys_finit_module+0x124/0x1a0 + __x64_sys_finit_module+0x46/0x50 + do_syscall_64+0x38/0x80 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 + +Link: https://lore.kernel.org/r/20230210205200.36973-3-bvanassche@acm.org +Cc: Alan Stern +Cc: Yi Zhang +Cc: stable@vger.kernel.org +Fixes: 77c019768f06 ("[SCSI] fix /proc memory leak in the SCSI core") +Reported-by: Yi Zhang +Signed-off-by: Bart Van Assche +Signed-off-by: Martin K. 
Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/hosts.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c +index d664c4650b2dd..fae0323242103 100644 +--- a/drivers/scsi/hosts.c ++++ b/drivers/scsi/hosts.c +@@ -180,6 +180,7 @@ void scsi_remove_host(struct Scsi_Host *shost) + scsi_forget_host(shost); + mutex_unlock(&shost->scan_mutex); + scsi_proc_host_rm(shost); ++ scsi_proc_hostdir_rm(shost->hostt); + + spin_lock_irqsave(shost->host_lock, flags); + if (scsi_host_set_state(shost, SHOST_DEL)) +@@ -321,6 +322,7 @@ static void scsi_host_dev_release(struct device *dev) + struct Scsi_Host *shost = dev_to_shost(dev); + struct device *parent = dev->parent; + ++ /* In case scsi_remove_host() has not been called. */ + scsi_proc_hostdir_rm(shost->hostt); + + /* Wait for functions invoked through call_rcu(&shost->rcu, ...) */ +-- +2.39.2 + diff --git a/queue-5.10/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch b/queue-5.10/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch new file mode 100644 index 00000000000..667acb0d60f --- /dev/null +++ b/queue-5.10/scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch @@ -0,0 +1,60 @@ +From bc488ae13b236a092e733fa7e55c51c6c308a204 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 2 Mar 2023 16:23:40 +0530 +Subject: scsi: megaraid_sas: Update max supported LD IDs to 240 + +From: Chandrakanth Patil + +[ Upstream commit bfa659177dcba48cf13f2bd88c1972f12a60bf1c ] + +The firmware only supports Logical Disk IDs up to 240 and LD ID 255 (0xFF) +is reserved for deleted LDs. However, in some cases, firmware was assigning +LD ID 254 (0xFE) to deleted LDs and this was causing the driver to mark the +wrong disk as deleted. This in turn caused the wrong disk device to be +taken offline by the SCSI midlayer. + +To address this issue, limit the LD ID range from 255 to 240. 
This ensures +the deleted LD ID is properly identified and removed by the driver without +accidentally deleting any valid LDs. + +Fixes: ae6874ba4b43 ("scsi: megaraid_sas: Early detection of VD deletion through RaidMap update") +Reported-by: Martin K. Petersen +Signed-off-by: Chandrakanth Patil +Signed-off-by: Sumit Saxena +Link: https://lore.kernel.org/r/20230302105342.34933-2-chandrakanth.patil@broadcom.com +Signed-off-by: Martin K. Petersen +Signed-off-by: Sasha Levin +--- + drivers/scsi/megaraid/megaraid_sas.h | 2 ++ + drivers/scsi/megaraid/megaraid_sas_fp.c | 2 +- + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/scsi/megaraid/megaraid_sas.h b/drivers/scsi/megaraid/megaraid_sas.h +index c088a848776ef..2d5b1d5978664 100644 +--- a/drivers/scsi/megaraid/megaraid_sas.h ++++ b/drivers/scsi/megaraid/megaraid_sas.h +@@ -1517,6 +1517,8 @@ struct megasas_ctrl_info { + #define MEGASAS_MAX_LD_IDS (MEGASAS_MAX_LD_CHANNELS * \ + MEGASAS_MAX_DEV_PER_CHANNEL) + ++#define MEGASAS_MAX_SUPPORTED_LD_IDS 240 ++ + #define MEGASAS_MAX_SECTORS (2*1024) + #define MEGASAS_MAX_SECTORS_IEEE (2*128) + #define MEGASAS_DBG_LVL 1 +diff --git a/drivers/scsi/megaraid/megaraid_sas_fp.c b/drivers/scsi/megaraid/megaraid_sas_fp.c +index 83f69c33b01a9..ec10d35b4685a 100644 +--- a/drivers/scsi/megaraid/megaraid_sas_fp.c ++++ b/drivers/scsi/megaraid/megaraid_sas_fp.c +@@ -358,7 +358,7 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance, u64 map_id) + ld = MR_TargetIdToLdGet(i, drv_map); + + /* For non existing VDs, iterate to next VD*/ +- if (ld >= (MAX_LOGICAL_DRIVES_EXT - 1)) ++ if (ld >= MEGASAS_MAX_SUPPORTED_LD_IDS) + continue; + + raid = MR_LdRaidGet(ld, drv_map); +-- +2.39.2 + diff --git a/queue-5.10/selftests-landlock-add-clang-format-exceptions.patch b/queue-5.10/selftests-landlock-add-clang-format-exceptions.patch new file mode 100644 index 00000000000..5ee7009ebd0 --- /dev/null +++ b/queue-5.10/selftests-landlock-add-clang-format-exceptions.patch @@ -0,0 +1,226
@@ +From 76f7cb6aeb2007c0d4febda804772cf6ae508604 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 6 May 2022 18:05:09 +0200 +Subject: selftests/landlock: Add clang-format exceptions +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +[ Upstream commit 4598d9abf4215e1e371a35683350d50122793c80 ] + +In preparation for a following commit, add clang-format on and +clang-format off stanzas around constant definitions and the TEST_F_FORK +macro. This makes it possible to keep values aligned, which is much more readable +than packed definitions. + +Add other clang-format exceptions for FIXTURE() and +FIXTURE_VARIANT_ADD() declarations to force space before open brace, +which is reported by checkpatch.pl. + +Link: https://lore.kernel.org/r/20220506160513.523257-4-mic@digikod.net +Cc: stable@vger.kernel.org +Signed-off-by: Mickaël Salaün +Stable-dep-of: 8677e555f17f ("selftests/landlock: Test ptrace as much as possible with Yama") +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/landlock/common.h | 2 ++ + tools/testing/selftests/landlock/fs_test.c | 23 ++++++++++++++----- + .../testing/selftests/landlock/ptrace_test.c | 20 +++++++++++++++- + 3 files changed, 38 insertions(+), 7 deletions(-) + +diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h +index 20e2a9286d710..61127fffbeb83 100644 +--- a/tools/testing/selftests/landlock/common.h ++++ b/tools/testing/selftests/landlock/common.h +@@ -29,6 +29,7 @@ + * this to be possible, we must not call abort() but instead exit smoothly + * (hence the step print).
+ */ ++/* clang-format off */ + #define TEST_F_FORK(fixture_name, test_name) \ + static void fixture_name##_##test_name##_child( \ + struct __test_metadata *_metadata, \ +@@ -75,6 +76,7 @@ + FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ + const FIXTURE_VARIANT(fixture_name) \ + __attribute__((unused)) *variant) ++/* clang-format on */ + + #ifndef landlock_create_ruleset + static inline int landlock_create_ruleset( +diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c +index db153452b110a..036d55836b9ec 100644 +--- a/tools/testing/selftests/landlock/fs_test.c ++++ b/tools/testing/selftests/landlock/fs_test.c +@@ -256,8 +256,9 @@ static void remove_layout1(struct __test_metadata *const _metadata) + EXPECT_EQ(0, remove_path(dir_s3d2)); + } + +-FIXTURE(layout1) { +-}; ++/* clang-format off */ ++FIXTURE(layout1) {}; ++/* clang-format on */ + + FIXTURE_SETUP(layout1) + { +@@ -411,6 +412,8 @@ TEST_F_FORK(layout1, inval) + ASSERT_EQ(0, close(ruleset_fd)); + } + ++/* clang-format off */ ++ + #define ACCESS_FILE ( \ + LANDLOCK_ACCESS_FS_EXECUTE | \ + LANDLOCK_ACCESS_FS_WRITE_FILE | \ +@@ -431,6 +434,8 @@ TEST_F_FORK(layout1, inval) + LANDLOCK_ACCESS_FS_MAKE_BLOCK | \ + ACCESS_LAST) + ++/* clang-format on */ ++ + TEST_F_FORK(layout1, file_access_rights) + { + __u64 access; +@@ -487,6 +492,8 @@ struct rule { + __u64 access; + }; + ++/* clang-format off */ ++ + #define ACCESS_RO ( \ + LANDLOCK_ACCESS_FS_READ_FILE | \ + LANDLOCK_ACCESS_FS_READ_DIR) +@@ -495,6 +502,8 @@ struct rule { + ACCESS_RO | \ + LANDLOCK_ACCESS_FS_WRITE_FILE) + ++/* clang-format on */ ++ + static int create_ruleset(struct __test_metadata *const _metadata, + const __u64 handled_access_fs, const struct rule rules[]) + { +@@ -2105,8 +2114,9 @@ TEST_F_FORK(layout1, proc_pipe) + ASSERT_EQ(0, close(pipe_fds[1])); + } + +-FIXTURE(layout1_bind) { +-}; ++/* clang-format off */ ++FIXTURE(layout1_bind) {}; ++/* clang-format on */ + + 
FIXTURE_SETUP(layout1_bind) + { +@@ -2446,8 +2456,9 @@ static const char (*merge_sub_files[])[] = { + * └── work + */ + +-FIXTURE(layout2_overlay) { +-}; ++/* clang-format off */ ++FIXTURE(layout2_overlay) {}; ++/* clang-format on */ + + FIXTURE_SETUP(layout2_overlay) + { +diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c +index 15fbef9cc8496..090adadfe2dc3 100644 +--- a/tools/testing/selftests/landlock/ptrace_test.c ++++ b/tools/testing/selftests/landlock/ptrace_test.c +@@ -59,7 +59,9 @@ static int test_ptrace_read(const pid_t pid) + return 0; + } + +-FIXTURE(hierarchy) { }; ++/* clang-format off */ ++FIXTURE(hierarchy) {}; ++/* clang-format on */ + + FIXTURE_VARIANT(hierarchy) { + const bool domain_both; +@@ -83,7 +85,9 @@ FIXTURE_VARIANT(hierarchy) { + * \ P2 -> P1 : allow + * 'P2 + */ ++/* clang-format off */ + FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) { ++ /* clang-format on */ + .domain_both = false, + .domain_parent = false, + .domain_child = false, +@@ -98,7 +102,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) { + * | P2 | + * '------' + */ ++/* clang-format off */ + FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) { ++ /* clang-format on */ + .domain_both = false, + .domain_parent = false, + .domain_child = true, +@@ -112,7 +118,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) { + * ' + * P2 + */ ++/* clang-format off */ + FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) { ++ /* clang-format on */ + .domain_both = false, + .domain_parent = true, + .domain_child = false, +@@ -127,7 +135,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) { + * | P2 | + * '------' + */ ++/* clang-format off */ + FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) { ++ /* clang-format on */ + .domain_both = false, + .domain_parent = true, + .domain_child = true, +@@ -142,7 +152,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) { + * | P2 | + * 
'-------------' + */ ++/* clang-format off */ + FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) { ++ /* clang-format on */ + .domain_both = true, + .domain_parent = false, + .domain_child = false, +@@ -158,7 +170,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) { + * | '------' | + * '-----------------' + */ ++/* clang-format off */ + FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) { ++ /* clang-format on */ + .domain_both = true, + .domain_parent = false, + .domain_child = true, +@@ -174,7 +188,9 @@ FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) { + * | P2 | + * '-----------------' + */ ++/* clang-format off */ + FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) { ++ /* clang-format on */ + .domain_both = true, + .domain_parent = true, + .domain_child = false, +@@ -192,7 +208,9 @@ FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) { + * | '------' | + * '-----------------' + */ ++/* clang-format off */ + FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) { ++ /* clang-format on */ + .domain_both = true, + .domain_parent = true, + .domain_child = true, +-- +2.39.2 + diff --git a/queue-5.10/selftests-landlock-add-user-space-tests.patch b/queue-5.10/selftests-landlock-add-user-space-tests.patch new file mode 100644 index 00000000000..2332bfa504e --- /dev/null +++ b/queue-5.10/selftests-landlock-add-user-space-tests.patch @@ -0,0 +1,3694 @@ +From 9f43c1d77d1e7cac1c93a6f95a6d91ebc4089a73 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 22 Apr 2021 17:41:20 +0200 +Subject: selftests/landlock: Add user space tests +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Mickaël Salaün + +[ Upstream commit e1199815b47be83346c03e20a3de76f934e4bb34 ] + +Test all Landlock system calls, ptrace hooks semantic and filesystem +access-control with multiple layouts. + +Test coverage for security/landlock/ is 93.6% of lines. 
The code not +covered only deals with internal kernel errors (e.g. memory allocation) +and race conditions. + +Cc: James Morris +Cc: Jann Horn +Cc: Serge E. Hallyn +Cc: Shuah Khan +Signed-off-by: Mickaël Salaün +Reviewed-by: Vincent Dagonneau +Reviewed-by: Kees Cook +Link: https://lore.kernel.org/r/20210422154123.13086-11-mic@digikod.net +Signed-off-by: James Morris +Stable-dep-of: 366617a69e60 ("selftests/landlock: Skip overlayfs tests when not supported") +Signed-off-by: Sasha Levin +--- + MAINTAINERS | 1 + + tools/testing/selftests/Makefile | 1 + + tools/testing/selftests/landlock/.gitignore | 2 + + tools/testing/selftests/landlock/Makefile | 24 + + tools/testing/selftests/landlock/base_test.c | 219 ++ + tools/testing/selftests/landlock/common.h | 183 ++ + tools/testing/selftests/landlock/config | 7 + + tools/testing/selftests/landlock/fs_test.c | 2791 +++++++++++++++++ + .../testing/selftests/landlock/ptrace_test.c | 337 ++ + tools/testing/selftests/landlock/true.c | 5 + + 10 files changed, 3570 insertions(+) + create mode 100644 tools/testing/selftests/landlock/.gitignore + create mode 100644 tools/testing/selftests/landlock/Makefile + create mode 100644 tools/testing/selftests/landlock/base_test.c + create mode 100644 tools/testing/selftests/landlock/common.h + create mode 100644 tools/testing/selftests/landlock/config + create mode 100644 tools/testing/selftests/landlock/fs_test.c + create mode 100644 tools/testing/selftests/landlock/ptrace_test.c + create mode 100644 tools/testing/selftests/landlock/true.c + +diff --git a/MAINTAINERS b/MAINTAINERS +index 72815c1a325eb..5bc6a028236e3 100644 +--- a/MAINTAINERS ++++ b/MAINTAINERS +@@ -9843,6 +9843,7 @@ S: Supported + W: https://landlock.io + T: git https://github.com/landlock-lsm/linux.git + F: security/landlock/ ++F: tools/testing/selftests/landlock/ + K: landlock + K: LANDLOCK + +diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile +index db1e24d7155fa..ca96973dca44d 100644 +--- 
a/tools/testing/selftests/Makefile ++++ b/tools/testing/selftests/Makefile +@@ -26,6 +26,7 @@ TARGETS += ir + TARGETS += kcmp + TARGETS += kexec + TARGETS += kvm ++TARGETS += landlock + TARGETS += lib + TARGETS += livepatch + TARGETS += lkdtm +diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore +new file mode 100644 +index 0000000000000..470203a7cd737 +--- /dev/null ++++ b/tools/testing/selftests/landlock/.gitignore +@@ -0,0 +1,2 @@ ++/*_test ++/true +diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile +new file mode 100644 +index 0000000000000..a99596ca9882b +--- /dev/null ++++ b/tools/testing/selftests/landlock/Makefile +@@ -0,0 +1,24 @@ ++# SPDX-License-Identifier: GPL-2.0 ++ ++CFLAGS += -Wall -O2 ++ ++src_test := $(wildcard *_test.c) ++ ++TEST_GEN_PROGS := $(src_test:.c=) ++ ++TEST_GEN_PROGS_EXTENDED := true ++ ++KSFT_KHDR_INSTALL := 1 ++OVERRIDE_TARGETS := 1 ++include ../lib.mk ++ ++khdr_dir = $(top_srcdir)/usr/include ++ ++$(khdr_dir)/linux/landlock.h: khdr ++ @: ++ ++$(OUTPUT)/true: true.c ++ $(LINK.c) $< $(LDLIBS) -o $@ -static ++ ++$(OUTPUT)/%_test: %_test.c $(khdr_dir)/linux/landlock.h ../kselftest_harness.h common.h ++ $(LINK.c) $< $(LDLIBS) -o $@ -lcap -I$(khdr_dir) +diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c +new file mode 100644 +index 0000000000000..262c3c8d953ad +--- /dev/null ++++ b/tools/testing/selftests/landlock/base_test.c +@@ -0,0 +1,219 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Landlock tests - Common user space base ++ * ++ * Copyright © 2017-2020 Mickaël Salaün ++ * Copyright © 2019-2020 ANSSI ++ */ ++ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "common.h" ++ ++#ifndef O_PATH ++#define O_PATH 010000000 ++#endif ++ ++TEST(inconsistent_attr) { ++ const long page_size = sysconf(_SC_PAGESIZE); ++ char *const 
buf = malloc(page_size + 1); ++ struct landlock_ruleset_attr *const ruleset_attr = (void *)buf; ++ ++ ASSERT_NE(NULL, buf); ++ ++ /* Checks copy_from_user(). */ ++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 0, 0)); ++ /* The size if less than sizeof(struct landlock_attr_enforce). */ ++ ASSERT_EQ(EINVAL, errno); ++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0)); ++ ASSERT_EQ(EINVAL, errno); ++ ++ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0)); ++ /* The size if less than sizeof(struct landlock_attr_enforce). */ ++ ASSERT_EQ(EFAULT, errno); ++ ++ ASSERT_EQ(-1, landlock_create_ruleset(NULL, ++ sizeof(struct landlock_ruleset_attr), 0)); ++ ASSERT_EQ(EFAULT, errno); ++ ++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0)); ++ ASSERT_EQ(E2BIG, errno); ++ ++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, ++ sizeof(struct landlock_ruleset_attr), 0)); ++ ASSERT_EQ(ENOMSG, errno); ++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0)); ++ ASSERT_EQ(ENOMSG, errno); ++ ++ /* Checks non-zero value. 
*/ ++ buf[page_size - 2] = '.'; ++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0)); ++ ASSERT_EQ(E2BIG, errno); ++ ++ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0)); ++ ASSERT_EQ(E2BIG, errno); ++ ++ free(buf); ++} ++ ++TEST(empty_path_beneath_attr) { ++ const struct landlock_ruleset_attr ruleset_attr = { ++ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE, ++ }; ++ const int ruleset_fd = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ /* Similar to struct landlock_path_beneath_attr.parent_fd = 0 */ ++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ NULL, 0)); ++ ASSERT_EQ(EFAULT, errno); ++ ASSERT_EQ(0, close(ruleset_fd)); ++} ++ ++TEST(inval_fd_enforce) { ++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ++ ++ ASSERT_EQ(-1, landlock_restrict_self(-1, 0)); ++ ASSERT_EQ(EBADF, errno); ++} ++ ++TEST(unpriv_enforce_without_no_new_privs) { ++ int err; ++ ++ drop_caps(_metadata); ++ err = landlock_restrict_self(-1, 0); ++ ASSERT_EQ(EPERM, errno); ++ ASSERT_EQ(err, -1); ++} ++ ++TEST(ruleset_fd_io) ++{ ++ struct landlock_ruleset_attr ruleset_attr = { ++ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, ++ }; ++ int ruleset_fd; ++ char buf; ++ ++ drop_caps(_metadata); ++ ruleset_fd = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ ASSERT_LE(0, ruleset_fd); ++ ++ ASSERT_EQ(-1, write(ruleset_fd, ".", 1)); ++ ASSERT_EQ(EINVAL, errno); ++ ASSERT_EQ(-1, read(ruleset_fd, &buf, 1)); ++ ASSERT_EQ(EINVAL, errno); ++ ++ ASSERT_EQ(0, close(ruleset_fd)); ++} ++ ++/* Tests enforcement of a ruleset FD transferred through a UNIX socket. 
*/ ++TEST(ruleset_fd_transfer) ++{ ++ struct landlock_ruleset_attr ruleset_attr = { ++ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR, ++ }; ++ struct landlock_path_beneath_attr path_beneath_attr = { ++ .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR, ++ }; ++ int ruleset_fd_tx, dir_fd; ++ union { ++ /* Aligned ancillary data buffer. */ ++ char buf[CMSG_SPACE(sizeof(ruleset_fd_tx))]; ++ struct cmsghdr _align; ++ } cmsg_tx = {}; ++ char data_tx = '.'; ++ struct iovec io = { ++ .iov_base = &data_tx, ++ .iov_len = sizeof(data_tx), ++ }; ++ struct msghdr msg = { ++ .msg_iov = &io, ++ .msg_iovlen = 1, ++ .msg_control = &cmsg_tx.buf, ++ .msg_controllen = sizeof(cmsg_tx.buf), ++ }; ++ struct cmsghdr *cmsg; ++ int socket_fds[2]; ++ pid_t child; ++ int status; ++ ++ drop_caps(_metadata); ++ ++ /* Creates a test ruleset with a simple rule. */ ++ ruleset_fd_tx = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ ASSERT_LE(0, ruleset_fd_tx); ++ path_beneath_attr.parent_fd = open("/tmp", O_PATH | O_NOFOLLOW | ++ O_DIRECTORY | O_CLOEXEC); ++ ASSERT_LE(0, path_beneath_attr.parent_fd); ++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath_attr, 0)); ++ ASSERT_EQ(0, close(path_beneath_attr.parent_fd)); ++ ++ cmsg = CMSG_FIRSTHDR(&msg); ++ ASSERT_NE(NULL, cmsg); ++ cmsg->cmsg_len = CMSG_LEN(sizeof(ruleset_fd_tx)); ++ cmsg->cmsg_level = SOL_SOCKET; ++ cmsg->cmsg_type = SCM_RIGHTS; ++ memcpy(CMSG_DATA(cmsg), &ruleset_fd_tx, sizeof(ruleset_fd_tx)); ++ ++ /* Sends the ruleset FD over a socketpair and then close it. 
*/ ++ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, socket_fds)); ++ ASSERT_EQ(sizeof(data_tx), sendmsg(socket_fds[0], &msg, 0)); ++ ASSERT_EQ(0, close(socket_fds[0])); ++ ASSERT_EQ(0, close(ruleset_fd_tx)); ++ ++ child = fork(); ++ ASSERT_LE(0, child); ++ if (child == 0) { ++ int ruleset_fd_rx; ++ ++ *(char *)msg.msg_iov->iov_base = '\0'; ++ ASSERT_EQ(sizeof(data_tx), recvmsg(socket_fds[1], &msg, MSG_CMSG_CLOEXEC)); ++ ASSERT_EQ('.', *(char *)msg.msg_iov->iov_base); ++ ASSERT_EQ(0, close(socket_fds[1])); ++ cmsg = CMSG_FIRSTHDR(&msg); ++ ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(ruleset_fd_tx))); ++ memcpy(&ruleset_fd_rx, CMSG_DATA(cmsg), sizeof(ruleset_fd_tx)); ++ ++ /* Enforces the received ruleset on the child. */ ++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd_rx, 0)); ++ ASSERT_EQ(0, close(ruleset_fd_rx)); ++ ++ /* Checks that the ruleset enforcement. */ ++ ASSERT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC)); ++ ASSERT_EQ(EACCES, errno); ++ dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC); ++ ASSERT_LE(0, dir_fd); ++ ASSERT_EQ(0, close(dir_fd)); ++ _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); ++ return; ++ } ++ ++ ASSERT_EQ(0, close(socket_fds[1])); ++ ++ /* Checks that the parent is unrestricted. 
*/ ++ dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC); ++ ASSERT_LE(0, dir_fd); ++ ASSERT_EQ(0, close(dir_fd)); ++ dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC); ++ ASSERT_LE(0, dir_fd); ++ ASSERT_EQ(0, close(dir_fd)); ++ ++ ASSERT_EQ(child, waitpid(child, &status, 0)); ++ ASSERT_EQ(1, WIFEXITED(status)); ++ ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status)); ++} ++ ++TEST_HARNESS_MAIN +diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h +new file mode 100644 +index 0000000000000..20e2a9286d710 +--- /dev/null ++++ b/tools/testing/selftests/landlock/common.h +@@ -0,0 +1,183 @@ ++/* SPDX-License-Identifier: GPL-2.0 */ ++/* ++ * Landlock test helpers ++ * ++ * Copyright © 2017-2020 Mickaël Salaün ++ * Copyright © 2019-2020 ANSSI ++ * Copyright © 2021 Microsoft Corporation ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "../kselftest_harness.h" ++ ++#ifndef ARRAY_SIZE ++#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) ++#endif ++ ++/* ++ * TEST_F_FORK() is useful when a test drop privileges but the corresponding ++ * FIXTURE_TEARDOWN() requires them (e.g. to remove files from a directory ++ * where write actions are denied). For convenience, FIXTURE_TEARDOWN() is ++ * also called when the test failed, but not when FIXTURE_SETUP() failed. For ++ * this to be possible, we must not call abort() but instead exit smoothly ++ * (hence the step print). 
++ */ ++#define TEST_F_FORK(fixture_name, test_name) \ ++ static void fixture_name##_##test_name##_child( \ ++ struct __test_metadata *_metadata, \ ++ FIXTURE_DATA(fixture_name) *self, \ ++ const FIXTURE_VARIANT(fixture_name) *variant); \ ++ TEST_F(fixture_name, test_name) \ ++ { \ ++ int status; \ ++ const pid_t child = fork(); \ ++ if (child < 0) \ ++ abort(); \ ++ if (child == 0) { \ ++ _metadata->no_print = 1; \ ++ fixture_name##_##test_name##_child(_metadata, self, variant); \ ++ if (_metadata->skip) \ ++ _exit(255); \ ++ if (_metadata->passed) \ ++ _exit(0); \ ++ _exit(_metadata->step); \ ++ } \ ++ if (child != waitpid(child, &status, 0)) \ ++ abort(); \ ++ if (WIFSIGNALED(status) || !WIFEXITED(status)) { \ ++ _metadata->passed = 0; \ ++ _metadata->step = 1; \ ++ return; \ ++ } \ ++ switch (WEXITSTATUS(status)) { \ ++ case 0: \ ++ _metadata->passed = 1; \ ++ break; \ ++ case 255: \ ++ _metadata->passed = 1; \ ++ _metadata->skip = 1; \ ++ break; \ ++ default: \ ++ _metadata->passed = 0; \ ++ _metadata->step = WEXITSTATUS(status); \ ++ break; \ ++ } \ ++ } \ ++ static void fixture_name##_##test_name##_child( \ ++ struct __test_metadata __attribute__((unused)) *_metadata, \ ++ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \ ++ const FIXTURE_VARIANT(fixture_name) \ ++ __attribute__((unused)) *variant) ++ ++#ifndef landlock_create_ruleset ++static inline int landlock_create_ruleset( ++ const struct landlock_ruleset_attr *const attr, ++ const size_t size, const __u32 flags) ++{ ++ return syscall(__NR_landlock_create_ruleset, attr, size, flags); ++} ++#endif ++ ++#ifndef landlock_add_rule ++static inline int landlock_add_rule(const int ruleset_fd, ++ const enum landlock_rule_type rule_type, ++ const void *const rule_attr, const __u32 flags) ++{ ++ return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, ++ rule_attr, flags); ++} ++#endif ++ ++#ifndef landlock_restrict_self ++static inline int landlock_restrict_self(const int ruleset_fd, ++ const 
__u32 flags) ++{ ++ return syscall(__NR_landlock_restrict_self, ruleset_fd, flags); ++} ++#endif ++ ++static void _init_caps(struct __test_metadata *const _metadata, bool drop_all) ++{ ++ cap_t cap_p; ++ /* Only these three capabilities are useful for the tests. */ ++ const cap_value_t caps[] = { ++ CAP_DAC_OVERRIDE, ++ CAP_MKNOD, ++ CAP_SYS_ADMIN, ++ CAP_SYS_CHROOT, ++ }; ++ ++ cap_p = cap_get_proc(); ++ EXPECT_NE(NULL, cap_p) { ++ TH_LOG("Failed to cap_get_proc: %s", strerror(errno)); ++ } ++ EXPECT_NE(-1, cap_clear(cap_p)) { ++ TH_LOG("Failed to cap_clear: %s", strerror(errno)); ++ } ++ if (!drop_all) { ++ EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED, ++ ARRAY_SIZE(caps), caps, CAP_SET)) { ++ TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); ++ } ++ } ++ EXPECT_NE(-1, cap_set_proc(cap_p)) { ++ TH_LOG("Failed to cap_set_proc: %s", strerror(errno)); ++ } ++ EXPECT_NE(-1, cap_free(cap_p)) { ++ TH_LOG("Failed to cap_free: %s", strerror(errno)); ++ } ++} ++ ++/* We cannot put such helpers in a library because of kselftest_harness.h . 
*/ ++__attribute__((__unused__)) ++static void disable_caps(struct __test_metadata *const _metadata) ++{ ++ _init_caps(_metadata, false); ++} ++ ++__attribute__((__unused__)) ++static void drop_caps(struct __test_metadata *const _metadata) ++{ ++ _init_caps(_metadata, true); ++} ++ ++static void _effective_cap(struct __test_metadata *const _metadata, ++ const cap_value_t caps, const cap_flag_value_t value) ++{ ++ cap_t cap_p; ++ ++ cap_p = cap_get_proc(); ++ EXPECT_NE(NULL, cap_p) { ++ TH_LOG("Failed to cap_get_proc: %s", strerror(errno)); ++ } ++ EXPECT_NE(-1, cap_set_flag(cap_p, CAP_EFFECTIVE, 1, &caps, value)) { ++ TH_LOG("Failed to cap_set_flag: %s", strerror(errno)); ++ } ++ EXPECT_NE(-1, cap_set_proc(cap_p)) { ++ TH_LOG("Failed to cap_set_proc: %s", strerror(errno)); ++ } ++ EXPECT_NE(-1, cap_free(cap_p)) { ++ TH_LOG("Failed to cap_free: %s", strerror(errno)); ++ } ++} ++ ++__attribute__((__unused__)) ++static void set_cap(struct __test_metadata *const _metadata, ++ const cap_value_t caps) ++{ ++ _effective_cap(_metadata, caps, CAP_SET); ++} ++ ++__attribute__((__unused__)) ++static void clear_cap(struct __test_metadata *const _metadata, ++ const cap_value_t caps) ++{ ++ _effective_cap(_metadata, caps, CAP_CLEAR); ++} +diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config +new file mode 100644 +index 0000000000000..0f0a65287bacf +--- /dev/null ++++ b/tools/testing/selftests/landlock/config +@@ -0,0 +1,7 @@ ++CONFIG_OVERLAY_FS=y ++CONFIG_SECURITY_LANDLOCK=y ++CONFIG_SECURITY_PATH=y ++CONFIG_SECURITY=y ++CONFIG_SHMEM=y ++CONFIG_TMPFS_XATTR=y ++CONFIG_TMPFS=y +diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c +new file mode 100644 +index 0000000000000..10c9a1e4ebd9b +--- /dev/null ++++ b/tools/testing/selftests/landlock/fs_test.c +@@ -0,0 +1,2791 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Landlock tests - Filesystem ++ * ++ * Copyright © 2017-2020 Mickaël Salaün 
++ * Copyright © 2020 ANSSI ++ * Copyright © 2020-2021 Microsoft Corporation ++ */ ++ ++#define _GNU_SOURCE ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "common.h" ++ ++#define TMP_DIR "tmp" ++#define BINARY_PATH "./true" ++ ++/* Paths (sibling number and depth) */ ++static const char dir_s1d1[] = TMP_DIR "/s1d1"; ++static const char file1_s1d1[] = TMP_DIR "/s1d1/f1"; ++static const char file2_s1d1[] = TMP_DIR "/s1d1/f2"; ++static const char dir_s1d2[] = TMP_DIR "/s1d1/s1d2"; ++static const char file1_s1d2[] = TMP_DIR "/s1d1/s1d2/f1"; ++static const char file2_s1d2[] = TMP_DIR "/s1d1/s1d2/f2"; ++static const char dir_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3"; ++static const char file1_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f1"; ++static const char file2_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f2"; ++ ++static const char dir_s2d1[] = TMP_DIR "/s2d1"; ++static const char file1_s2d1[] = TMP_DIR "/s2d1/f1"; ++static const char dir_s2d2[] = TMP_DIR "/s2d1/s2d2"; ++static const char file1_s2d2[] = TMP_DIR "/s2d1/s2d2/f1"; ++static const char dir_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3"; ++static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1"; ++static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2"; ++ ++static const char dir_s3d1[] = TMP_DIR "/s3d1"; ++/* dir_s3d2 is a mount point. 
*/ ++static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2"; ++static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; ++ ++/* ++ * layout1 hierarchy: ++ * ++ * tmp ++ * ├── s1d1 ++ * │   ├── f1 ++ * │   ├── f2 ++ * │   └── s1d2 ++ * │   ├── f1 ++ * │   ├── f2 ++ * │   └── s1d3 ++ * │   ├── f1 ++ * │   └── f2 ++ * ├── s2d1 ++ * │   ├── f1 ++ * │   └── s2d2 ++ * │   ├── f1 ++ * │   └── s2d3 ++ * │   ├── f1 ++ * │   └── f2 ++ * └── s3d1 ++ * └── s3d2 ++ * └── s3d3 ++ */ ++ ++static void mkdir_parents(struct __test_metadata *const _metadata, ++ const char *const path) ++{ ++ char *walker; ++ const char *parent; ++ int i, err; ++ ++ ASSERT_NE(path[0], '\0'); ++ walker = strdup(path); ++ ASSERT_NE(NULL, walker); ++ parent = walker; ++ for (i = 1; walker[i]; i++) { ++ if (walker[i] != '/') ++ continue; ++ walker[i] = '\0'; ++ err = mkdir(parent, 0700); ++ ASSERT_FALSE(err && errno != EEXIST) { ++ TH_LOG("Failed to create directory \"%s\": %s", ++ parent, strerror(errno)); ++ } ++ walker[i] = '/'; ++ } ++ free(walker); ++} ++ ++static void create_directory(struct __test_metadata *const _metadata, ++ const char *const path) ++{ ++ mkdir_parents(_metadata, path); ++ ASSERT_EQ(0, mkdir(path, 0700)) { ++ TH_LOG("Failed to create directory \"%s\": %s", path, ++ strerror(errno)); ++ } ++} ++ ++static void create_file(struct __test_metadata *const _metadata, ++ const char *const path) ++{ ++ mkdir_parents(_metadata, path); ++ ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0)) { ++ TH_LOG("Failed to create file \"%s\": %s", path, ++ strerror(errno)); ++ } ++} ++ ++static int remove_path(const char *const path) ++{ ++ char *walker; ++ int i, ret, err = 0; ++ ++ walker = strdup(path); ++ if (!walker) { ++ err = ENOMEM; ++ goto out; ++ } ++ if (unlink(path) && rmdir(path)) { ++ if (errno != ENOENT) ++ err = errno; ++ goto out; ++ } ++ for (i = strlen(walker); i > 0; i--) { ++ if (walker[i] != '/') ++ continue; ++ walker[i] = '\0'; ++ ret = rmdir(walker); ++ if (ret) { ++ if (errno != 
ENOTEMPTY && errno != EBUSY) ++ err = errno; ++ goto out; ++ } ++ if (strcmp(walker, TMP_DIR) == 0) ++ goto out; ++ } ++ ++out: ++ free(walker); ++ return err; ++} ++ ++static void prepare_layout(struct __test_metadata *const _metadata) ++{ ++ disable_caps(_metadata); ++ umask(0077); ++ create_directory(_metadata, TMP_DIR); ++ ++ /* ++ * Do not pollute the rest of the system: creates a private mount point ++ * for tests relying on pivot_root(2) and move_mount(2). ++ */ ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ ASSERT_EQ(0, unshare(CLONE_NEWNS)); ++ ASSERT_EQ(0, mount("tmp", TMP_DIR, "tmpfs", 0, "size=4m,mode=700")); ++ ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL)); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++} ++ ++static void cleanup_layout(struct __test_metadata *const _metadata) ++{ ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, umount(TMP_DIR)); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, remove_path(TMP_DIR)); ++} ++ ++static void create_layout1(struct __test_metadata *const _metadata) ++{ ++ create_file(_metadata, file1_s1d1); ++ create_file(_metadata, file1_s1d2); ++ create_file(_metadata, file1_s1d3); ++ create_file(_metadata, file2_s1d1); ++ create_file(_metadata, file2_s1d2); ++ create_file(_metadata, file2_s1d3); ++ ++ create_file(_metadata, file1_s2d1); ++ create_file(_metadata, file1_s2d2); ++ create_file(_metadata, file1_s2d3); ++ create_file(_metadata, file2_s2d3); ++ ++ create_directory(_metadata, dir_s3d2); ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ ASSERT_EQ(0, mount("tmp", dir_s3d2, "tmpfs", 0, "size=4m,mode=700")); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ ++ ASSERT_EQ(0, mkdir(dir_s3d3, 0700)); ++} ++ ++static void remove_layout1(struct __test_metadata *const _metadata) ++{ ++ EXPECT_EQ(0, remove_path(file2_s1d3)); ++ EXPECT_EQ(0, remove_path(file2_s1d2)); ++ EXPECT_EQ(0, remove_path(file2_s1d1)); ++ EXPECT_EQ(0, remove_path(file1_s1d3)); ++ EXPECT_EQ(0, remove_path(file1_s1d2)); ++ EXPECT_EQ(0, 
remove_path(file1_s1d1)); ++ ++ EXPECT_EQ(0, remove_path(file2_s2d3)); ++ EXPECT_EQ(0, remove_path(file1_s2d3)); ++ EXPECT_EQ(0, remove_path(file1_s2d2)); ++ EXPECT_EQ(0, remove_path(file1_s2d1)); ++ ++ EXPECT_EQ(0, remove_path(dir_s3d3)); ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ umount(dir_s3d2); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, remove_path(dir_s3d2)); ++} ++ ++FIXTURE(layout1) { ++}; ++ ++FIXTURE_SETUP(layout1) ++{ ++ prepare_layout(_metadata); ++ ++ create_layout1(_metadata); ++} ++ ++FIXTURE_TEARDOWN(layout1) ++{ ++ remove_layout1(_metadata); ++ ++ cleanup_layout(_metadata); ++} ++ ++/* ++ * This helper enables to use the ASSERT_* macros and print the line number ++ * pointing to the test caller. ++ */ ++static int test_open_rel(const int dirfd, const char *const path, const int flags) ++{ ++ int fd; ++ ++ /* Works with file and directories. */ ++ fd = openat(dirfd, path, flags | O_CLOEXEC); ++ if (fd < 0) ++ return errno; ++ /* ++ * Mixing error codes from close(2) and open(2) should not lead to any ++ * (access type) confusion for this test. 
++ */ ++ if (close(fd) != 0) ++ return errno; ++ return 0; ++} ++ ++static int test_open(const char *const path, const int flags) ++{ ++ return test_open_rel(AT_FDCWD, path, flags); ++} ++ ++TEST_F_FORK(layout1, no_restriction) ++{ ++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file2_s1d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file2_s1d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ ++ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s2d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s2d3, O_RDONLY)); ++ ++ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY)); ++} ++ ++TEST_F_FORK(layout1, inval) ++{ ++ struct landlock_path_beneath_attr path_beneath = { ++ .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ .parent_fd = -1, ++ }; ++ struct landlock_ruleset_attr ruleset_attr = { ++ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }; ++ int ruleset_fd; ++ ++ path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY | ++ O_CLOEXEC); ++ ASSERT_LE(0, path_beneath.parent_fd); ++ ++ ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC); ++ ASSERT_LE(0, ruleset_fd); ++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)); ++ /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. 
*/ ++ ASSERT_EQ(EBADF, errno); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC); ++ ASSERT_LE(0, ruleset_fd); ++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)); ++ /* Returns EBADFD because ruleset_fd is not a valid ruleset. */ ++ ASSERT_EQ(EBADFD, errno); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Gets a real ruleset. */ ++ ruleset_fd = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ ASSERT_LE(0, ruleset_fd); ++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)); ++ ASSERT_EQ(0, close(path_beneath.parent_fd)); ++ ++ /* Tests without O_PATH. */ ++ path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC); ++ ASSERT_LE(0, path_beneath.parent_fd); ++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)); ++ ASSERT_EQ(0, close(path_beneath.parent_fd)); ++ ++ /* Tests with a ruleset FD. */ ++ path_beneath.parent_fd = ruleset_fd; ++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)); ++ ASSERT_EQ(EBADFD, errno); ++ ++ /* Checks unhandled allowed_access. */ ++ path_beneath.parent_fd = open(dir_s1d2, O_PATH | O_DIRECTORY | ++ O_CLOEXEC); ++ ASSERT_LE(0, path_beneath.parent_fd); ++ ++ /* Test with legitimate values. */ ++ path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE; ++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)); ++ ASSERT_EQ(EINVAL, errno); ++ path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE; ++ ++ /* Test with unknown (64-bits) value. */ ++ path_beneath.allowed_access |= (1ULL << 60); ++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)); ++ ASSERT_EQ(EINVAL, errno); ++ path_beneath.allowed_access &= ~(1ULL << 60); ++ ++ /* Test with no access. 
*/ ++ path_beneath.allowed_access = 0; ++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)); ++ ASSERT_EQ(ENOMSG, errno); ++ path_beneath.allowed_access &= ~(1ULL << 60); ++ ++ ASSERT_EQ(0, close(path_beneath.parent_fd)); ++ ++ /* Enforces the ruleset. */ ++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); ++ ++ ASSERT_EQ(0, close(ruleset_fd)); ++} ++ ++#define ACCESS_FILE ( \ ++ LANDLOCK_ACCESS_FS_EXECUTE | \ ++ LANDLOCK_ACCESS_FS_WRITE_FILE | \ ++ LANDLOCK_ACCESS_FS_READ_FILE) ++ ++#define ACCESS_LAST LANDLOCK_ACCESS_FS_MAKE_SYM ++ ++#define ACCESS_ALL ( \ ++ ACCESS_FILE | \ ++ LANDLOCK_ACCESS_FS_READ_DIR | \ ++ LANDLOCK_ACCESS_FS_REMOVE_DIR | \ ++ LANDLOCK_ACCESS_FS_REMOVE_FILE | \ ++ LANDLOCK_ACCESS_FS_MAKE_CHAR | \ ++ LANDLOCK_ACCESS_FS_MAKE_DIR | \ ++ LANDLOCK_ACCESS_FS_MAKE_REG | \ ++ LANDLOCK_ACCESS_FS_MAKE_SOCK | \ ++ LANDLOCK_ACCESS_FS_MAKE_FIFO | \ ++ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \ ++ ACCESS_LAST) ++ ++TEST_F_FORK(layout1, file_access_rights) ++{ ++ __u64 access; ++ int err; ++ struct landlock_path_beneath_attr path_beneath = {}; ++ struct landlock_ruleset_attr ruleset_attr = { ++ .handled_access_fs = ACCESS_ALL, ++ }; ++ const int ruleset_fd = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ /* Tests access rights for files. 
*/ ++ path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC); ++ ASSERT_LE(0, path_beneath.parent_fd); ++ for (access = 1; access <= ACCESS_LAST; access <<= 1) { ++ path_beneath.allowed_access = access; ++ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0); ++ if ((access | ACCESS_FILE) == ACCESS_FILE) { ++ ASSERT_EQ(0, err); ++ } else { ++ ASSERT_EQ(-1, err); ++ ASSERT_EQ(EINVAL, errno); ++ } ++ } ++ ASSERT_EQ(0, close(path_beneath.parent_fd)); ++} ++ ++static void add_path_beneath(struct __test_metadata *const _metadata, ++ const int ruleset_fd, const __u64 allowed_access, ++ const char *const path) ++{ ++ struct landlock_path_beneath_attr path_beneath = { ++ .allowed_access = allowed_access, ++ }; ++ ++ path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC); ++ ASSERT_LE(0, path_beneath.parent_fd) { ++ TH_LOG("Failed to open directory \"%s\": %s", path, ++ strerror(errno)); ++ } ++ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)) { ++ TH_LOG("Failed to update the ruleset with \"%s\": %s", path, ++ strerror(errno)); ++ } ++ ASSERT_EQ(0, close(path_beneath.parent_fd)); ++} ++ ++struct rule { ++ const char *path; ++ __u64 access; ++}; ++ ++#define ACCESS_RO ( \ ++ LANDLOCK_ACCESS_FS_READ_FILE | \ ++ LANDLOCK_ACCESS_FS_READ_DIR) ++ ++#define ACCESS_RW ( \ ++ ACCESS_RO | \ ++ LANDLOCK_ACCESS_FS_WRITE_FILE) ++ ++static int create_ruleset(struct __test_metadata *const _metadata, ++ const __u64 handled_access_fs, const struct rule rules[]) ++{ ++ int ruleset_fd, i; ++ struct landlock_ruleset_attr ruleset_attr = { ++ .handled_access_fs = handled_access_fs, ++ }; ++ ++ ASSERT_NE(NULL, rules) { ++ TH_LOG("No rule list"); ++ } ++ ASSERT_NE(NULL, rules[0].path) { ++ TH_LOG("Empty rule list"); ++ } ++ ++ ruleset_fd = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ ASSERT_LE(0, ruleset_fd) { ++ TH_LOG("Failed to create a ruleset: %s", strerror(errno)); ++ } ++ ++ for (i 
= 0; rules[i].path; i++) { ++ add_path_beneath(_metadata, ruleset_fd, rules[i].access, ++ rules[i].path); ++ } ++ return ruleset_fd; ++} ++ ++static void enforce_ruleset(struct __test_metadata *const _metadata, ++ const int ruleset_fd) ++{ ++ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ++ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0)) { ++ TH_LOG("Failed to enforce ruleset: %s", strerror(errno)); ++ } ++} ++ ++TEST_F_FORK(layout1, proc_nsfs) ++{ ++ const struct rule rules[] = { ++ { ++ .path = "/dev/null", ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ {} ++ }; ++ struct landlock_path_beneath_attr path_beneath; ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access | ++ LANDLOCK_ACCESS_FS_READ_DIR, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY)); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open("/dev", O_RDONLY)); ++ ASSERT_EQ(0, test_open("/dev/null", O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open("/dev/full", O_RDONLY)); ++ ++ ASSERT_EQ(EACCES, test_open("/proc", O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open("/proc/self", O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open("/proc/self/ns", O_RDONLY)); ++ /* ++ * Because nsfs is an internal filesystem, /proc/self/ns/mnt is a ++ * disconnected path. Such path cannot be identified and must then be ++ * allowed. ++ */ ++ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY)); ++ ++ /* ++ * Checks that it is not possible to add nsfs-like filesystem ++ * references to a ruleset. 
++ */ ++ path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC); ++ ASSERT_LE(0, path_beneath.parent_fd); ++ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH, ++ &path_beneath, 0)); ++ ASSERT_EQ(EBADFD, errno); ++ ASSERT_EQ(0, close(path_beneath.parent_fd)); ++} ++ ++TEST_F_FORK(layout1, unpriv) { ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ int ruleset_fd; ++ ++ drop_caps(_metadata); ++ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules); ++ ASSERT_LE(0, ruleset_fd); ++ ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0)); ++ ASSERT_EQ(EPERM, errno); ++ ++ /* enforce_ruleset() calls prctl(no_new_privs). */ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++} ++ ++TEST_F_FORK(layout1, effective_access) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = ACCESS_RO, ++ }, ++ { ++ .path = file1_s2d2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ char buf; ++ int reg_fd; ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Tests on a directory. */ ++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ ++ /* Tests on a file. */ ++ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY)); ++ ++ /* Checks effective read and write actions. 
*/ ++ reg_fd = open(file1_s2d2, O_RDWR | O_CLOEXEC); ++ ASSERT_LE(0, reg_fd); ++ ASSERT_EQ(1, write(reg_fd, ".", 1)); ++ ASSERT_LE(0, lseek(reg_fd, 0, SEEK_SET)); ++ ASSERT_EQ(1, read(reg_fd, &buf, 1)); ++ ASSERT_EQ('.', buf); ++ ASSERT_EQ(0, close(reg_fd)); ++ ++ /* Just in case, double-checks effective actions. */ ++ reg_fd = open(file1_s2d2, O_RDONLY | O_CLOEXEC); ++ ASSERT_LE(0, reg_fd); ++ ASSERT_EQ(-1, write(reg_fd, &buf, 1)); ++ ASSERT_EQ(EBADF, errno); ++ ASSERT_EQ(0, close(reg_fd)); ++} ++ ++TEST_F_FORK(layout1, unhandled_access) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ /* Here, we only handle read accesses, not write accesses. */ ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* ++ * Because the policy does not handle LANDLOCK_ACCESS_FS_WRITE_FILE, ++ * opening for write-only should be allowed, but not read-write. ++ */ ++ ASSERT_EQ(0, test_open(file1_s1d1, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); ++ ++ ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR)); ++} ++ ++TEST_F_FORK(layout1, ruleset_overlap) ++{ ++ const struct rule rules[] = { ++ /* These rules should be ORed among them. */ ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_READ_DIR, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks s1d1 hierarchy. 
*/ ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); ++ ++ /* Checks s1d2 hierarchy. */ ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR)); ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ ++ /* Checks s1d3 hierarchy. */ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); ++} ++ ++TEST_F_FORK(layout1, non_overlapping_accesses) ++{ ++ const struct rule layer1[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_MAKE_REG, ++ }, ++ {} ++ }; ++ const struct rule layer2[] = { ++ { ++ .path = dir_s1d3, ++ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, ++ }, ++ {} ++ }; ++ int ruleset_fd; ++ ++ ASSERT_EQ(0, unlink(file1_s1d1)); ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, ++ layer1); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0)); ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE, ++ layer2); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Unchanged accesses for file creation. */ ++ ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0)); ++ ++ /* Checks file removing. 
*/ ++ ASSERT_EQ(-1, unlink(file1_s1d2)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(0, unlink(file1_s1d3)); ++} ++ ++TEST_F_FORK(layout1, interleaved_masked_accesses) ++{ ++ /* ++ * Checks overly restrictive rules: ++ * layer 1: allows R s1d1/s1d2/s1d3/file1 ++ * layer 2: allows RW s1d1/s1d2/s1d3 ++ * allows W s1d1/s1d2 ++ * denies R s1d1/s1d2 ++ * layer 3: allows R s1d1 ++ * layer 4: allows R s1d1/s1d2 ++ * denies W s1d1/s1d2 ++ * layer 5: allows R s1d1/s1d2 ++ * layer 6: allows X ---- ++ * layer 7: allows W s1d1/s1d2 ++ * denies R s1d1/s1d2 ++ */ ++ const struct rule layer1_read[] = { ++ /* Allows read access to file1_s1d3 with the first layer. */ ++ { ++ .path = file1_s1d3, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ {} ++ }; ++ /* First rule with write restrictions. */ ++ const struct rule layer2_read_write[] = { ++ /* Start by granting read-write access via its parent directory... */ ++ { ++ .path = dir_s1d3, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ /* ...but also denies read access via its grandparent directory. */ ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ {} ++ }; ++ const struct rule layer3_read[] = { ++ /* Allows read access via its great-grandparent directory. */ ++ { ++ .path = dir_s1d1, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ {} ++ }; ++ const struct rule layer4_read_write[] = { ++ /* ++ * Try to confuse the deny access by denying write (but not ++ * read) access via its grandparent directory. ++ */ ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ {} ++ }; ++ const struct rule layer5_read[] = { ++ /* ++ * Try to override layer2's deny read access by explicitly ++ * allowing read access via file1_s1d3's grandparent. 
++ */ ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ {} ++ }; ++ const struct rule layer6_execute[] = { ++ /* ++ * Restricts an unrelated file hierarchy with a new access ++ * (non-overlapping) type. ++ */ ++ { ++ .path = dir_s2d1, ++ .access = LANDLOCK_ACCESS_FS_EXECUTE, ++ }, ++ {} ++ }; ++ const struct rule layer7_read_write[] = { ++ /* ++ * Finally, denies read access to file1_s1d3 via its ++ * grandparent. ++ */ ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ {} ++ }; ++ int ruleset_fd; ++ ++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, ++ layer1_read); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks that read access is granted for file1_s1d3 with layer 1. */ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); ++ ++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, layer2_read_write); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks that previous access rights are unchanged with layer 2. */ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); ++ ++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, ++ layer3_read); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks that previous access rights are unchanged with layer 3. */ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY)); ++ ++ /* This time, denies write access for the file hierarchy. 
*/ ++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, layer4_read_write); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* ++ * Checks that the only change with layer 4 is that write access is ++ * denied. ++ */ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); ++ ++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE, ++ layer5_read); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks that previous access rights are unchanged with layer 5. */ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); ++ ++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE, ++ layer6_execute); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks that previous access rights are unchanged with layer 6. */ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); ++ ++ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, layer7_read_write); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks read access is now denied with layer 7. 
*/ ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY)); ++} ++ ++TEST_F_FORK(layout1, inherit_subset) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_READ_DIR, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); ++ ++ /* Write access is forbidden. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); ++ /* Readdir access is allowed. */ ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ ++ /* Write access is forbidden. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ /* Readdir access is allowed. */ ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); ++ ++ /* ++ * Tests shared rule extension: the following rules should not grant ++ * any new access, only remove some. Once enforced, these rules are ++ * ANDed with the previous ones. ++ */ ++ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE, ++ dir_s1d2); ++ /* ++ * According to ruleset_fd, dir_s1d2 should now have the ++ * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE ++ * access rights (even if this directory is opened a second time). ++ * However, when enforcing this updated ruleset, the ruleset tied to ++ * the current process (i.e. its domain) will still only have the ++ * dir_s1d2 with LANDLOCK_ACCESS_FS_READ_FILE and ++ * LANDLOCK_ACCESS_FS_READ_DIR accesses, but ++ * LANDLOCK_ACCESS_FS_WRITE_FILE must not be allowed because it would ++ * be a privilege escalation. ++ */ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ++ /* Same tests and results as above. 
*/ ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); ++ ++ /* It is still forbidden to write in file1_s1d2. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); ++ /* Readdir access is still allowed. */ ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ ++ /* It is still forbidden to write in file1_s1d3. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ /* Readdir access is still allowed. */ ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); ++ ++ /* ++ * Try to get more privileges by adding new access rights to the parent ++ * directory: dir_s1d1. ++ */ ++ add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ++ /* Same tests and results as above. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); ++ ++ /* It is still forbidden to write in file1_s1d2. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); ++ /* Readdir access is still allowed. */ ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ ++ /* It is still forbidden to write in file1_s1d3. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ /* Readdir access is still allowed. */ ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); ++ ++ /* ++ * Now, dir_s1d3 get a new rule tied to it, only allowing ++ * LANDLOCK_ACCESS_FS_WRITE_FILE. The (kernel internal) difference is ++ * that there was no rule tied to it before. ++ */ ++ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE, ++ dir_s1d3); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* ++ * Same tests and results as above, except for open(dir_s1d3) which is ++ * now denied because the new rule mask the rule previously inherited ++ * from dir_s1d2. ++ */ ++ ++ /* Same tests and results as above. 
*/ ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); ++ ++ /* It is still forbidden to write in file1_s1d2. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); ++ /* Readdir access is still allowed. */ ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ ++ /* It is still forbidden to write in file1_s1d3. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ /* ++ * Readdir of dir_s1d3 is still allowed because of the OR policy inside ++ * the same layer. ++ */ ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); ++} ++ ++TEST_F_FORK(layout1, inherit_superset) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d3, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ++ /* Readdir access is denied for dir_s1d2. */ ++ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ /* Readdir access is allowed for dir_s1d3. */ ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); ++ /* File access is allowed for file1_s1d3. */ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ ++ /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */ ++ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_READ_DIR, dir_s1d2); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Readdir access is still denied for dir_s1d2. */ ++ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ /* Readdir access is still allowed for dir_s1d3. */ ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); ++ /* File access is still allowed for file1_s1d3. 
*/ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++} ++ ++TEST_F_FORK(layout1, max_layers) ++{ ++ int i, err; ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ for (i = 0; i < 64; i++) ++ enforce_ruleset(_metadata, ruleset_fd); ++ ++ for (i = 0; i < 2; i++) { ++ err = landlock_restrict_self(ruleset_fd, 0); ++ ASSERT_EQ(-1, err); ++ ASSERT_EQ(E2BIG, errno); ++ } ++ ASSERT_EQ(0, close(ruleset_fd)); ++} ++ ++TEST_F_FORK(layout1, empty_or_same_ruleset) ++{ ++ struct landlock_ruleset_attr ruleset_attr = {}; ++ int ruleset_fd; ++ ++ /* Tests empty handled_access_fs. */ ++ ruleset_fd = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ ASSERT_LE(-1, ruleset_fd); ++ ASSERT_EQ(ENOMSG, errno); ++ ++ /* Enforces policy which deny read access to all files. */ ++ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE; ++ ruleset_fd = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); ++ ++ /* Nests a policy which deny read access to all directories. */ ++ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR; ++ ruleset_fd = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); ++ ++ /* Enforces a second time with the same ruleset. */ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++} ++ ++TEST_F_FORK(layout1, rule_on_mountpoint) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d1, ++ .access = ACCESS_RO, ++ }, ++ { ++ /* dir_s3d2 is a mount point. 
*/ ++ .path = dir_s3d2, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); ++ ++ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY)); ++ ++ ASSERT_EQ(EACCES, test_open(dir_s3d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY)); ++} ++ ++TEST_F_FORK(layout1, rule_over_mountpoint) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d1, ++ .access = ACCESS_RO, ++ }, ++ { ++ /* dir_s3d2 is a mount point. */ ++ .path = dir_s3d1, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); ++ ++ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY)); ++ ++ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY)); ++} ++ ++/* ++ * This test verifies that we can apply a landlock rule on the root directory ++ * (which might require special handling). ++ */ ++TEST_F_FORK(layout1, rule_over_root_allow_then_deny) ++{ ++ struct rule rules[] = { ++ { ++ .path = "/", ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks allowed access. 
*/ ++ ASSERT_EQ(0, test_open("/", O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); ++ ++ rules[0].access = LANDLOCK_ACCESS_FS_READ_FILE; ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks denied access (on a directory). */ ++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); ++} ++ ++TEST_F_FORK(layout1, rule_over_root_deny) ++{ ++ const struct rule rules[] = { ++ { ++ .path = "/", ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks denied access (on a directory). */ ++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY)); ++} ++ ++TEST_F_FORK(layout1, rule_inside_mount_ns) ++{ ++ const struct rule rules[] = { ++ { ++ .path = "s3d3", ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ int ruleset_fd; ++ ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ ASSERT_EQ(0, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)) { ++ TH_LOG("Failed to pivot root: %s", strerror(errno)); ++ }; ++ ASSERT_EQ(0, chdir("/")); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(0, test_open("s3d3", O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open("/", O_RDONLY)); ++} ++ ++TEST_F_FORK(layout1, mount_and_pivot) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s3d2, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ 
set_cap(_metadata, CAP_SYS_ADMIN); ++ ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL)); ++ ASSERT_EQ(EPERM, errno); ++ ASSERT_EQ(-1, syscall(SYS_pivot_root, dir_s3d2, dir_s3d3)); ++ ASSERT_EQ(EPERM, errno); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++} ++ ++TEST_F_FORK(layout1, move_mount) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s3d2, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, ++ dir_s1d2, 0)) { ++ TH_LOG("Failed to move mount: %s", strerror(errno)); ++ } ++ ++ ASSERT_EQ(0, syscall(SYS_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD, ++ dir_s3d2, 0)); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ ASSERT_EQ(-1, syscall(SYS_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD, ++ dir_s1d2, 0)); ++ ASSERT_EQ(EPERM, errno); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++} ++ ++TEST_F_FORK(layout1, release_inodes) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d1, ++ .access = ACCESS_RO, ++ }, ++ { ++ .path = dir_s3d2, ++ .access = ACCESS_RO, ++ }, ++ { ++ .path = dir_s3d3, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ /* Unmount a file hierarchy while it is being used by a ruleset. */ ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ ASSERT_EQ(0, umount(dir_s3d2)); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY)); ++ /* This dir_s3d3 would not be allowed and does not exist anyway. 
*/ ++ ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY)); ++} ++ ++enum relative_access { ++ REL_OPEN, ++ REL_CHDIR, ++ REL_CHROOT_ONLY, ++ REL_CHROOT_CHDIR, ++}; ++ ++static void test_relative_path(struct __test_metadata *const _metadata, ++ const enum relative_access rel) ++{ ++ /* ++ * Common layer to check that chroot doesn't ignore it (i.e. a chroot ++ * is not a disconnected root directory). ++ */ ++ const struct rule layer1_base[] = { ++ { ++ .path = TMP_DIR, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ const struct rule layer2_subs[] = { ++ { ++ .path = dir_s1d2, ++ .access = ACCESS_RO, ++ }, ++ { ++ .path = dir_s2d2, ++ .access = ACCESS_RO, ++ }, ++ {} ++ }; ++ int dirfd, ruleset_fd; ++ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_subs); ++ ++ ASSERT_LE(0, ruleset_fd); ++ switch (rel) { ++ case REL_OPEN: ++ case REL_CHDIR: ++ break; ++ case REL_CHROOT_ONLY: ++ ASSERT_EQ(0, chdir(dir_s2d2)); ++ break; ++ case REL_CHROOT_CHDIR: ++ ASSERT_EQ(0, chdir(dir_s1d2)); ++ break; ++ default: ++ ASSERT_TRUE(false); ++ return; ++ } ++ ++ set_cap(_metadata, CAP_SYS_CHROOT); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ++ switch (rel) { ++ case REL_OPEN: ++ dirfd = open(dir_s1d2, O_DIRECTORY); ++ ASSERT_LE(0, dirfd); ++ break; ++ case REL_CHDIR: ++ ASSERT_EQ(0, chdir(dir_s1d2)); ++ dirfd = AT_FDCWD; ++ break; ++ case REL_CHROOT_ONLY: ++ /* Do chroot into dir_s1d2 (relative to dir_s2d2). */ ++ ASSERT_EQ(0, chroot("../../s1d1/s1d2")) { ++ TH_LOG("Failed to chroot: %s", strerror(errno)); ++ } ++ dirfd = AT_FDCWD; ++ break; ++ case REL_CHROOT_CHDIR: ++ /* Do chroot into dir_s1d2. */ ++ ASSERT_EQ(0, chroot(".")) { ++ TH_LOG("Failed to chroot: %s", strerror(errno)); ++ } ++ dirfd = AT_FDCWD; ++ break; ++ } ++ ++ ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 
0 : EACCES, ++ test_open_rel(dirfd, "..", O_RDONLY)); ++ ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY)); ++ ++ if (rel == REL_CHROOT_ONLY) { ++ /* The current directory is dir_s2d2. */ ++ ASSERT_EQ(0, test_open_rel(dirfd, "./s2d3", O_RDONLY)); ++ } else { ++ /* The current directory is dir_s1d2. */ ++ ASSERT_EQ(0, test_open_rel(dirfd, "./s1d3", O_RDONLY)); ++ } ++ ++ if (rel == REL_CHROOT_ONLY || rel == REL_CHROOT_CHDIR) { ++ /* Checks the root dir_s1d2. */ ++ ASSERT_EQ(0, test_open_rel(dirfd, "/..", O_RDONLY)); ++ ASSERT_EQ(0, test_open_rel(dirfd, "/", O_RDONLY)); ++ ASSERT_EQ(0, test_open_rel(dirfd, "/f1", O_RDONLY)); ++ ASSERT_EQ(0, test_open_rel(dirfd, "/s1d3", O_RDONLY)); ++ } ++ ++ if (rel != REL_CHROOT_CHDIR) { ++ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY)); ++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY)); ++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3", O_RDONLY)); ++ ++ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY)); ++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY)); ++ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3", O_RDONLY)); ++ } ++ ++ if (rel == REL_OPEN) ++ ASSERT_EQ(0, close(dirfd)); ++ ASSERT_EQ(0, close(ruleset_fd)); ++} ++ ++TEST_F_FORK(layout1, relative_open) ++{ ++ test_relative_path(_metadata, REL_OPEN); ++} ++ ++TEST_F_FORK(layout1, relative_chdir) ++{ ++ test_relative_path(_metadata, REL_CHDIR); ++} ++ ++TEST_F_FORK(layout1, relative_chroot_only) ++{ ++ test_relative_path(_metadata, REL_CHROOT_ONLY); ++} ++ ++TEST_F_FORK(layout1, relative_chroot_chdir) ++{ ++ test_relative_path(_metadata, REL_CHROOT_CHDIR); ++} ++ ++static void copy_binary(struct __test_metadata *const _metadata, ++ const char *const dst_path) ++{ ++ int dst_fd, src_fd; ++ struct stat statbuf; ++ ++ dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC); ++ ASSERT_LE(0, dst_fd) { ++ TH_LOG("Failed to open \"%s\": %s", dst_path, ++ strerror(errno)); ++ } ++ src_fd = 
open(BINARY_PATH, O_RDONLY | O_CLOEXEC); ++ ASSERT_LE(0, src_fd) { ++ TH_LOG("Failed to open \"" BINARY_PATH "\": %s", ++ strerror(errno)); ++ } ++ ASSERT_EQ(0, fstat(src_fd, &statbuf)); ++ ASSERT_EQ(statbuf.st_size, sendfile(dst_fd, src_fd, 0, ++ statbuf.st_size)); ++ ASSERT_EQ(0, close(src_fd)); ++ ASSERT_EQ(0, close(dst_fd)); ++} ++ ++static void test_execute(struct __test_metadata *const _metadata, ++ const int err, const char *const path) ++{ ++ int status; ++ char *const argv[] = {(char *)path, NULL}; ++ const pid_t child = fork(); ++ ++ ASSERT_LE(0, child); ++ if (child == 0) { ++ ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL)) { ++ TH_LOG("Failed to execute \"%s\": %s", path, ++ strerror(errno)); ++ }; ++ ASSERT_EQ(err, errno); ++ _exit(_metadata->passed ? 2 : 1); ++ return; ++ } ++ ASSERT_EQ(child, waitpid(child, &status, 0)); ++ ASSERT_EQ(1, WIFEXITED(status)); ++ ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status)) { ++ TH_LOG("Unexpected return code for \"%s\": %s", path, ++ strerror(errno)); ++ }; ++} ++ ++TEST_F_FORK(layout1, execute) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_EXECUTE, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access, ++ rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ copy_binary(_metadata, file1_s1d1); ++ copy_binary(_metadata, file1_s1d2); ++ copy_binary(_metadata, file1_s1d3); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); ++ test_execute(_metadata, EACCES, file1_s1d1); ++ ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); ++ test_execute(_metadata, 0, file1_s1d2); ++ ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ test_execute(_metadata, 0, file1_s1d3); ++} ++ ++TEST_F_FORK(layout1, link) ++{ ++ const struct rule rules[] = 
{ ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_MAKE_REG, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access, ++ rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ ASSERT_EQ(0, unlink(file1_s1d1)); ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ASSERT_EQ(0, unlink(file1_s1d3)); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); ++ ASSERT_EQ(EACCES, errno); ++ /* Denies linking because of reparenting. */ ++ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2)); ++ ASSERT_EQ(EXDEV, errno); ++ ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3)); ++ ASSERT_EQ(EXDEV, errno); ++ ++ ASSERT_EQ(0, link(file2_s1d2, file1_s1d2)); ++ ASSERT_EQ(0, link(file2_s1d3, file1_s1d3)); ++} ++ ++TEST_F_FORK(layout1, rename_file) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d3, ++ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, ++ }, ++ { ++ .path = dir_s2d2, ++ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access, ++ rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ ASSERT_EQ(0, unlink(file1_s1d1)); ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* ++ * Tries to replace a file, from a directory that allows file removal, ++ * but to a different directory (which also allows file removal). ++ */ ++ ASSERT_EQ(-1, rename(file1_s2d3, file1_s1d3)); ++ ASSERT_EQ(EXDEV, errno); ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d3, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EXDEV, errno); ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EXDEV, errno); ++ ++ /* ++ * Tries to replace a file, from a directory that denies file removal, ++ * to a different directory (which allows file removal). 
++ */ ++ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3)); ++ ASSERT_EQ(EXDEV, errno); ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file1_s1d3, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EXDEV, errno); ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s1d3, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EXDEV, errno); ++ ++ /* Exchanges files and directories that partially allow removal. */ ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EACCES, errno); ++ ++ /* Renames files with different parents. */ ++ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2)); ++ ASSERT_EQ(EXDEV, errno); ++ ASSERT_EQ(0, unlink(file1_s1d3)); ++ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3)); ++ ASSERT_EQ(EXDEV, errno); ++ ++ /* Exchanges and renames files with same parent. */ ++ ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3)); ++ ++ /* Exchanges files and directories with same parent, twice. */ ++ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3, ++ RENAME_EXCHANGE)); ++} ++ ++TEST_F_FORK(layout1, rename_dir) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, ++ }, ++ { ++ .path = dir_s2d1, ++ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access, ++ rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ /* Empties dir_s1d3 to allow renaming. */ ++ ASSERT_EQ(0, unlink(file1_s1d3)); ++ ASSERT_EQ(0, unlink(file2_s1d3)); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Exchanges and renames directory to a different parent. 
*/ ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EXDEV, errno); ++ ASSERT_EQ(-1, rename(dir_s2d3, dir_s1d3)); ++ ASSERT_EQ(EXDEV, errno); ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EXDEV, errno); ++ ++ /* ++ * Exchanges directory to the same parent, which doesn't allow ++ * directory removal. ++ */ ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(EACCES, errno); ++ ++ /* ++ * Exchanges and renames directory to the same parent, which allows ++ * directory removal. ++ */ ++ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2, ++ RENAME_EXCHANGE)); ++ ASSERT_EQ(0, unlink(dir_s1d3)); ++ ASSERT_EQ(0, mkdir(dir_s1d3, 0700)); ++ ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3)); ++ ASSERT_EQ(0, rmdir(dir_s1d3)); ++} ++ ++TEST_F_FORK(layout1, remove_dir) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access, ++ rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ ASSERT_EQ(0, unlink(file1_s1d1)); ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ASSERT_EQ(0, unlink(file1_s1d3)); ++ ASSERT_EQ(0, unlink(file2_s1d3)); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(0, rmdir(dir_s1d3)); ++ ASSERT_EQ(0, mkdir(dir_s1d3, 0700)); ++ ASSERT_EQ(0, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR)); ++ ++ /* dir_s1d2 itself cannot be removed. 
*/ ++ ASSERT_EQ(-1, rmdir(dir_s1d2)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d2, AT_REMOVEDIR)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(-1, rmdir(dir_s1d1)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d1, AT_REMOVEDIR)); ++ ASSERT_EQ(EACCES, errno); ++} ++ ++TEST_F_FORK(layout1, remove_file) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access, ++ rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(-1, unlink(file1_s1d1)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(-1, unlinkat(AT_FDCWD, file1_s1d1, 0)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0)); ++} ++ ++static void test_make_file(struct __test_metadata *const _metadata, ++ const __u64 access, const mode_t mode, const dev_t dev) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = access, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, access, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ ASSERT_EQ(0, unlink(file1_s1d1)); ++ ASSERT_EQ(0, unlink(file2_s1d1)); ++ ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev)) { ++ TH_LOG("Failed to make file \"%s\": %s", ++ file2_s1d1, strerror(errno)); ++ }; ++ ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ASSERT_EQ(0, unlink(file2_s1d2)); ++ ++ ASSERT_EQ(0, unlink(file1_s1d3)); ++ ASSERT_EQ(0, unlink(file2_s1d3)); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(-1, mknod(file1_s1d1, mode | 0400, dev)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1)); ++ ASSERT_EQ(EACCES, errno); ++ ++ ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, 
dev)) { ++ TH_LOG("Failed to make file \"%s\": %s", ++ file1_s1d2, strerror(errno)); ++ }; ++ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2)); ++ ASSERT_EQ(0, unlink(file2_s1d2)); ++ ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2)); ++ ++ ASSERT_EQ(0, mknod(file1_s1d3, mode | 0400, dev)); ++ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3)); ++ ASSERT_EQ(0, unlink(file2_s1d3)); ++ ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3)); ++} ++ ++TEST_F_FORK(layout1, make_char) ++{ ++ /* Creates a /dev/null device. */ ++ set_cap(_metadata, CAP_MKNOD); ++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR, ++ makedev(1, 3)); ++} ++ ++TEST_F_FORK(layout1, make_block) ++{ ++ /* Creates a /dev/loop0 device. */ ++ set_cap(_metadata, CAP_MKNOD); ++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK, ++ makedev(7, 0)); ++} ++ ++TEST_F_FORK(layout1, make_reg_1) ++{ ++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, S_IFREG, 0); ++} ++ ++TEST_F_FORK(layout1, make_reg_2) ++{ ++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, 0, 0); ++} ++ ++TEST_F_FORK(layout1, make_sock) ++{ ++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_SOCK, S_IFSOCK, 0); ++} ++ ++TEST_F_FORK(layout1, make_fifo) ++{ ++ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_FIFO, S_IFIFO, 0); ++} ++ ++TEST_F_FORK(layout1, make_sym) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_MAKE_SYM, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access, ++ rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ ASSERT_EQ(0, unlink(file1_s1d1)); ++ ASSERT_EQ(0, unlink(file2_s1d1)); ++ ASSERT_EQ(0, symlink("none", file2_s1d1)); ++ ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ASSERT_EQ(0, unlink(file2_s1d2)); ++ ++ ASSERT_EQ(0, unlink(file1_s1d3)); ++ ASSERT_EQ(0, unlink(file2_s1d3)); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(-1, symlink("none", file1_s1d1)); ++ ASSERT_EQ(EACCES, errno); 
++ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1)); ++ ASSERT_EQ(EACCES, errno); ++ ++ ASSERT_EQ(0, symlink("none", file1_s1d2)); ++ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2)); ++ ASSERT_EQ(0, unlink(file2_s1d2)); ++ ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2)); ++ ++ ASSERT_EQ(0, symlink("none", file1_s1d3)); ++ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3)); ++ ASSERT_EQ(0, unlink(file2_s1d3)); ++ ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3)); ++} ++ ++TEST_F_FORK(layout1, make_dir) ++{ ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_MAKE_DIR, ++ }, ++ {} ++ }; ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access, ++ rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ ++ ASSERT_EQ(0, unlink(file1_s1d1)); ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ASSERT_EQ(0, unlink(file1_s1d3)); ++ ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Uses file_* as directory names. 
*/ ++ ASSERT_EQ(-1, mkdir(file1_s1d1, 0700)); ++ ASSERT_EQ(EACCES, errno); ++ ASSERT_EQ(0, mkdir(file1_s1d2, 0700)); ++ ASSERT_EQ(0, mkdir(file1_s1d3, 0700)); ++} ++ ++static int open_proc_fd(struct __test_metadata *const _metadata, const int fd, ++ const int open_flags) ++{ ++ static const char path_template[] = "/proc/self/fd/%d"; ++ char procfd_path[sizeof(path_template) + 10]; ++ const int procfd_path_size = snprintf(procfd_path, sizeof(procfd_path), ++ path_template, fd); ++ ++ ASSERT_LT(procfd_path_size, sizeof(procfd_path)); ++ return open(procfd_path, open_flags); ++} ++ ++TEST_F_FORK(layout1, proc_unlinked_file) ++{ ++ const struct rule rules[] = { ++ { ++ .path = file1_s1d2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ {} ++ }; ++ int reg_fd, proc_fd; ++ const int ruleset_fd = create_ruleset(_metadata, ++ LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR)); ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); ++ reg_fd = open(file1_s1d2, O_RDONLY | O_CLOEXEC); ++ ASSERT_LE(0, reg_fd); ++ ASSERT_EQ(0, unlink(file1_s1d2)); ++ ++ proc_fd = open_proc_fd(_metadata, reg_fd, O_RDONLY | O_CLOEXEC); ++ ASSERT_LE(0, proc_fd); ++ ASSERT_EQ(0, close(proc_fd)); ++ ++ proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC); ++ ASSERT_EQ(-1, proc_fd) { ++ TH_LOG("Successfully opened /proc/self/fd/%d: %s", ++ reg_fd, strerror(errno)); ++ } ++ ASSERT_EQ(EACCES, errno); ++ ++ ASSERT_EQ(0, close(reg_fd)); ++} ++ ++TEST_F_FORK(layout1, proc_pipe) ++{ ++ int proc_fd; ++ int pipe_fds[2]; ++ char buf = '\0'; ++ const struct rule rules[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ {} ++ }; ++ /* Limits read and write access to files tied to the filesystem. 
*/ ++ const int ruleset_fd = create_ruleset(_metadata, rules[0].access, ++ rules); ++ ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks enforcement for normal files. */ ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR)); ++ ++ /* Checks access to pipes through FD. */ ++ ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC)); ++ ASSERT_EQ(1, write(pipe_fds[1], ".", 1)) { ++ TH_LOG("Failed to write in pipe: %s", strerror(errno)); ++ } ++ ASSERT_EQ(1, read(pipe_fds[0], &buf, 1)); ++ ASSERT_EQ('.', buf); ++ ++ /* Checks write access to pipe through /proc/self/fd . */ ++ proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC); ++ ASSERT_LE(0, proc_fd); ++ ASSERT_EQ(1, write(proc_fd, ".", 1)) { ++ TH_LOG("Failed to write through /proc/self/fd/%d: %s", ++ pipe_fds[1], strerror(errno)); ++ } ++ ASSERT_EQ(0, close(proc_fd)); ++ ++ /* Checks read access to pipe through /proc/self/fd . 
*/ ++ proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC); ++ ASSERT_LE(0, proc_fd); ++ buf = '\0'; ++ ASSERT_EQ(1, read(proc_fd, &buf, 1)) { ++ TH_LOG("Failed to read through /proc/self/fd/%d: %s", ++ pipe_fds[1], strerror(errno)); ++ } ++ ASSERT_EQ(0, close(proc_fd)); ++ ++ ASSERT_EQ(0, close(pipe_fds[0])); ++ ASSERT_EQ(0, close(pipe_fds[1])); ++} ++ ++FIXTURE(layout1_bind) { ++}; ++ ++FIXTURE_SETUP(layout1_bind) ++{ ++ prepare_layout(_metadata); ++ ++ create_layout1(_metadata); ++ ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ ASSERT_EQ(0, mount(dir_s1d2, dir_s2d2, NULL, MS_BIND, NULL)); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++} ++ ++FIXTURE_TEARDOWN(layout1_bind) ++{ ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, umount(dir_s2d2)); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ ++ remove_layout1(_metadata); ++ ++ cleanup_layout(_metadata); ++} ++ ++static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3"; ++static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1"; ++ ++/* ++ * layout1_bind hierarchy: ++ * ++ * tmp ++ * ├── s1d1 ++ * │   ├── f1 ++ * │   ├── f2 ++ * │   └── s1d2 ++ * │   ├── f1 ++ * │   ├── f2 ++ * │   └── s1d3 ++ * │   ├── f1 ++ * │   └── f2 ++ * ├── s2d1 ++ * │   ├── f1 ++ * │   └── s2d2 ++ * │   ├── f1 ++ * │   ├── f2 ++ * │   └── s1d3 ++ * │   ├── f1 ++ * │   └── f2 ++ * └── s3d1 ++ * └── s3d2 ++ * └── s3d3 ++ */ ++ ++TEST_F_FORK(layout1_bind, no_restriction) ++{ ++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ ++ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY)); ++ ASSERT_EQ(ENOENT, test_open(dir_s2d3, O_RDONLY)); ++ 
ASSERT_EQ(ENOENT, test_open(file1_s2d3, O_RDONLY)); ++ ++ ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY)); ++ ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY)); ++ ++ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY)); ++} ++ ++TEST_F_FORK(layout1_bind, same_content_same_file) ++{ ++ /* ++ * Sets access right on parent directories of both source and ++ * destination mount points. ++ */ ++ const struct rule layer1_parent[] = { ++ { ++ .path = dir_s1d1, ++ .access = ACCESS_RO, ++ }, ++ { ++ .path = dir_s2d1, ++ .access = ACCESS_RW, ++ }, ++ {} ++ }; ++ /* ++ * Sets access rights on the same bind-mounted directories. The result ++ * should be ACCESS_RW for both directories, but not both hierarchies ++ * because of the first layer. ++ */ ++ const struct rule layer2_mount_point[] = { ++ { ++ .path = dir_s1d2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = dir_s2d2, ++ .access = ACCESS_RW, ++ }, ++ {} ++ }; ++ /* Only allow read-access to the s1d3 hierarchies. */ ++ const struct rule layer3_source[] = { ++ { ++ .path = dir_s1d3, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ {} ++ }; ++ /* Removes all access rights. */ ++ const struct rule layer4_destination[] = { ++ { ++ .path = bind_file1_s1d3, ++ .access = LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ {} ++ }; ++ int ruleset_fd; ++ ++ /* Sets rules for the parent directories. */ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_parent); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks source hierarchy. */ ++ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); ++ ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ ++ /* Checks destination hierarchy. 
*/ ++ ASSERT_EQ(0, test_open(file1_s2d1, O_RDWR)); ++ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY)); ++ ++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR)); ++ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY)); ++ ++ /* Sets rules for the mount points. */ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_mount_point); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks source hierarchy. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY)); ++ ++ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); ++ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ ++ /* Checks destination hierarchy. */ ++ ASSERT_EQ(EACCES, test_open(file1_s2d1, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s2d1, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY)); ++ ++ ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR)); ++ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY)); ++ ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY)); ++ ++ /* Sets a (shared) rule only on the source. */ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_source); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks source hierarchy. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY)); ++ ++ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY)); ++ ++ /* Checks destination hierarchy. 
*/ ++ ASSERT_EQ(EACCES, test_open(file1_s2d2, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s2d2, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY)); ++ ++ ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY)); ++ ASSERT_EQ(EACCES, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY)); ++ ++ /* Sets a (shared) rule only on the destination. */ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_destination); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks source hierarchy. */ ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY)); ++ ++ /* Checks destination hierarchy. */ ++ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY)); ++} ++ ++#define LOWER_BASE TMP_DIR "/lower" ++#define LOWER_DATA LOWER_BASE "/data" ++static const char lower_fl1[] = LOWER_DATA "/fl1"; ++static const char lower_dl1[] = LOWER_DATA "/dl1"; ++static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2"; ++static const char lower_fo1[] = LOWER_DATA "/fo1"; ++static const char lower_do1[] = LOWER_DATA "/do1"; ++static const char lower_do1_fo2[] = LOWER_DATA "/do1/fo2"; ++static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3"; ++ ++static const char (*lower_base_files[])[] = { ++ &lower_fl1, ++ &lower_fo1, ++ NULL ++}; ++static const char (*lower_base_directories[])[] = { ++ &lower_dl1, ++ &lower_do1, ++ NULL ++}; ++static const char (*lower_sub_files[])[] = { ++ &lower_dl1_fl2, ++ &lower_do1_fo2, ++ &lower_do1_fl3, ++ NULL ++}; ++ ++#define UPPER_BASE TMP_DIR "/upper" ++#define UPPER_DATA UPPER_BASE "/data" ++#define UPPER_WORK UPPER_BASE "/work" ++static const char upper_fu1[] = UPPER_DATA "/fu1"; ++static const char upper_du1[] = UPPER_DATA "/du1"; ++static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2"; 
++static const char upper_fo1[] = UPPER_DATA "/fo1"; ++static const char upper_do1[] = UPPER_DATA "/do1"; ++static const char upper_do1_fo2[] = UPPER_DATA "/do1/fo2"; ++static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3"; ++ ++static const char (*upper_base_files[])[] = { ++ &upper_fu1, ++ &upper_fo1, ++ NULL ++}; ++static const char (*upper_base_directories[])[] = { ++ &upper_du1, ++ &upper_do1, ++ NULL ++}; ++static const char (*upper_sub_files[])[] = { ++ &upper_du1_fu2, ++ &upper_do1_fo2, ++ &upper_do1_fu3, ++ NULL ++}; ++ ++#define MERGE_BASE TMP_DIR "/merge" ++#define MERGE_DATA MERGE_BASE "/data" ++static const char merge_fl1[] = MERGE_DATA "/fl1"; ++static const char merge_dl1[] = MERGE_DATA "/dl1"; ++static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2"; ++static const char merge_fu1[] = MERGE_DATA "/fu1"; ++static const char merge_du1[] = MERGE_DATA "/du1"; ++static const char merge_du1_fu2[] = MERGE_DATA "/du1/fu2"; ++static const char merge_fo1[] = MERGE_DATA "/fo1"; ++static const char merge_do1[] = MERGE_DATA "/do1"; ++static const char merge_do1_fo2[] = MERGE_DATA "/do1/fo2"; ++static const char merge_do1_fl3[] = MERGE_DATA "/do1/fl3"; ++static const char merge_do1_fu3[] = MERGE_DATA "/do1/fu3"; ++ ++static const char (*merge_base_files[])[] = { ++ &merge_fl1, ++ &merge_fu1, ++ &merge_fo1, ++ NULL ++}; ++static const char (*merge_base_directories[])[] = { ++ &merge_dl1, ++ &merge_du1, ++ &merge_do1, ++ NULL ++}; ++static const char (*merge_sub_files[])[] = { ++ &merge_dl1_fl2, ++ &merge_du1_fu2, ++ &merge_do1_fo2, ++ &merge_do1_fl3, ++ &merge_do1_fu3, ++ NULL ++}; ++ ++/* ++ * layout2_overlay hierarchy: ++ * ++ * tmp ++ * ├── lower ++ * │   └── data ++ * │   ├── dl1 ++ * │   │   └── fl2 ++ * │   ├── do1 ++ * │   │   ├── fl3 ++ * │   │   └── fo2 ++ * │   ├── fl1 ++ * │   └── fo1 ++ * ├── merge ++ * │   └── data ++ * │   ├── dl1 ++ * │   │   └── fl2 ++ * │   ├── do1 ++ * │   │   ├── fl3 ++ * │   │   ├── fo2 ++ * │   │   └── fu3 ++ * │   ├── 
du1 ++ * │   │   └── fu2 ++ * │   ├── fl1 ++ * │   ├── fo1 ++ * │   └── fu1 ++ * └── upper ++ * ├── data ++ * │   ├── do1 ++ * │   │   ├── fo2 ++ * │   │   └── fu3 ++ * │   ├── du1 ++ * │   │   └── fu2 ++ * │   ├── fo1 ++ * │   └── fu1 ++ * └── work ++ * └── work ++ */ ++ ++FIXTURE(layout2_overlay) { ++}; ++ ++FIXTURE_SETUP(layout2_overlay) ++{ ++ prepare_layout(_metadata); ++ ++ create_directory(_metadata, LOWER_BASE); ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ /* Creates tmpfs mount points to get deterministic overlayfs. */ ++ ASSERT_EQ(0, mount("tmp", LOWER_BASE, "tmpfs", 0, "size=4m,mode=700")); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ create_file(_metadata, lower_fl1); ++ create_file(_metadata, lower_dl1_fl2); ++ create_file(_metadata, lower_fo1); ++ create_file(_metadata, lower_do1_fo2); ++ create_file(_metadata, lower_do1_fl3); ++ ++ create_directory(_metadata, UPPER_BASE); ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ ASSERT_EQ(0, mount("tmp", UPPER_BASE, "tmpfs", 0, "size=4m,mode=700")); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ create_file(_metadata, upper_fu1); ++ create_file(_metadata, upper_du1_fu2); ++ create_file(_metadata, upper_fo1); ++ create_file(_metadata, upper_do1_fo2); ++ create_file(_metadata, upper_do1_fu3); ++ ASSERT_EQ(0, mkdir(UPPER_WORK, 0700)); ++ ++ create_directory(_metadata, MERGE_DATA); ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ set_cap(_metadata, CAP_DAC_OVERRIDE); ++ ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0, ++ "lowerdir=" LOWER_DATA ++ ",upperdir=" UPPER_DATA ++ ",workdir=" UPPER_WORK)); ++ clear_cap(_metadata, CAP_DAC_OVERRIDE); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++} ++ ++FIXTURE_TEARDOWN(layout2_overlay) ++{ ++ EXPECT_EQ(0, remove_path(lower_do1_fl3)); ++ EXPECT_EQ(0, remove_path(lower_dl1_fl2)); ++ EXPECT_EQ(0, remove_path(lower_fl1)); ++ EXPECT_EQ(0, remove_path(lower_do1_fo2)); ++ EXPECT_EQ(0, remove_path(lower_fo1)); ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, umount(LOWER_BASE)); ++ 
clear_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, remove_path(LOWER_BASE)); ++ ++ EXPECT_EQ(0, remove_path(upper_do1_fu3)); ++ EXPECT_EQ(0, remove_path(upper_du1_fu2)); ++ EXPECT_EQ(0, remove_path(upper_fu1)); ++ EXPECT_EQ(0, remove_path(upper_do1_fo2)); ++ EXPECT_EQ(0, remove_path(upper_fo1)); ++ EXPECT_EQ(0, remove_path(UPPER_WORK "/work")); ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, umount(UPPER_BASE)); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, remove_path(UPPER_BASE)); ++ ++ set_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, umount(MERGE_DATA)); ++ clear_cap(_metadata, CAP_SYS_ADMIN); ++ EXPECT_EQ(0, remove_path(MERGE_DATA)); ++ ++ cleanup_layout(_metadata); ++} ++ ++TEST_F_FORK(layout2_overlay, no_restriction) ++{ ++ ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(lower_fo1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(lower_do1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(lower_do1_fo2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(lower_do1_fl3, O_RDONLY)); ++ ++ ASSERT_EQ(0, test_open(upper_fu1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(upper_du1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(upper_du1_fu2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(upper_fo1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(upper_do1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(upper_do1_fo2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(upper_do1_fu3, O_RDONLY)); ++ ++ ASSERT_EQ(0, test_open(merge_fl1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(merge_dl1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(merge_dl1_fl2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(merge_fu1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(merge_du1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(merge_du1_fu2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(merge_fo1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(merge_do1, O_RDONLY)); ++ ASSERT_EQ(0, test_open(merge_do1_fo2, O_RDONLY)); ++ ASSERT_EQ(0, test_open(merge_do1_fl3, O_RDONLY)); ++ ASSERT_EQ(0, 
test_open(merge_do1_fu3, O_RDONLY)); ++} ++ ++#define for_each_path(path_list, path_entry, i) \ ++ for (i = 0, path_entry = *path_list[i]; path_list[i]; \ ++ path_entry = *path_list[++i]) ++ ++TEST_F_FORK(layout2_overlay, same_content_different_file) ++{ ++ /* Sets access right on parent directories of both layers. */ ++ const struct rule layer1_base[] = { ++ { ++ .path = LOWER_BASE, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = UPPER_BASE, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = MERGE_BASE, ++ .access = ACCESS_RW, ++ }, ++ {} ++ }; ++ const struct rule layer2_data[] = { ++ { ++ .path = LOWER_DATA, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = UPPER_DATA, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = MERGE_DATA, ++ .access = ACCESS_RW, ++ }, ++ {} ++ }; ++ /* Sets access right on directories inside both layers. */ ++ const struct rule layer3_subdirs[] = { ++ { ++ .path = lower_dl1, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = lower_do1, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = upper_du1, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = upper_do1, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = merge_dl1, ++ .access = ACCESS_RW, ++ }, ++ { ++ .path = merge_du1, ++ .access = ACCESS_RW, ++ }, ++ { ++ .path = merge_do1, ++ .access = ACCESS_RW, ++ }, ++ {} ++ }; ++ /* Tighten access rights to the files. 
*/ ++ const struct rule layer4_files[] = { ++ { ++ .path = lower_dl1_fl2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = lower_do1_fo2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = lower_do1_fl3, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = upper_du1_fu2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = upper_do1_fo2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = upper_do1_fu3, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE, ++ }, ++ { ++ .path = merge_dl1_fl2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ { ++ .path = merge_du1_fu2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ { ++ .path = merge_do1_fo2, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ { ++ .path = merge_do1_fl3, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ { ++ .path = merge_do1_fu3, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ {} ++ }; ++ const struct rule layer5_merge_only[] = { ++ { ++ .path = MERGE_DATA, ++ .access = LANDLOCK_ACCESS_FS_READ_FILE | ++ LANDLOCK_ACCESS_FS_WRITE_FILE, ++ }, ++ {} ++ }; ++ int ruleset_fd; ++ size_t i; ++ const char *path_entry; ++ ++ /* Sets rules on base directories (i.e. outside overlay scope). */ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks lower layer. 
*/ ++ for_each_path(lower_base_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); ++ } ++ for_each_path(lower_base_directories, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); ++ } ++ for_each_path(lower_sub_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); ++ } ++ /* Checks upper layer. */ ++ for_each_path(upper_base_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); ++ } ++ for_each_path(upper_base_directories, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); ++ } ++ for_each_path(upper_sub_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); ++ } ++ /* ++ * Checks that access rights are independent from the lower and upper ++ * layers: write access to upper files viewed through the merge point ++ * is still allowed, and write access to lower file viewed (and copied) ++ * through the merge point is still allowed. ++ */ ++ for_each_path(merge_base_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDWR)); ++ } ++ for_each_path(merge_base_directories, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY)); ++ } ++ for_each_path(merge_sub_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDWR)); ++ } ++ ++ /* Sets rules on data directories (i.e. inside overlay scope). */ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_data); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks merge. 
*/ ++ for_each_path(merge_base_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDWR)); ++ } ++ for_each_path(merge_base_directories, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY)); ++ } ++ for_each_path(merge_sub_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDWR)); ++ } ++ ++ /* Same checks with tighter rules. */ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_subdirs); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks changes for lower layer. */ ++ for_each_path(lower_base_files, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); ++ } ++ /* Checks changes for upper layer. */ ++ for_each_path(upper_base_files, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); ++ } ++ /* Checks all merge accesses. */ ++ for_each_path(merge_base_files, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); ++ } ++ for_each_path(merge_base_directories, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY)); ++ } ++ for_each_path(merge_sub_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDWR)); ++ } ++ ++ /* Sets rules directly on overlayed files. */ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_files); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks unchanged accesses on lower layer. */ ++ for_each_path(lower_sub_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); ++ } ++ /* Checks unchanged accesses on upper layer. */ ++ for_each_path(upper_sub_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDONLY)); ++ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY)); ++ } ++ /* Checks all merge accesses. 
*/ ++ for_each_path(merge_base_files, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); ++ } ++ for_each_path(merge_base_directories, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); ++ } ++ for_each_path(merge_sub_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDWR)); ++ } ++ ++ /* Only allows access to the merge hierarchy. */ ++ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer5_merge_only); ++ ASSERT_LE(0, ruleset_fd); ++ enforce_ruleset(_metadata, ruleset_fd); ++ ASSERT_EQ(0, close(ruleset_fd)); ++ ++ /* Checks new accesses on lower layer. */ ++ for_each_path(lower_sub_files, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); ++ } ++ /* Checks new accesses on upper layer. */ ++ for_each_path(upper_sub_files, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY)); ++ } ++ /* Checks all merge accesses. */ ++ for_each_path(merge_base_files, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR)); ++ } ++ for_each_path(merge_base_directories, path_entry, i) { ++ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY | O_DIRECTORY)); ++ } ++ for_each_path(merge_sub_files, path_entry, i) { ++ ASSERT_EQ(0, test_open(path_entry, O_RDWR)); ++ } ++} ++ ++TEST_HARNESS_MAIN +diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c +new file mode 100644 +index 0000000000000..15fbef9cc8496 +--- /dev/null ++++ b/tools/testing/selftests/landlock/ptrace_test.c +@@ -0,0 +1,337 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Landlock tests - Ptrace ++ * ++ * Copyright © 2017-2020 Mickaël Salaün ++ * Copyright © 2019-2020 ANSSI ++ */ ++ ++#define _GNU_SOURCE ++#include <errno.h> ++#include <fcntl.h> ++#include <linux/landlock.h> ++#include <signal.h> ++#include <sys/prctl.h> ++#include <sys/ptrace.h> ++#include <sys/types.h> ++#include <sys/wait.h> ++#include <unistd.h> ++ ++#include "common.h" ++ ++static void create_domain(struct __test_metadata *const _metadata) ++{ ++ int ruleset_fd; ++ struct landlock_ruleset_attr
ruleset_attr = { ++ .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK, ++ }; ++ ++ ruleset_fd = landlock_create_ruleset(&ruleset_attr, ++ sizeof(ruleset_attr), 0); ++ EXPECT_LE(0, ruleset_fd) { ++ TH_LOG("Failed to create a ruleset: %s", strerror(errno)); ++ } ++ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); ++ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); ++ EXPECT_EQ(0, close(ruleset_fd)); ++} ++ ++static int test_ptrace_read(const pid_t pid) ++{ ++ static const char path_template[] = "/proc/%d/environ"; ++ char procenv_path[sizeof(path_template) + 10]; ++ int procenv_path_size, fd; ++ ++ procenv_path_size = snprintf(procenv_path, sizeof(procenv_path), ++ path_template, pid); ++ if (procenv_path_size >= sizeof(procenv_path)) ++ return E2BIG; ++ ++ fd = open(procenv_path, O_RDONLY | O_CLOEXEC); ++ if (fd < 0) ++ return errno; ++ /* ++ * Mixing error codes from close(2) and open(2) should not lead to any ++ * (access type) confusion for this test. ++ */ ++ if (close(fd) != 0) ++ return errno; ++ return 0; ++} ++ ++FIXTURE(hierarchy) { }; ++ ++FIXTURE_VARIANT(hierarchy) { ++ const bool domain_both; ++ const bool domain_parent; ++ const bool domain_child; ++}; ++ ++/* ++ * Test multiple tracing combinations between a parent process P1 and a child ++ * process P2. ++ * ++ * Yama's scoped ptrace is presumed disabled. If enabled, this optional ++ * restriction is enforced in addition to any Landlock check, which means that ++ * all P2 requests to trace P1 would be denied. ++ */ ++ ++/* ++ * No domain ++ * ++ * P1-. P1 -> P2 : allow ++ * \ P2 -> P1 : allow ++ * 'P2 ++ */ ++FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) { ++ .domain_both = false, ++ .domain_parent = false, ++ .domain_child = false, ++}; ++ ++/* ++ * Child domain ++ * ++ * P1--. P1 -> P2 : allow ++ * \ P2 -> P1 : deny ++ * .'-----. 
++ * | P2 | ++ * '------' ++ */ ++FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) { ++ .domain_both = false, ++ .domain_parent = false, ++ .domain_child = true, ++}; ++ ++/* ++ * Parent domain ++ * .------. ++ * | P1 --. P1 -> P2 : deny ++ * '------' \ P2 -> P1 : allow ++ * ' ++ * P2 ++ */ ++FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) { ++ .domain_both = false, ++ .domain_parent = true, ++ .domain_child = false, ++}; ++ ++/* ++ * Parent + child domain (siblings) ++ * .------. ++ * | P1 ---. P1 -> P2 : deny ++ * '------' \ P2 -> P1 : deny ++ * .---'--. ++ * | P2 | ++ * '------' ++ */ ++FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) { ++ .domain_both = false, ++ .domain_parent = true, ++ .domain_child = true, ++}; ++ ++/* ++ * Same domain (inherited) ++ * .-------------. ++ * | P1----. | P1 -> P2 : allow ++ * | \ | P2 -> P1 : allow ++ * | ' | ++ * | P2 | ++ * '-------------' ++ */ ++FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) { ++ .domain_both = true, ++ .domain_parent = false, ++ .domain_child = false, ++}; ++ ++/* ++ * Inherited + child domain ++ * .-----------------. ++ * | P1----. | P1 -> P2 : allow ++ * | \ | P2 -> P1 : deny ++ * | .-'----. | ++ * | | P2 | | ++ * | '------' | ++ * '-----------------' ++ */ ++FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) { ++ .domain_both = true, ++ .domain_parent = false, ++ .domain_child = true, ++}; ++ ++/* ++ * Inherited + parent domain ++ * .-----------------. ++ * |.------. | P1 -> P2 : deny ++ * || P1 ----. | P2 -> P1 : allow ++ * |'------' \ | ++ * | ' | ++ * | P2 | ++ * '-----------------' ++ */ ++FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) { ++ .domain_both = true, ++ .domain_parent = true, ++ .domain_child = false, ++}; ++ ++/* ++ * Inherited + parent and child domain (siblings) ++ * .-----------------. ++ * | .------. | P1 -> P2 : deny ++ * | | P1 . | P2 -> P1 : deny ++ * | '------'\ | ++ * | \ | ++ * | .--'---. 
| ++ * | | P2 | | ++ * | '------' | ++ * '-----------------' ++ */ ++FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) { ++ .domain_both = true, ++ .domain_parent = true, ++ .domain_child = true, ++}; ++ ++FIXTURE_SETUP(hierarchy) ++{ } ++ ++FIXTURE_TEARDOWN(hierarchy) ++{ } ++ ++/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */ ++TEST_F(hierarchy, trace) ++{ ++ pid_t child, parent; ++ int status, err_proc_read; ++ int pipe_child[2], pipe_parent[2]; ++ char buf_parent; ++ long ret; ++ ++ /* ++ * Removes all effective and permitted capabilities to not interfere ++ * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS. ++ */ ++ drop_caps(_metadata); ++ ++ parent = getpid(); ++ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); ++ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); ++ if (variant->domain_both) { ++ create_domain(_metadata); ++ if (!_metadata->passed) ++ /* Aborts before forking. */ ++ return; ++ } ++ ++ child = fork(); ++ ASSERT_LE(0, child); ++ if (child == 0) { ++ char buf_child; ++ ++ ASSERT_EQ(0, close(pipe_parent[1])); ++ ASSERT_EQ(0, close(pipe_child[0])); ++ if (variant->domain_child) ++ create_domain(_metadata); ++ ++ /* Waits for the parent to be in a domain, if any. */ ++ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); ++ ++ /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. */ ++ err_proc_read = test_ptrace_read(parent); ++ ret = ptrace(PTRACE_ATTACH, parent, NULL, 0); ++ if (variant->domain_child) { ++ EXPECT_EQ(-1, ret); ++ EXPECT_EQ(EPERM, errno); ++ EXPECT_EQ(EACCES, err_proc_read); ++ } else { ++ EXPECT_EQ(0, ret); ++ EXPECT_EQ(0, err_proc_read); ++ } ++ if (ret == 0) { ++ ASSERT_EQ(parent, waitpid(parent, &status, 0)); ++ ASSERT_EQ(1, WIFSTOPPED(status)); ++ ASSERT_EQ(0, ptrace(PTRACE_DETACH, parent, NULL, 0)); ++ } ++ ++ /* Tests child PTRACE_TRACEME. 
*/ ++ ret = ptrace(PTRACE_TRACEME); ++ if (variant->domain_parent) { ++ EXPECT_EQ(-1, ret); ++ EXPECT_EQ(EPERM, errno); ++ } else { ++ EXPECT_EQ(0, ret); ++ } ++ ++ /* ++ * Signals that the PTRACE_ATTACH test is done and the ++ * PTRACE_TRACEME test is ongoing. ++ */ ++ ASSERT_EQ(1, write(pipe_child[1], ".", 1)); ++ ++ if (!variant->domain_parent) { ++ ASSERT_EQ(0, raise(SIGSTOP)); ++ } ++ ++ /* Waits for the parent PTRACE_ATTACH test. */ ++ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); ++ _exit(_metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); ++ return; ++ } ++ ++ ASSERT_EQ(0, close(pipe_child[1])); ++ ASSERT_EQ(0, close(pipe_parent[0])); ++ if (variant->domain_parent) ++ create_domain(_metadata); ++ ++ /* Signals that the parent is in a domain, if any. */ ++ ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); ++ ++ /* ++ * Waits for the child to test PTRACE_ATTACH on the parent and start ++ * testing PTRACE_TRACEME. ++ */ ++ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1)); ++ ++ /* Tests child PTRACE_TRACEME. */ ++ if (!variant->domain_parent) { ++ ASSERT_EQ(child, waitpid(child, &status, 0)); ++ ASSERT_EQ(1, WIFSTOPPED(status)); ++ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0)); ++ } else { ++ /* The child should not be traced by the parent. */ ++ EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0)); ++ EXPECT_EQ(ESRCH, errno); ++ } ++ ++ /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */ ++ err_proc_read = test_ptrace_read(child); ++ ret = ptrace(PTRACE_ATTACH, child, NULL, 0); ++ if (variant->domain_parent) { ++ EXPECT_EQ(-1, ret); ++ EXPECT_EQ(EPERM, errno); ++ EXPECT_EQ(EACCES, err_proc_read); ++ } else { ++ EXPECT_EQ(0, ret); ++ EXPECT_EQ(0, err_proc_read); ++ } ++ if (ret == 0) { ++ ASSERT_EQ(child, waitpid(child, &status, 0)); ++ ASSERT_EQ(1, WIFSTOPPED(status)); ++ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0)); ++ } ++ ++ /* Signals that the parent PTRACE_ATTACH test is done. 
*/ ++ ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); ++ ASSERT_EQ(child, waitpid(child, &status, 0)); ++ if (WIFSIGNALED(status) || !WIFEXITED(status) || ++ WEXITSTATUS(status) != EXIT_SUCCESS) ++ _metadata->passed = 0; ++} ++ ++TEST_HARNESS_MAIN +diff --git a/tools/testing/selftests/landlock/true.c b/tools/testing/selftests/landlock/true.c +new file mode 100644 +index 0000000000000..3f9ccbf52783a +--- /dev/null ++++ b/tools/testing/selftests/landlock/true.c +@@ -0,0 +1,5 @@ ++// SPDX-License-Identifier: GPL-2.0 ++int main(void) ++{ ++ return 0; ++} +-- +2.39.2 + diff --git a/queue-5.10/selftests-landlock-skip-overlayfs-tests-when-not-sup.patch b/queue-5.10/selftests-landlock-skip-overlayfs-tests-when-not-sup.patch new file mode 100644 index 00000000000..ffdf1daa347 --- /dev/null +++ b/queue-5.10/selftests-landlock-skip-overlayfs-tests-when-not-sup.patch @@ -0,0 +1,123 @@ +From 54bbaa4518388e88b866567125ccc67d87b9a423 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Jan 2023 05:32:29 +0000 +Subject: selftests/landlock: Skip overlayfs tests when not supported +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jeff Xu + +[ Upstream commit 366617a69e60610912836570546f118006ebc7cb ] + +overlayfs may be disabled in the kernel configuration, causing related +tests to fail. Check that overlayfs is supported at runtime, so we can +skip layout2_overlay.* accordingly. 
+ +Signed-off-by: Jeff Xu +Reviewed-by: Guenter Roeck +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20230113053229.1281774-2-jeffxu@google.com +[mic: Reword comments and constify variables] +Signed-off-by: Mickaël Salaün +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/landlock/fs_test.c | 47 ++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c +index 10c9a1e4ebd9b..db153452b110a 100644 +--- a/tools/testing/selftests/landlock/fs_test.c ++++ b/tools/testing/selftests/landlock/fs_test.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -74,6 +75,40 @@ static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3"; + * └── s3d3 + */ + ++static bool fgrep(FILE *const inf, const char *const str) ++{ ++ char line[32]; ++ const int slen = strlen(str); ++ ++ while (!feof(inf)) { ++ if (!fgets(line, sizeof(line), inf)) ++ break; ++ if (strncmp(line, str, slen)) ++ continue; ++ ++ return true; ++ } ++ ++ return false; ++} ++ ++static bool supports_overlayfs(void) ++{ ++ bool res; ++ FILE *const inf = fopen("/proc/filesystems", "r"); ++ ++ /* ++ * Consider that the filesystem is supported if we cannot get the ++ * supported ones. 
++ */ ++ if (!inf) ++ return true; ++ ++ res = fgrep(inf, "nodev\toverlay\n"); ++ fclose(inf); ++ return res; ++} ++ + static void mkdir_parents(struct __test_metadata *const _metadata, + const char *const path) + { +@@ -2416,6 +2451,9 @@ FIXTURE(layout2_overlay) { + + FIXTURE_SETUP(layout2_overlay) + { ++ if (!supports_overlayfs()) ++ SKIP(return, "overlayfs is not supported"); ++ + prepare_layout(_metadata); + + create_directory(_metadata, LOWER_BASE); +@@ -2453,6 +2491,9 @@ FIXTURE_SETUP(layout2_overlay) + + FIXTURE_TEARDOWN(layout2_overlay) + { ++ if (!supports_overlayfs()) ++ SKIP(return, "overlayfs is not supported"); ++ + EXPECT_EQ(0, remove_path(lower_do1_fl3)); + EXPECT_EQ(0, remove_path(lower_dl1_fl2)); + EXPECT_EQ(0, remove_path(lower_fl1)); +@@ -2484,6 +2525,9 @@ FIXTURE_TEARDOWN(layout2_overlay) + + TEST_F_FORK(layout2_overlay, no_restriction) + { ++ if (!supports_overlayfs()) ++ SKIP(return, "overlayfs is not supported"); ++ + ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY)); + ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY)); +@@ -2647,6 +2691,9 @@ TEST_F_FORK(layout2_overlay, same_content_different_file) + size_t i; + const char *path_entry; + ++ if (!supports_overlayfs()) ++ SKIP(return, "overlayfs is not supported"); ++ + /* Sets rules on base directories (i.e. outside overlay scope). 
*/ + ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base); + ASSERT_LE(0, ruleset_fd); +-- +2.39.2 + diff --git a/queue-5.10/selftests-landlock-test-ptrace-as-much-as-possible-w.patch b/queue-5.10/selftests-landlock-test-ptrace-as-much-as-possible-w.patch new file mode 100644 index 00000000000..bc08571f063 --- /dev/null +++ b/queue-5.10/selftests-landlock-test-ptrace-as-much-as-possible-w.patch @@ -0,0 +1,220 @@ +From 16905b2ec61fefd3ae8a5ed6e13c12ce98bd579e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 14 Jan 2023 02:03:06 +0000 +Subject: selftests/landlock: Test ptrace as much as possible with Yama +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jeff Xu + +[ Upstream commit 8677e555f17f51321d0730b945aeb7d4b95f998f ] + +Update ptrace tests according to all potential Yama security policies. +This is required to make such tests pass even if Yama is enabled. + +Tests are not skipped but they now check both Landlock and Yama boundary +restrictions at run time to keep a maximum test coverage (i.e. positive +and negative testing). 
+ +Signed-off-by: Jeff Xu +Link: https://lore.kernel.org/r/20230114020306.1407195-2-jeffxu@google.com +Cc: stable@vger.kernel.org +[mic: Add curly braces around EXPECT_EQ() to make it build, and improve +commit message] +Co-developed-by: Mickaël Salaün +Signed-off-by: Mickaël Salaün +Signed-off-by: Sasha Levin +--- + .../testing/selftests/landlock/ptrace_test.c | 113 +++++++++++++++--- + 1 file changed, 96 insertions(+), 17 deletions(-) + +diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c +index 090adadfe2dc3..14745cceb059a 100644 +--- a/tools/testing/selftests/landlock/ptrace_test.c ++++ b/tools/testing/selftests/landlock/ptrace_test.c +@@ -19,6 +19,12 @@ + + #include "common.h" + ++/* Copied from security/yama/yama_lsm.c */ ++#define YAMA_SCOPE_DISABLED 0 ++#define YAMA_SCOPE_RELATIONAL 1 ++#define YAMA_SCOPE_CAPABILITY 2 ++#define YAMA_SCOPE_NO_ATTACH 3 ++ + static void create_domain(struct __test_metadata *const _metadata) + { + int ruleset_fd; +@@ -59,6 +65,25 @@ static int test_ptrace_read(const pid_t pid) + return 0; + } + ++static int get_yama_ptrace_scope(void) ++{ ++ int ret; ++ char buf[2] = {}; ++ const int fd = open("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY); ++ ++ if (fd < 0) ++ return 0; ++ ++ if (read(fd, buf, 1) < 0) { ++ close(fd); ++ return -1; ++ } ++ ++ ret = atoi(buf); ++ close(fd); ++ return ret; ++} ++ + /* clang-format off */ + FIXTURE(hierarchy) {}; + /* clang-format on */ +@@ -228,8 +253,51 @@ TEST_F(hierarchy, trace) + pid_t child, parent; + int status, err_proc_read; + int pipe_child[2], pipe_parent[2]; ++ int yama_ptrace_scope; + char buf_parent; + long ret; ++ bool can_read_child, can_trace_child, can_read_parent, can_trace_parent; ++ ++ yama_ptrace_scope = get_yama_ptrace_scope(); ++ ASSERT_LE(0, yama_ptrace_scope); ++ ++ if (yama_ptrace_scope > YAMA_SCOPE_DISABLED) ++ TH_LOG("Incomplete tests due to Yama restrictions (scope %d)", ++ yama_ptrace_scope); ++ ++ /* ++ * 
can_read_child is true if a parent process can read its child ++ * process, which is only the case when the parent process is not ++ * isolated from the child with a dedicated Landlock domain. ++ */ ++ can_read_child = !variant->domain_parent; ++ ++ /* ++ * can_trace_child is true if a parent process can trace its child ++ * process. This depends on two conditions: ++ * - The parent process is not isolated from the child with a dedicated ++ * Landlock domain. ++ * - Yama allows tracing children (up to YAMA_SCOPE_RELATIONAL). ++ */ ++ can_trace_child = can_read_child && ++ yama_ptrace_scope <= YAMA_SCOPE_RELATIONAL; ++ ++ /* ++ * can_read_parent is true if a child process can read its parent ++ * process, which is only the case when the child process is not ++ * isolated from the parent with a dedicated Landlock domain. ++ */ ++ can_read_parent = !variant->domain_child; ++ ++ /* ++ * can_trace_parent is true if a child process can trace its parent ++ * process. This depends on two conditions: ++ * - The child process is not isolated from the parent with a dedicated ++ * Landlock domain. ++ * - Yama is disabled (YAMA_SCOPE_DISABLED). ++ */ ++ can_trace_parent = can_read_parent && ++ yama_ptrace_scope <= YAMA_SCOPE_DISABLED; + + /* + * Removes all effective and permitted capabilities to not interfere +@@ -260,16 +328,21 @@ TEST_F(hierarchy, trace) + /* Waits for the parent to be in a domain, if any. */ + ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1)); + +- /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the parent. */ ++ /* Tests PTRACE_MODE_READ on the parent. */ + err_proc_read = test_ptrace_read(parent); ++ if (can_read_parent) { ++ EXPECT_EQ(0, err_proc_read); ++ } else { ++ EXPECT_EQ(EACCES, err_proc_read); ++ } ++ ++ /* Tests PTRACE_ATTACH on the parent. 
*/ + ret = ptrace(PTRACE_ATTACH, parent, NULL, 0); +- if (variant->domain_child) { ++ if (can_trace_parent) { ++ EXPECT_EQ(0, ret); ++ } else { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); +- EXPECT_EQ(EACCES, err_proc_read); +- } else { +- EXPECT_EQ(0, ret); +- EXPECT_EQ(0, err_proc_read); + } + if (ret == 0) { + ASSERT_EQ(parent, waitpid(parent, &status, 0)); +@@ -279,11 +352,11 @@ TEST_F(hierarchy, trace) + + /* Tests child PTRACE_TRACEME. */ + ret = ptrace(PTRACE_TRACEME); +- if (variant->domain_parent) { ++ if (can_trace_child) { ++ EXPECT_EQ(0, ret); ++ } else { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); +- } else { +- EXPECT_EQ(0, ret); + } + + /* +@@ -292,7 +365,7 @@ TEST_F(hierarchy, trace) + */ + ASSERT_EQ(1, write(pipe_child[1], ".", 1)); + +- if (!variant->domain_parent) { ++ if (can_trace_child) { + ASSERT_EQ(0, raise(SIGSTOP)); + } + +@@ -317,7 +390,7 @@ TEST_F(hierarchy, trace) + ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1)); + + /* Tests child PTRACE_TRACEME. */ +- if (!variant->domain_parent) { ++ if (can_trace_child) { + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0)); +@@ -327,17 +400,23 @@ TEST_F(hierarchy, trace) + EXPECT_EQ(ESRCH, errno); + } + +- /* Tests PTRACE_ATTACH and PTRACE_MODE_READ on the child. */ ++ /* Tests PTRACE_MODE_READ on the child. */ + err_proc_read = test_ptrace_read(child); ++ if (can_read_child) { ++ EXPECT_EQ(0, err_proc_read); ++ } else { ++ EXPECT_EQ(EACCES, err_proc_read); ++ } ++ ++ /* Tests PTRACE_ATTACH on the child. 
*/ + ret = ptrace(PTRACE_ATTACH, child, NULL, 0); +- if (variant->domain_parent) { ++ if (can_trace_child) { ++ EXPECT_EQ(0, ret); ++ } else { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EPERM, errno); +- EXPECT_EQ(EACCES, err_proc_read); +- } else { +- EXPECT_EQ(0, ret); +- EXPECT_EQ(0, err_proc_read); + } ++ + if (ret == 0) { + ASSERT_EQ(child, waitpid(child, &status, 0)); + ASSERT_EQ(1, WIFSTOPPED(status)); +-- +2.39.2 + diff --git a/queue-5.10/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch b/queue-5.10/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch new file mode 100644 index 00000000000..3f9295442f4 --- /dev/null +++ b/queue-5.10/selftests-nft_nat-ensuring-the-listening-side-is-up-.patch @@ -0,0 +1,58 @@ +From 18e64ec0888c0efc74dd751f978d97e046941ed1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 27 Feb 2023 17:36:46 +0800 +Subject: selftests: nft_nat: ensuring the listening side is up before starting + the client + +From: Hangbin Liu + +[ Upstream commit 2067e7a00aa604b94de31d64f29b8893b1696f26 ] + +The test_local_dnat_portonly() function initiates the client-side as +soon as it sets the listening side to the background. This could lead to +a race condition where the server may not be ready to listen. To ensure +that the server-side is up and running before initiating the +client-side, a delay is introduced to the test_local_dnat_portonly() +function. 
+ +Before the fix: + # ./nft_nat.sh + PASS: netns routing/connectivity: ns0-rthlYrBU can reach ns1-rthlYrBU and ns2-rthlYrBU + PASS: ping to ns1-rthlYrBU was ip NATted to ns2-rthlYrBU + PASS: ping to ns1-rthlYrBU OK after ip nat output chain flush + PASS: ipv6 ping to ns1-rthlYrBU was ip6 NATted to ns2-rthlYrBU + 2023/02/27 04:11:03 socat[6055] E connect(5, AF=2 10.0.1.99:2000, 16): Connection refused + ERROR: inet port rewrite + +After the fix: + # ./nft_nat.sh + PASS: netns routing/connectivity: ns0-9sPJV6JJ can reach ns1-9sPJV6JJ and ns2-9sPJV6JJ + PASS: ping to ns1-9sPJV6JJ was ip NATted to ns2-9sPJV6JJ + PASS: ping to ns1-9sPJV6JJ OK after ip nat output chain flush + PASS: ipv6 ping to ns1-9sPJV6JJ was ip6 NATted to ns2-9sPJV6JJ + PASS: inet port rewrite without l3 address + +Fixes: 282e5f8fe907 ("netfilter: nat: really support inet nat without l3 address") +Signed-off-by: Hangbin Liu +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/netfilter/nft_nat.sh | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh +index 4e15e81673104..67697d8ea59a5 100755 +--- a/tools/testing/selftests/netfilter/nft_nat.sh ++++ b/tools/testing/selftests/netfilter/nft_nat.sh +@@ -404,6 +404,8 @@ EOF + echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 & + sc_s=$! 
+ ++ sleep 1 ++ + result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT) + + if [ "$result" = "SERVER-inet" ];then +-- +2.39.2 + diff --git a/queue-5.10/series b/queue-5.10/series index 89c6906a197..92235b40fff 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -10,3 +10,61 @@ ext4-move-where-set-the-may_inline_data-flag-is-set.patch ext4-fix-warning-in-ext4_update_inline_data.patch ext4-zero-i_disksize-when-initializing-the-bootloader-inode.patch nfc-change-order-inside-nfc_se_io-error-path.patch +landlock-add-object-management.patch +selftests-landlock-add-user-space-tests.patch +selftests-landlock-skip-overlayfs-tests-when-not-sup.patch +udf-fix-off-by-one-error-when-discarding-preallocati.patch +selftests-landlock-add-clang-format-exceptions.patch +selftests-landlock-test-ptrace-as-much-as-possible-w.patch +irq-fix-typos-in-comments.patch +irqdomain-look-for-existing-mapping-only-once.patch +irqdomain-refactor-__irq_domain_alloc_irqs.patch +irqdomain-fix-mapping-creation-race.patch +irqdomain-change-the-type-of-size-in-__irq_domain_ad.patch +irqdomain-fix-domain-registration-race.patch +software-node-introduce-device_add_software_node.patch +usb-dwc3-pci-register-a-software-node-for-the-dwc3-p.patch +usb-dwc3-pci-id-for-tiger-lake-cpu.patch +usb-dwc3-pci-add-support-for-the-intel-raptor-lake-s.patch +usb-dwc3-pci-add-support-for-the-intel-meteor-lake-p.patch +usb-dwc3-pci-add-support-for-the-intel-meteor-lake-m.patch +riscv-using-patchable_function_entry-instead-of-mcou.patch +riscv-ftrace-remove-wasted-nops-for-riscv_isa_c.patch +riscv-ftrace-reduce-the-detour-code-size-to-half.patch +iommu-vt-d-fix-lockdep-splat-in-intel_pasid_get_entr.patch +iommu-vt-d-fix-pasid-directory-pointer-coherency.patch +efi-earlycon-replace-open-coded-strnchrnul.patch +arm64-efi-make-efi_rt_lock-a-raw_spinlock.patch +risc-v-avoid-dereferening-null-regs-in-die.patch +riscv-avoid-enabling-interrupts-in-die.patch +riscv-add-header-include-guards-to-insn.h.patch 
+scsi-core-remove-the-proc-scsi-proc_name-directory-e.patch +ext4-fix-possible-corruption-when-moving-a-directory.patch +drm-nouveau-kms-nv50-remove-unused-functions.patch +drm-nouveau-kms-nv50-fix-nv50_wndw_new_-prototype.patch +drm-msm-fix-potential-invalid-ptr-free.patch +drm-msm-a5xx-fix-setting-of-the-cp_preempt_enable_lo.patch +drm-msm-document-and-rename-preempt_lock.patch +drm-msm-a5xx-fix-the-emptyness-check-in-the-preempt-.patch +drm-msm-a5xx-fix-context-faults-during-ring-switch.patch +bgmac-fix-initial-chip-reset-to-support-bcm5358.patch +nfc-fdp-add-null-check-of-devm_kmalloc_array-in-fdp_.patch +powerpc-dts-t1040rdb-fix-compatible-string-for-rev-a.patch +ila-do-not-generate-empty-messages-in-ila_xlat_nl_cm.patch +selftests-nft_nat-ensuring-the-listening-side-is-up-.patch +net-usb-lan78xx-remove-lots-of-set-but-unused-ret-va.patch +net-lan78xx-fix-accessing-the-lan7800-s-internal-phy.patch +net-caif-fix-use-after-free-in-cfusbl_device_notify.patch +net-stmmac-add-to-set-device-wake-up-flag-when-stmma.patch +net-phylib-get-rid-of-unnecessary-locking.patch +bnxt_en-avoid-order-5-memory-allocation-for-tpa-data.patch +netfilter-ctnetlink-revert-to-dumping-mark-regardles.patch +netfilter-tproxy-fix-deadlock-due-to-missing-bh-disa.patch +btf-fix-resolving-btf_kind_var-after-array-struct-un.patch +net-ethernet-mtk_eth_soc-fix-rx-data-corruption-issu.patch +scsi-megaraid_sas-update-max-supported-ld-ids-to-240.patch +platform-x86-mlx_platform-select-regmap-instead-of-d.patch +net-smc-fix-fallback-failed-while-sendmsg-with-fasto.patch +sunrpc-fix-a-server-shutdown-leak.patch +riscv-use-read_once_nocheck-in-imprecise-unwinding-s.patch +risc-v-don-t-check-text_mutex-during-stop_machine.patch diff --git a/queue-5.10/software-node-introduce-device_add_software_node.patch b/queue-5.10/software-node-introduce-device_add_software_node.patch new file mode 100644 index 00000000000..55df990b025 --- /dev/null +++ 
b/queue-5.10/software-node-introduce-device_add_software_node.patch @@ -0,0 +1,135 @@ +From 5006088daaf713e090bef68ab9d5a8082d043762 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Jan 2021 12:49:11 +0300 +Subject: software node: Introduce device_add_software_node() + +From: Heikki Krogerus + +[ Upstream commit e68d0119e3284334de5650a1ac42ef4e179f895e ] + +This helper will register a software node and then assign +it to device at the same time. The function will also make +sure that the device can't have more than one software node. + +Acked-by: Felipe Balbi +Signed-off-by: Heikki Krogerus +Link: https://lore.kernel.org/r/20210115094914.88401-2-heikki.krogerus@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M") +Signed-off-by: Sasha Levin +--- + drivers/base/swnode.c | 71 +++++++++++++++++++++++++++++++++++----- + include/linux/property.h | 3 ++ + 2 files changed, 65 insertions(+), 9 deletions(-) + +diff --git a/drivers/base/swnode.c b/drivers/base/swnode.c +index d2fb3eb5816c3..572a53e6f2e88 100644 +--- a/drivers/base/swnode.c ++++ b/drivers/base/swnode.c +@@ -48,6 +48,19 @@ EXPORT_SYMBOL_GPL(is_software_node); + struct swnode, fwnode) : NULL; \ + }) + ++static inline struct swnode *dev_to_swnode(struct device *dev) ++{ ++ struct fwnode_handle *fwnode = dev_fwnode(dev); ++ ++ if (!fwnode) ++ return NULL; ++ ++ if (!is_software_node(fwnode)) ++ fwnode = fwnode->secondary; ++ ++ return to_swnode(fwnode); ++} ++ + static struct swnode * + software_node_to_swnode(const struct software_node *node) + { +@@ -850,22 +863,62 @@ void fwnode_remove_software_node(struct fwnode_handle *fwnode) + } + EXPORT_SYMBOL_GPL(fwnode_remove_software_node); + ++/** ++ * device_add_software_node - Assign software node to a device ++ * @dev: The device the software node is meant for. ++ * @swnode: The software node. 
++ * ++ * This function will register @swnode and make it the secondary firmware node ++ * pointer of @dev. If @dev has no primary node, then @swnode will become the primary ++ * node. ++ */ ++int device_add_software_node(struct device *dev, const struct software_node *swnode) ++{ ++ int ret; ++ ++ /* Only one software node per device. */ ++ if (dev_to_swnode(dev)) ++ return -EBUSY; ++ ++ ret = software_node_register(swnode); ++ if (ret) ++ return ret; ++ ++ set_secondary_fwnode(dev, software_node_fwnode(swnode)); ++ ++ return 0; ++} ++EXPORT_SYMBOL_GPL(device_add_software_node); ++ ++/** ++ * device_remove_software_node - Remove device's software node ++ * @dev: The device with the software node. ++ * ++ * This function will unregister the software node of @dev. ++ */ ++void device_remove_software_node(struct device *dev) ++{ ++ struct swnode *swnode; ++ ++ swnode = dev_to_swnode(dev); ++ if (!swnode) ++ return; ++ ++ software_node_notify(dev, KOBJ_REMOVE); ++ set_secondary_fwnode(dev, NULL); ++ kobject_put(&swnode->kobj); ++} ++EXPORT_SYMBOL_GPL(device_remove_software_node); ++ + int software_node_notify(struct device *dev, unsigned long action) + { +- struct fwnode_handle *fwnode = dev_fwnode(dev); + struct swnode *swnode; + int ret; + +- if (!fwnode) +- return 0; +- +- if (!is_software_node(fwnode)) +- fwnode = fwnode->secondary; +- if (!is_software_node(fwnode)) ++ swnode = dev_to_swnode(dev); ++ if (!swnode) + return 0; + +- swnode = to_swnode(fwnode); +- + switch (action) { + case KOBJ_ADD: + ret = sysfs_create_link(&dev->kobj, &swnode->kobj, +diff --git a/include/linux/property.h b/include/linux/property.h +index 2d4542629d80b..3b6093f6bd04c 100644 +--- a/include/linux/property.h ++++ b/include/linux/property.h +@@ -485,4 +485,7 @@ fwnode_create_software_node(const struct property_entry *properties, + const struct fwnode_handle *parent); + void fwnode_remove_software_node(struct fwnode_handle *fwnode); + ++int device_add_software_node(struct device *dev, 
const struct software_node *swnode); ++void device_remove_software_node(struct device *dev); ++ + #endif /* _LINUX_PROPERTY_H_ */ +-- +2.39.2 + diff --git a/queue-5.10/sunrpc-fix-a-server-shutdown-leak.patch b/queue-5.10/sunrpc-fix-a-server-shutdown-leak.patch new file mode 100644 index 00000000000..b0135ef0f87 --- /dev/null +++ b/queue-5.10/sunrpc-fix-a-server-shutdown-leak.patch @@ -0,0 +1,48 @@ +From c2f7cb0f1f288f12942f9ae91387ec83625a8490 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Mar 2023 16:08:32 -0500 +Subject: SUNRPC: Fix a server shutdown leak + +From: Benjamin Coddington + +[ Upstream commit 9ca6705d9d609441d34f8b853e1e4a6369b3b171 ] + +Fix a race where kthread_stop() may prevent the threadfn from ever getting +called. If that happens the svc_rqst will not be cleaned up. + +Fixes: ed6473ddc704 ("NFSv4: Fix callback server shutdown") +Signed-off-by: Benjamin Coddington +Reviewed-by: Jeff Layton +Signed-off-by: Chuck Lever +Signed-off-by: Sasha Levin +--- + net/sunrpc/svc.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c +index d38788cd9433a..af657a482ad2d 100644 +--- a/net/sunrpc/svc.c ++++ b/net/sunrpc/svc.c +@@ -800,6 +800,7 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads); + static int + svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) + { ++ struct svc_rqst *rqstp; + struct task_struct *task; + unsigned int state = serv->sv_nrthreads-1; + +@@ -808,7 +809,10 @@ svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) + task = choose_victim(serv, pool, &state); + if (task == NULL) + break; +- kthread_stop(task); ++ rqstp = kthread_data(task); ++ /* Did we lose a race to svo_function threadfn? 
*/ ++ if (kthread_stop(task) == -EINTR) ++ svc_exit_thread(rqstp); + nrservs++; + } while (nrservs < 0); + return 0; +-- +2.39.2 + diff --git a/queue-5.10/udf-fix-off-by-one-error-when-discarding-preallocati.patch b/queue-5.10/udf-fix-off-by-one-error-when-discarding-preallocati.patch new file mode 100644 index 00000000000..5aafe259627 --- /dev/null +++ b/queue-5.10/udf-fix-off-by-one-error-when-discarding-preallocati.patch @@ -0,0 +1,38 @@ +From a0dd1017e6c63ac7a75b9b4c039369f452bd8eb4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 23 Jan 2023 14:29:15 +0100 +Subject: udf: Fix off-by-one error when discarding preallocation + +From: Jan Kara + +[ Upstream commit f54aa97fb7e5329a373f9df4e5e213ced4fc8759 ] + +The condition determining whether the preallocation can be used had +an off-by-one error so we didn't discard preallocation when new +allocation was just following it. This can then confuse code in +inode_getblk(). + +CC: stable@vger.kernel.org +Fixes: 16d055656814 ("udf: Discard preallocation before extending file with a hole") +Signed-off-by: Jan Kara +Signed-off-by: Sasha Levin +--- + fs/udf/inode.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/udf/inode.c b/fs/udf/inode.c +index 81876284a83c0..d114774ecdea8 100644 +--- a/fs/udf/inode.c ++++ b/fs/udf/inode.c +@@ -442,7 +442,7 @@ static int udf_get_block(struct inode *inode, sector_t block, + * Block beyond EOF and prealloc extents? Just discard preallocation + * as it is not useful and complicates things. 
+ */ +- if (((loff_t)block) << inode->i_blkbits > iinfo->i_lenExtents) ++ if (((loff_t)block) << inode->i_blkbits >= iinfo->i_lenExtents) + udf_discard_prealloc(inode); + udf_clear_extent_cache(inode); + phys = inode_getblk(inode, block, &err, &new); +-- +2.39.2 + diff --git a/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-m.patch b/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-m.patch new file mode 100644 index 00000000000..37c43f7df04 --- /dev/null +++ b/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-m.patch @@ -0,0 +1,46 @@ +From 47186e711c93ccd622578fa4d86cc7499dfb1aff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Feb 2023 15:27:11 +0200 +Subject: usb: dwc3: pci: add support for the Intel Meteor Lake-M + +From: Heikki Krogerus + +[ Upstream commit 8e5248c3a8778f3e394e9a19195bc7a48f567ca2 ] + +This patch adds the necessary PCI IDs for Intel Meteor Lake-M +devices. + +Signed-off-by: Heikki Krogerus +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20230215132711.35668-1-heikki.krogerus@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Sasha Levin +--- + drivers/usb/dwc3/dwc3-pci.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c +index 57f4b068cf29b..054429e9b7152 100644 +--- a/drivers/usb/dwc3/dwc3-pci.c ++++ b/drivers/usb/dwc3/dwc3-pci.c +@@ -42,6 +42,7 @@ + #define PCI_DEVICE_ID_INTEL_JSP 0x4dee + #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 + #define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 ++#define PCI_DEVICE_ID_INTEL_MTLM 0x7eb1 + #define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 + #define PCI_DEVICE_ID_INTEL_MTL 0x7e7e + #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 +@@ -394,6 +395,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + ++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLM), ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, 
}, ++ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + +-- +2.39.2 + diff --git a/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-p.patch b/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-p.patch new file mode 100644 index 00000000000..eaacc7886ba --- /dev/null +++ b/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-meteor-lake-p.patch @@ -0,0 +1,51 @@ +From 00f613c8f462bab5d623365c67b5ca8defeca5e8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 25 Apr 2022 13:35:18 +0300 +Subject: usb: dwc3: pci: add support for the Intel Meteor Lake-P + +From: Heikki Krogerus + +[ Upstream commit 973e0f7a847ef13ade840d4c30729ce329a66895 ] + +This patch adds the necessary PCI IDs for Intel Meteor Lake-P +devices. + +Signed-off-by: Heikki Krogerus +Cc: stable +Link: https://lore.kernel.org/r/20220425103518.44028-1-heikki.krogerus@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M") +Signed-off-by: Sasha Levin +--- + drivers/usb/dwc3/dwc3-pci.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c +index cc97cbb67e5f4..57f4b068cf29b 100644 +--- a/drivers/usb/dwc3/dwc3-pci.c ++++ b/drivers/usb/dwc3/dwc3-pci.c +@@ -42,6 +42,8 @@ + #define PCI_DEVICE_ID_INTEL_JSP 0x4dee + #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 + #define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 ++#define PCI_DEVICE_ID_INTEL_MTLP 0x7ec1 ++#define PCI_DEVICE_ID_INTEL_MTL 0x7e7e + #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 + + #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" +@@ -392,6 +394,12 @@ static const struct pci_device_id dwc3_pci_id_table[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + ++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MTLP), ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, ++ ++ { PCI_VDEVICE(INTEL, 
PCI_DEVICE_ID_INTEL_MTL), ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, ++ + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + +-- +2.39.2 + diff --git a/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-raptor-lake-s.patch b/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-raptor-lake-s.patch new file mode 100644 index 00000000000..7e30dc358f5 --- /dev/null +++ b/queue-5.10/usb-dwc3-pci-add-support-for-the-intel-raptor-lake-s.patch @@ -0,0 +1,46 @@ +From 5dbce0765acfe614da7dd457dd1eff8e896594e2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Feb 2022 17:19:48 +0300 +Subject: usb: dwc3: pci: add support for the Intel Raptor Lake-S + +From: Heikki Krogerus + +[ Upstream commit 038438a25c45d5ac996e95a22fa9e76ff3d1f8c7 ] + +This patch adds the necessary PCI ID for Intel Raptor Lake-S +devices. + +Signed-off-by: Heikki Krogerus +Link: https://lore.kernel.org/r/20220214141948.18637-1-heikki.krogerus@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M") +Signed-off-by: Sasha Levin +--- + drivers/usb/dwc3/dwc3-pci.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c +index 114d02ebf128a..cc97cbb67e5f4 100644 +--- a/drivers/usb/dwc3/dwc3-pci.c ++++ b/drivers/usb/dwc3/dwc3-pci.c +@@ -41,6 +41,7 @@ + #define PCI_DEVICE_ID_INTEL_TGPH 0x43ee + #define PCI_DEVICE_ID_INTEL_JSP 0x4dee + #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 ++#define PCI_DEVICE_ID_INTEL_RPLS 0x7a61 + #define PCI_DEVICE_ID_INTEL_TGL 0x9a15 + + #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" +@@ -388,6 +389,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + ++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_RPLS), ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, ++ + { 
PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + +-- +2.39.2 + diff --git a/queue-5.10/usb-dwc3-pci-id-for-tiger-lake-cpu.patch b/queue-5.10/usb-dwc3-pci-id-for-tiger-lake-cpu.patch new file mode 100644 index 00000000000..7642637d4cf --- /dev/null +++ b/queue-5.10/usb-dwc3-pci-id-for-tiger-lake-cpu.patch @@ -0,0 +1,57 @@ +From 1ca135a6080ac8408989feb5e07d261595eba264 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Jan 2021 12:49:13 +0300 +Subject: usb: dwc3: pci: ID for Tiger Lake CPU + +From: Heikki Krogerus + +[ Upstream commit 73203bde3a95a48f27b2454dc6b955280c641afe ] + +Tiger Lake SOC (the versions of it that have integrated USB4 +controller) may have two DWC3 controllers. One is part of +the PCH (Platform Controller Hub, i.e. the chipset) as +usual, and the other is inside the actual CPU block. + +On all Intel platforms that have the two separate DWC3 +controllers, the one inside the CPU handles USB3 and only +USB3 traffic, while the PCH version handles USB2 and USB2 +alone. The reason for splitting the two busses like this is +to allow easy USB3 tunneling over USB4 connections. As USB2 +is not tunneled over USB4, it has dedicated USB controllers +(both xHCI and DWC3). 
+ +Acked-by: Felipe Balbi +Signed-off-by: Heikki Krogerus +Link: https://lore.kernel.org/r/20210115094914.88401-4-heikki.krogerus@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M") +Signed-off-by: Sasha Levin +--- + drivers/usb/dwc3/dwc3-pci.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c +index 70cdb59c04c81..114d02ebf128a 100644 +--- a/drivers/usb/dwc3/dwc3-pci.c ++++ b/drivers/usb/dwc3/dwc3-pci.c +@@ -41,6 +41,7 @@ + #define PCI_DEVICE_ID_INTEL_TGPH 0x43ee + #define PCI_DEVICE_ID_INTEL_JSP 0x4dee + #define PCI_DEVICE_ID_INTEL_ADLS 0x7ae1 ++#define PCI_DEVICE_ID_INTEL_TGL 0x9a15 + + #define PCI_INTEL_BXT_DSM_GUID "732b85d5-b7a7-4a1b-9ba0-4bbd00ffd511" + #define PCI_INTEL_BXT_FUNC_PMU_PWR 4 +@@ -387,6 +388,9 @@ static const struct pci_device_id dwc3_pci_id_table[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS), + (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + ++ { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGL), ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, ++ + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NL_USB), + (kernel_ulong_t) &dwc3_pci_amd_swnode, }, + { } /* Terminating Entry */ +-- +2.39.2 + diff --git a/queue-5.10/usb-dwc3-pci-register-a-software-node-for-the-dwc3-p.patch b/queue-5.10/usb-dwc3-pci-register-a-software-node-for-the-dwc3-p.patch new file mode 100644 index 00000000000..b80321879d3 --- /dev/null +++ b/queue-5.10/usb-dwc3-pci-register-a-software-node-for-the-dwc3-p.patch @@ -0,0 +1,173 @@ +From 7772a4337dc5115df15e37110e5c52e0db82c0da Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Jan 2021 12:49:12 +0300 +Subject: usb: dwc3: pci: Register a software node for the dwc3 platform device + +From: Heikki Krogerus + +[ Upstream commit e492ce9bcaa1c9661cd3dd6cff0eedf2fa640f31 ] + +By registering the software node directly instead of just +the properties in it, the driver can take 
advantage of also +the other features the software nodes have. + +Acked-by: Felipe Balbi +Signed-off-by: Heikki Krogerus +Link: https://lore.kernel.org/r/20210115094914.88401-3-heikki.krogerus@linux.intel.com +Signed-off-by: Greg Kroah-Hartman +Stable-dep-of: 8e5248c3a877 ("usb: dwc3: pci: add support for the Intel Meteor Lake-M") +Signed-off-by: Sasha Levin +--- + drivers/usb/dwc3/dwc3-pci.c | 61 ++++++++++++++++++++++--------------- + 1 file changed, 37 insertions(+), 24 deletions(-) + +diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c +index a5a8c5712bce4..70cdb59c04c81 100644 +--- a/drivers/usb/dwc3/dwc3-pci.c ++++ b/drivers/usb/dwc3/dwc3-pci.c +@@ -145,6 +145,18 @@ static const struct property_entry dwc3_pci_amd_properties[] = { + {} + }; + ++static const struct software_node dwc3_pci_intel_swnode = { ++ .properties = dwc3_pci_intel_properties, ++}; ++ ++static const struct software_node dwc3_pci_intel_mrfld_swnode = { ++ .properties = dwc3_pci_mrfld_properties, ++}; ++ ++static const struct software_node dwc3_pci_amd_swnode = { ++ .properties = dwc3_pci_amd_properties, ++}; ++ + static int dwc3_pci_quirks(struct dwc3_pci *dwc) + { + struct pci_dev *pdev = dwc->pci; +@@ -225,7 +237,6 @@ static void dwc3_pci_resume_work(struct work_struct *work) + + static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) + { +- struct property_entry *p = (struct property_entry *)id->driver_data; + struct dwc3_pci *dwc; + struct resource res[2]; + int ret; +@@ -268,7 +279,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) + dwc->dwc3->dev.parent = dev; + ACPI_COMPANION_SET(&dwc->dwc3->dev, ACPI_COMPANION(dev)); + +- ret = platform_device_add_properties(dwc->dwc3, p); ++ ret = device_add_software_node(&dwc->dwc3->dev, (void *)id->driver_data); + if (ret < 0) + goto err; + +@@ -291,6 +302,7 @@ static int dwc3_pci_probe(struct pci_dev *pci, const struct pci_device_id *id) + + return 0; + err: ++ 
device_remove_software_node(&dwc->dwc3->dev); + platform_device_put(dwc->dwc3); + return ret; + } +@@ -307,75 +319,76 @@ static void dwc3_pci_remove(struct pci_dev *pci) + #endif + device_init_wakeup(&pci->dev, false); + pm_runtime_get(&pci->dev); ++ device_remove_software_node(&dwc->dwc3->dev); + platform_device_unregister(dwc->dwc3); + } + + static const struct pci_device_id dwc3_pci_id_table[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BSW), +- (kernel_ulong_t) &dwc3_pci_intel_properties }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BYT), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_MRFLD), +- (kernel_ulong_t) &dwc3_pci_mrfld_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_mrfld_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLLP), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CMLH), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SPTLP), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SPTH), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BXT), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_BXT_M), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_APL), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_KBP), +- (kernel_ulong_t) 
&dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_GLK), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPLP), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPH), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_CNPV), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ICLLP), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_EHLLP), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGPLP), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_TGPH), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_JSP), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_ADLS), +- (kernel_ulong_t) &dwc3_pci_intel_properties, }, ++ (kernel_ulong_t) &dwc3_pci_intel_swnode, }, + + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_NL_USB), +- (kernel_ulong_t) &dwc3_pci_amd_properties, }, ++ (kernel_ulong_t) &dwc3_pci_amd_swnode, }, + { } /* Terminating Entry */ + }; + MODULE_DEVICE_TABLE(pci, dwc3_pci_id_table); +-- +2.39.2 +