From: Sasha Levin Date: Sun, 11 Feb 2024 03:04:50 +0000 (-0500) Subject: Fixes for 6.6 X-Git-Tag: v6.1.78~67 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=6a6356ab3db540356ed34dd999e3210f8cba04e3;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.6 Signed-off-by: Sasha Levin --- diff --git a/queue-6.6/af_unix-call-kfree_skb-for-dead-unix_-sk-oob_skb-in-.patch b/queue-6.6/af_unix-call-kfree_skb-for-dead-unix_-sk-oob_skb-in-.patch new file mode 100644 index 00000000000..3a991852f2e --- /dev/null +++ b/queue-6.6/af_unix-call-kfree_skb-for-dead-unix_-sk-oob_skb-in-.patch @@ -0,0 +1,112 @@ +From 3c06850c389d1f6caf044bf3d95f8b47f9b2a987 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 3 Feb 2024 10:31:49 -0800 +Subject: af_unix: Call kfree_skb() for dead unix_(sk)->oob_skb in GC. + +From: Kuniyuki Iwashima + +[ Upstream commit 1279f9d9dec2d7462823a18c29ad61359e0a007d ] + +syzbot reported a warning [0] in __unix_gc() with a repro, which +creates a socketpair and sends one socket's fd to itself using the +peer. + + socketpair(AF_UNIX, SOCK_STREAM, 0, [3, 4]) = 0 + sendmsg(4, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\360", iov_len=1}], + msg_iovlen=1, msg_control=[{cmsg_len=20, cmsg_level=SOL_SOCKET, + cmsg_type=SCM_RIGHTS, cmsg_data=[3]}], + msg_controllen=24, msg_flags=0}, MSG_OOB|MSG_PROBE|MSG_DONTWAIT|MSG_ZEROCOPY) = 1 + +This forms a self-cyclic reference that GC should finally untangle +but does not due to lack of MSG_OOB handling, resulting in memory +leak. + +Recently, commit 11498715f266 ("af_unix: Remove io_uring code for +GC.") removed io_uring's dead code in GC and revealed the problem. + +The code was executed at the final stage of GC and unconditionally +moved all GC candidates from gc_candidates to gc_inflight_list. +That papered over the reported problem by always making the following +WARN_ON_ONCE(!list_empty(&gc_candidates)) false. + +The problem has been there since commit 2aab4b969002 ("af_unix: fix +struct pid leaks in OOB support") added full scm support for MSG_OOB +while fixing another bug. + +To fix this problem, we must call kfree_skb() for unix_sk(sk)->oob_skb +if the socket still exists in gc_candidates after purging collected skb. + +Then, we need to set NULL to oob_skb before calling kfree_skb() because +it calls last fput() and triggers unix_release_sock(), where we call +duplicate kfree_skb(u->oob_skb) if not NULL. + +Note that the leaked socket remained being linked to a global list, so +kmemleak also could not detect it. We need to check /proc/net/protocol +to notice the unfreed socket. 
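+
+For illustration, the strace excerpt above corresponds to roughly the
+following userspace program (a hedged sketch reconstructed from the
+syscall trace, not the actual syzkaller reproducer; error handling and
+the extra MSG_* flags are omitted, since only MSG_OOB matters here):
+
+  #include <string.h>
+  #include <sys/socket.h>
+  #include <sys/uio.h>
+
+  int main(void)
+  {
+          int sk[2];
+          char byte = '\360';
+          struct iovec iov = { .iov_base = &byte, .iov_len = 1 };
+          union {
+                  struct cmsghdr hdr;
+                  char buf[CMSG_SPACE(sizeof(int))];
+          } u = { 0 };
+          struct msghdr msg = { 0 };
+          struct cmsghdr *cmsg;
+
+          socketpair(AF_UNIX, SOCK_STREAM, 0, sk);
+
+          msg.msg_iov = &iov;
+          msg.msg_iovlen = 1;
+          msg.msg_control = u.buf;
+          msg.msg_controllen = sizeof(u.buf);
+
+          cmsg = CMSG_FIRSTHDR(&msg);
+          cmsg->cmsg_level = SOL_SOCKET;
+          cmsg->cmsg_type = SCM_RIGHTS;
+          cmsg->cmsg_len = CMSG_LEN(sizeof(int));
+          /* queue sk[0]'s own fd onto itself via its peer: self-cycle */
+          memcpy(CMSG_DATA(cmsg), &sk[0], sizeof(int));
+
+          sendmsg(sk[1], &msg, MSG_OOB);
+          return 0;
+  }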
+ +[0]: +WARNING: CPU: 0 PID: 2863 at net/unix/garbage.c:345 __unix_gc+0xc74/0xe80 net/unix/garbage.c:345 +Modules linked in: +CPU: 0 PID: 2863 Comm: kworker/u4:11 Not tainted 6.8.0-rc1-syzkaller-00583-g1701940b1a02 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/25/2024 +Workqueue: events_unbound __unix_gc +RIP: 0010:__unix_gc+0xc74/0xe80 net/unix/garbage.c:345 +Code: 8b 5c 24 50 e9 86 f8 ff ff e8 f8 e4 22 f8 31 d2 48 c7 c6 30 6a 69 89 4c 89 ef e8 97 ef ff ff e9 80 f9 ff ff e8 dd e4 22 f8 90 <0f> 0b 90 e9 7b fd ff ff 48 89 df e8 5c e7 7c f8 e9 d3 f8 ff ff e8 +RSP: 0018:ffffc9000b03fba0 EFLAGS: 00010293 +RAX: 0000000000000000 RBX: ffffc9000b03fc10 RCX: ffffffff816c493e +RDX: ffff88802c02d940 RSI: ffffffff896982f3 RDI: ffffc9000b03fb30 +RBP: ffffc9000b03fce0 R08: 0000000000000001 R09: fffff52001607f66 +R10: 0000000000000003 R11: 0000000000000002 R12: dffffc0000000000 +R13: ffffc9000b03fc10 R14: ffffc9000b03fc10 R15: 0000000000000001 +FS: 0000000000000000(0000) GS:ffff8880b9400000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 00005559c8677a60 CR3: 000000000d57a000 CR4: 00000000003506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + process_one_work+0x889/0x15e0 kernel/workqueue.c:2633 + process_scheduled_works kernel/workqueue.c:2706 [inline] + worker_thread+0x8b9/0x12a0 kernel/workqueue.c:2787 + kthread+0x2c6/0x3b0 kernel/kthread.c:388 + ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x1b/0x30 arch/x86/entry/entry_64.S:242 + + +Reported-by: syzbot+fa3ef895554bdbfd1183@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=fa3ef895554bdbfd1183 +Fixes: 2aab4b969002 ("af_unix: fix struct pid leaks in OOB support") +Signed-off-by: Kuniyuki Iwashima +Reviewed-by: Eric Dumazet +Link: https://lore.kernel.org/r/20240203183149.63573-1-kuniyu@amazon.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/unix/garbage.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/net/unix/garbage.c b/net/unix/garbage.c +index 2405f0f9af31..8f63f0b4bf01 100644 +--- a/net/unix/garbage.c ++++ b/net/unix/garbage.c +@@ -314,6 +314,17 @@ void unix_gc(void) + /* Here we are. Hitlist is filled. Die. */ + __skb_queue_purge(&hitlist); + ++#if IS_ENABLED(CONFIG_AF_UNIX_OOB) ++ list_for_each_entry_safe(u, next, &gc_candidates, link) { ++ struct sk_buff *skb = u->oob_skb; ++ ++ if (skb) { ++ u->oob_skb = NULL; ++ kfree_skb(skb); ++ } ++ } ++#endif ++ + spin_lock(&unix_gc_lock); + + /* There could be io_uring registered files, just push them back to +-- +2.43.0 + diff --git a/queue-6.6/atm-idt77252-fix-a-memleak-in-open_card_ubr0.patch b/queue-6.6/atm-idt77252-fix-a-memleak-in-open_card_ubr0.patch new file mode 100644 index 00000000000..156f959f30b --- /dev/null +++ b/queue-6.6/atm-idt77252-fix-a-memleak-in-open_card_ubr0.patch @@ -0,0 +1,46 @@ +From 3a0270c308aa5064985c0f91dbb68d74ecdc264d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Feb 2024 20:41:05 +0800 +Subject: atm: idt77252: fix a memleak in open_card_ubr0 + +From: Zhipeng Lu + +[ Upstream commit f3616173bf9be9bf39d131b120d6eea4e6324cb5 ] + +When alloc_scq fails, card->vcs[0] (i.e. vc) should be freed. 
Otherwise, +in the following call chain: + +idt77252_init_one + |-> idt77252_dev_open + |-> open_card_ubr0 + |-> alloc_scq [failed] + |-> deinit_card + |-> vfree(card->vcs); + +card->vcs is freed and card->vcs[0] is leaked. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Zhipeng Lu +Reviewed-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + drivers/atm/idt77252.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c +index e327a0229dc1..e7f713cd70d3 100644 +--- a/drivers/atm/idt77252.c ++++ b/drivers/atm/idt77252.c +@@ -2930,6 +2930,8 @@ open_card_ubr0(struct idt77252_dev *card) + vc->scq = alloc_scq(card, vc->class); + if (!vc->scq) { + printk("%s: can't get SCQ.\n", card->name); ++ kfree(card->vcs[0]); ++ card->vcs[0] = NULL; + return -ENOMEM; + } + +-- +2.43.0 + diff --git a/queue-6.6/drm-amd-display-add-null-test-for-timing-generator-i.patch b/queue-6.6/drm-amd-display-add-null-test-for-timing-generator-i.patch new file mode 100644 index 00000000000..7aa6c41fc30 --- /dev/null +++ b/queue-6.6/drm-amd-display-add-null-test-for-timing-generator-i.patch @@ -0,0 +1,77 @@ +From ade06e2d1c2d90d3463ffdf165f7d28fb4124319 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 31 Jan 2024 08:49:41 +0530 +Subject: drm/amd/display: Add NULL test for 'timing generator' in + 'dcn21_set_pipe()' + +From: Srinivasan Shanmugam + +[ Upstream commit 66951d98d9bf45ba25acf37fe0747253fafdf298 ] + +In "u32 otg_inst = pipe_ctx->stream_res.tg->inst;" +pipe_ctx->stream_res.tg could be NULL, it is relying on the caller to +ensure the tg is not NULL. + +Fixes: 474ac4a875ca ("drm/amd/display: Implement some asic specific abm call backs.") +Cc: Yongqiang Sun +Cc: Anthony Koo +Cc: Rodrigo Siqueira +Cc: Aurabindo Pillai +Signed-off-by: Srinivasan Shanmugam +Reviewed-by: Anthony Koo +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + .../drm/amd/display/dc/dcn21/dcn21_hwseq.c | 24 +++++++++++-------- + 1 file changed, 14 insertions(+), 10 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c +index 7238930e6383..1b08749b084b 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c +@@ -206,28 +206,32 @@ void dcn21_set_abm_immediate_disable(struct pipe_ctx *pipe_ctx) + void dcn21_set_pipe(struct pipe_ctx *pipe_ctx) + { + struct abm *abm = pipe_ctx->stream_res.abm; +- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst; ++ struct timing_generator *tg = pipe_ctx->stream_res.tg; + struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; + struct dmcu *dmcu = pipe_ctx->stream->ctx->dc->res_pool->dmcu; ++ uint32_t otg_inst; ++ ++ if (!abm && !tg && !panel_cntl) ++ return; ++ ++ otg_inst = tg->inst; + + if (dmcu) { + dce110_set_pipe(pipe_ctx); + return; + } + +- if (abm && panel_cntl) { +- if (abm->funcs && abm->funcs->set_pipe_ex) { +- abm->funcs->set_pipe_ex(abm, ++ if (abm->funcs && abm->funcs->set_pipe_ex) { ++ abm->funcs->set_pipe_ex(abm, + otg_inst, + SET_ABM_PIPE_NORMAL, + panel_cntl->inst, + panel_cntl->pwrseq_inst); +- } else { +- dmub_abm_set_pipe(abm, otg_inst, +- SET_ABM_PIPE_NORMAL, +- panel_cntl->inst, +- panel_cntl->pwrseq_inst); +- } ++ } else { ++ dmub_abm_set_pipe(abm, otg_inst, ++ SET_ABM_PIPE_NORMAL, ++ panel_cntl->inst, ++ panel_cntl->pwrseq_inst); + } + } + +-- +2.43.0 + diff --git 
a/queue-6.6/drm-amd-display-fix-panel_cntl-could-be-null-in-dcn2.patch b/queue-6.6/drm-amd-display-fix-panel_cntl-could-be-null-in-dcn2.patch new file mode 100644 index 00000000000..02ba28c7f7b --- /dev/null +++ b/queue-6.6/drm-amd-display-fix-panel_cntl-could-be-null-in-dcn2.patch @@ -0,0 +1,91 @@ +From 245f04741efe9ee89c08c23aa3b1010a9ca3aef7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 27 Jan 2024 18:34:01 +0530 +Subject: drm/amd/display: Fix 'panel_cntl' could be null in + 'dcn21_set_backlight_level()' + +From: Srinivasan Shanmugam + +[ Upstream commit e96fddb32931d007db12b1fce9b5e8e4c080401b ] + +'panel_cntl' structure used to control the display panel could be null, +dereferencing it could lead to a null pointer access. + +Fixes the below: +drivers/gpu/drm/amd/amdgpu/../display/dc/hwss/dcn21/dcn21_hwseq.c:269 dcn21_set_backlight_level() error: we previously assumed 'panel_cntl' could be null (see line 250) + +Fixes: 474ac4a875ca ("drm/amd/display: Implement some asic specific abm call backs.") +Cc: Yongqiang Sun +Cc: Anthony Koo +Cc: Rodrigo Siqueira +Cc: Aurabindo Pillai +Signed-off-by: Srinivasan Shanmugam +Reviewed-by: Anthony Koo +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + .../drm/amd/display/dc/dcn21/dcn21_hwseq.c | 39 ++++++++++--------- + 1 file changed, 20 insertions(+), 19 deletions(-) + +diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c +index f99b1bc49694..7238930e6383 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hwseq.c +@@ -237,34 +237,35 @@ bool dcn21_set_backlight_level(struct pipe_ctx *pipe_ctx, + { + struct dc_context *dc = pipe_ctx->stream->ctx; + struct abm *abm = pipe_ctx->stream_res.abm; ++ struct timing_generator *tg = pipe_ctx->stream_res.tg; + struct panel_cntl *panel_cntl = pipe_ctx->stream->link->panel_cntl; ++ uint32_t otg_inst; ++ ++ if (!abm && !tg && !panel_cntl) ++ return false; ++ ++ otg_inst = tg->inst; + + if (dc->dc->res_pool->dmcu) { + dce110_set_backlight_level(pipe_ctx, backlight_pwm_u16_16, frame_ramp); + return true; + } + +- if (abm != NULL) { +- uint32_t otg_inst = pipe_ctx->stream_res.tg->inst; +- +- if (abm && panel_cntl) { +- if (abm->funcs && abm->funcs->set_pipe_ex) { +- abm->funcs->set_pipe_ex(abm, +- otg_inst, +- SET_ABM_PIPE_NORMAL, +- panel_cntl->inst, +- panel_cntl->pwrseq_inst); +- } else { +- dmub_abm_set_pipe(abm, +- otg_inst, +- SET_ABM_PIPE_NORMAL, +- panel_cntl->inst, +- panel_cntl->pwrseq_inst); +- } +- } ++ if (abm->funcs && abm->funcs->set_pipe_ex) { ++ abm->funcs->set_pipe_ex(abm, ++ otg_inst, ++ SET_ABM_PIPE_NORMAL, ++ panel_cntl->inst, ++ panel_cntl->pwrseq_inst); ++ } else { ++ dmub_abm_set_pipe(abm, ++ otg_inst, ++ SET_ABM_PIPE_NORMAL, ++ panel_cntl->inst, ++ panel_cntl->pwrseq_inst); + } + +- if (abm && abm->funcs && abm->funcs->set_backlight_level_pwm) ++ if (abm->funcs && abm->funcs->set_backlight_level_pwm) + abm->funcs->set_backlight_level_pwm(abm, backlight_pwm_u16_16, + frame_ramp, 0, panel_cntl->inst); + else +-- +2.43.0 + diff --git a/queue-6.6/drm-amd-display-implement-bounds-check-for-stream-en.patch b/queue-6.6/drm-amd-display-implement-bounds-check-for-stream-en.patch new file mode 100644 index 00000000000..41c99c92b5d --- /dev/null +++ b/queue-6.6/drm-amd-display-implement-bounds-check-for-stream-en.patch @@ -0,0 +1,55 @@ +From 4c110039ccb4eb664f528d6f37859bebe75d583e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Feb 2024 
10:20:57 +0530 +Subject: drm/amd/display: Implement bounds check for stream encoder creation + in DCN301 + +From: Srinivasan Shanmugam + +[ Upstream commit 58fca355ad37dcb5f785d9095db5f748b79c5dc2 ] + +'stream_enc_regs' array is an array of dcn10_stream_enc_registers +structures. The array is initialized with four elements, corresponding +to the four calls to stream_enc_regs() in the array initializer. This +means that valid indices for this array are 0, 1, 2, and 3. + +The error message 'stream_enc_regs' 4 <= 5 below, is indicating that +there is an attempt to access this array with an index of 5, which is +out of bounds. This could lead to undefined behavior + +Here, eng_id is used as an index to access the stream_enc_regs array. If +eng_id is 5, this would result in an out-of-bounds access on the +stream_enc_regs array. + +Thus fixing Buffer overflow error in dcn301_stream_encoder_create +reported by Smatch: +drivers/gpu/drm/amd/amdgpu/../display/dc/resource/dcn301/dcn301_resource.c:1011 dcn301_stream_encoder_create() error: buffer overflow 'stream_enc_regs' 4 <= 5 + +Fixes: 3a83e4e64bb1 ("drm/amd/display: Add dcn3.01 support to DC (v2)") +Cc: Roman Li +Cc: Rodrigo Siqueira +Cc: Aurabindo Pillai +Signed-off-by: Srinivasan Shanmugam +Reviewed-by: Roman Li +Signed-off-by: Alex Deucher +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +index 79d6697d13b6..9485fda890cd 100644 +--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c ++++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c +@@ -996,7 +996,7 @@ static struct stream_encoder *dcn301_stream_encoder_create(enum engine_id eng_id + vpg = dcn301_vpg_create(ctx, vpg_inst); + afmt = dcn301_afmt_create(ctx, afmt_inst); + +- if (!enc1 || !vpg || !afmt) { ++ if (!enc1 || !vpg || !afmt || eng_id >= ARRAY_SIZE(stream_enc_regs)) { + kfree(enc1); + kfree(vpg); + kfree(afmt); +-- +2.43.0 + diff --git a/queue-6.6/drm-i915-gvt-fix-uninitialized-variable-in-handle_mm.patch b/queue-6.6/drm-i915-gvt-fix-uninitialized-variable-in-handle_mm.patch new file mode 100644 index 00000000000..086631804bf --- /dev/null +++ b/queue-6.6/drm-i915-gvt-fix-uninitialized-variable-in-handle_mm.patch @@ -0,0 +1,41 @@ +From 975529499a37ce64c84e9bcec40fac9c579daaf5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Jan 2024 11:41:47 +0300 +Subject: drm/i915/gvt: Fix uninitialized variable in handle_mmio() + +From: Dan Carpenter + +[ Upstream commit 47caa96478b99d6d1199b89467cc3e5a6cc754ee ] + +This code prints the wrong variable in the warning message. It should +print "i" instead of "info->offset". On the first iteration "info" is +uninitialized leading to a crash and on subsequent iterations it prints +the previous offset instead of the current one. 
+ +Fixes: e0f74ed4634d ("i915/gvt: Separate the MMIO tracking table from GVT-g") +Signed-off-by: Dan Carpenter +Signed-off-by: Zhenyu Wang +Link: http://patchwork.freedesktop.org/patch/msgid/11957c20-b178-4027-9b0a-e32e9591dd7c@moroto.mountain +Reviewed-by: Zhenyu Wang +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/i915/gvt/handlers.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c +index a9f7fa9b90bd..d30f8814d9b1 100644 +--- a/drivers/gpu/drm/i915/gvt/handlers.c ++++ b/drivers/gpu/drm/i915/gvt/handlers.c +@@ -2850,8 +2850,7 @@ static int handle_mmio(struct intel_gvt_mmio_table_iter *iter, u32 offset, + for (i = start; i < end; i += 4) { + p = intel_gvt_find_mmio_info(gvt, i); + if (p) { +- WARN(1, "dup mmio definition offset %x\n", +- info->offset); ++ WARN(1, "dup mmio definition offset %x\n", i); + + /* We return -EEXIST here to make GVT-g load fail. + * So duplicated MMIO can be found as soon as +-- +2.43.0 + diff --git a/queue-6.6/drm-msm-dp-return-correct-colorimetry-for-dp_test_dy.patch b/queue-6.6/drm-msm-dp-return-correct-colorimetry-for-dp_test_dy.patch new file mode 100644 index 00000000000..04e1b451269 --- /dev/null +++ b/queue-6.6/drm-msm-dp-return-correct-colorimetry-for-dp_test_dy.patch @@ -0,0 +1,92 @@ +From da737f8ffd8d9a9c73f62083c18116330a5a1d6e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 17 Jan 2024 13:13:30 -0800 +Subject: drm/msm/dp: return correct Colorimetry for DP_TEST_DYNAMIC_RANGE_CEA + case + +From: Kuogee Hsieh + +[ Upstream commit fcccdafd91f8bdde568b86ff70848cf83f029add ] + +MSA MISC0 bit 1 to 7 contains Colorimetry Indicator Field. +dp_link_get_colorimetry_config() returns wrong colorimetry value +in the DP_TEST_DYNAMIC_RANGE_CEA case in the current implementation. +Hence fix this problem by having dp_link_get_colorimetry_config() +return defined CEA RGB colorimetry value in the case of +DP_TEST_DYNAMIC_RANGE_CEA. 
+ +Changes in V2: +-- drop retrieving colorimetry from colorspace +-- drop dr = link->dp_link.test_video.test_dyn_range assignment + +Changes in V3: +-- move defined MISCr0a Colorimetry vale to dp_reg.h +-- rewording commit title +-- rewording commit text to more precise describe this patch + +Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") +Signed-off-by: Kuogee Hsieh +Reviewed-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/574888/ +Link: https://lore.kernel.org/r/1705526010-597-1-git-send-email-quic_khsieh@quicinc.com +Signed-off-by: Abhinav Kumar +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/dp/dp_link.c | 12 +++++++----- + drivers/gpu/drm/msm/dp/dp_reg.h | 3 +++ + 2 files changed, 10 insertions(+), 5 deletions(-) + +diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c +index 487867979557..25950171caf3 100644 +--- a/drivers/gpu/drm/msm/dp/dp_link.c ++++ b/drivers/gpu/drm/msm/dp/dp_link.c +@@ -7,6 +7,7 @@ + + #include + ++#include "dp_reg.h" + #include "dp_link.h" + #include "dp_panel.h" + +@@ -1114,7 +1115,7 @@ int dp_link_process_request(struct dp_link *dp_link) + + int dp_link_get_colorimetry_config(struct dp_link *dp_link) + { +- u32 cc; ++ u32 cc = DP_MISC0_COLORIMERY_CFG_LEGACY_RGB; + struct dp_link_private *link; + + if (!dp_link) { +@@ -1128,10 +1129,11 @@ int dp_link_get_colorimetry_config(struct dp_link *dp_link) + * Unless a video pattern CTS test is ongoing, use RGB_VESA + * Only RGB_VESA and RGB_CEA supported for now + */ +- if (dp_link_is_video_pattern_requested(link)) +- cc = link->dp_link.test_video.test_dyn_range; +- else +- cc = DP_TEST_DYNAMIC_RANGE_VESA; ++ if (dp_link_is_video_pattern_requested(link)) { ++ if (link->dp_link.test_video.test_dyn_range & ++ DP_TEST_DYNAMIC_RANGE_CEA) ++ cc = DP_MISC0_COLORIMERY_CFG_CEA_RGB; ++ } + + return cc; + } +diff --git a/drivers/gpu/drm/msm/dp/dp_reg.h b/drivers/gpu/drm/msm/dp/dp_reg.h +index ea85a691e72b..78785ed4b40c 100644 +--- a/drivers/gpu/drm/msm/dp/dp_reg.h ++++ b/drivers/gpu/drm/msm/dp/dp_reg.h +@@ -143,6 +143,9 @@ + #define DP_MISC0_COLORIMETRY_CFG_SHIFT (0x00000001) + #define DP_MISC0_TEST_BITS_DEPTH_SHIFT (0x00000005) + ++#define DP_MISC0_COLORIMERY_CFG_LEGACY_RGB (0) ++#define DP_MISC0_COLORIMERY_CFG_CEA_RGB (0x04) ++ + #define REG_DP_VALID_BOUNDARY (0x00000030) + #define REG_DP_VALID_BOUNDARY_2 (0x00000034) + +-- +2.43.0 + diff --git a/queue-6.6/drm-msm-dpu-check-for-valid-hw_pp-in-dpu_encoder_hel.patch b/queue-6.6/drm-msm-dpu-check-for-valid-hw_pp-in-dpu_encoder_hel.patch new file mode 100644 index 00000000000..7df4ae7017d --- /dev/null +++ b/queue-6.6/drm-msm-dpu-check-for-valid-hw_pp-in-dpu_encoder_hel.patch @@ -0,0 +1,53 @@ +From 06de6868c5db05b2f36e5fb57bed14c19e8a6535 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 17 Jan 2024 11:41:09 -0800 +Subject: drm/msm/dpu: check for valid hw_pp in dpu_encoder_helper_phys_cleanup + +From: Abhinav Kumar + +[ Upstream commit 7f3d03c48b1eb6bc45ab20ca98b8b11be25f9f52 ] + +The commit 8b45a26f2ba9 ("drm/msm/dpu: reserve cdm blocks for writeback +in case of YUV output") introduced a smatch warning about another +conditional block in dpu_encoder_helper_phys_cleanup() which had assumed +hw_pp will always be valid which may not necessarily be true. + +Lets fix the other conditional block by making sure hw_pp is valid +before dereferencing it. 
+ +Reported-by: Dan Carpenter +Fixes: ae4d721ce100 ("drm/msm/dpu: add an API to reset the encoder related hw blocks") +Reviewed-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/574878/ +Link: https://lore.kernel.org/r/20240117194109.21609-1-quic_abhinavk@quicinc.com +Signed-off-by: Abhinav Kumar +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +index 7d4cf81fd31c..ca4e5eae8e06 100644 +--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c ++++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +@@ -2063,7 +2063,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) + } + + /* reset the merge 3D HW block */ +- if (phys_enc->hw_pp->merge_3d) { ++ if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) { + phys_enc->hw_pp->merge_3d->ops.setup_3d_mode(phys_enc->hw_pp->merge_3d, + BLEND_3D_NONE); + if (phys_enc->hw_ctl->ops.update_pending_flush_merge_3d) +@@ -2085,7 +2085,7 @@ void dpu_encoder_helper_phys_cleanup(struct dpu_encoder_phys *phys_enc) + if (phys_enc->hw_wb) + intf_cfg.wb = phys_enc->hw_wb->idx; + +- if (phys_enc->hw_pp->merge_3d) ++ if (phys_enc->hw_pp && phys_enc->hw_pp->merge_3d) + intf_cfg.merge_3d = phys_enc->hw_pp->merge_3d->idx; + + if (ctl->ops.reset_intf_cfg) +-- +2.43.0 + diff --git a/queue-6.6/drm-msms-dp-fixed-link-clock-divider-bits-be-over-wr.patch b/queue-6.6/drm-msms-dp-fixed-link-clock-divider-bits-be-over-wr.patch new file mode 100644 index 00000000000..c6719e44b47 --- /dev/null +++ b/queue-6.6/drm-msms-dp-fixed-link-clock-divider-bits-be-over-wr.patch @@ -0,0 +1,80 @@ +From b245d44ab823510cb644da7a59d5b43eac341df2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 Jan 2024 12:18:51 -0800 +Subject: drm/msms/dp: fixed link clock divider bits be over written in BPC + unknown case + +From: Kuogee Hsieh + +[ Upstream commit 77e8aad5519e04f6c1e132aaec1c5f8faf41844f ] + +Since the value of DP_TEST_BIT_DEPTH_8 is already left shifted, in the +BPC unknown case, the additional shift causes spill over to the other +bits of the [DP_CONFIGURATION_CTRL] register. +Fix this by changing the return value of dp_link_get_test_bits_depth() +in the BPC unknown case to (DP_TEST_BIT_DEPTH_8 >> DP_TEST_BIT_DEPTH_SHIFT). + +Fixes: c943b4948b58 ("drm/msm/dp: add displayPort driver support") +Signed-off-by: Kuogee Hsieh +Reviewed-by: Abhinav Kumar +Reviewed-by: Dmitry Baryshkov +Patchwork: https://patchwork.freedesktop.org/patch/573989/ +Link: https://lore.kernel.org/r/1704917931-30133-1-git-send-email-quic_khsieh@quicinc.com +[quic_abhinavk@quicinc.com: fix minor checkpatch warning to align with opening braces] +Signed-off-by: Abhinav Kumar +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/msm/dp/dp_ctrl.c | 5 ----- + drivers/gpu/drm/msm/dp/dp_link.c | 10 +++++++--- + 2 files changed, 7 insertions(+), 8 deletions(-) + +diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c +index 77a8d9366ed7..fb588fde298a 100644 +--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c ++++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c +@@ -135,11 +135,6 @@ static void dp_ctrl_config_ctrl(struct dp_ctrl_private *ctrl) + tbd = dp_link_get_test_bits_depth(ctrl->link, + ctrl->panel->dp_mode.bpp); + +- if (tbd == DP_TEST_BIT_DEPTH_UNKNOWN) { +- pr_debug("BIT_DEPTH not set. 
Configure default\n"); +- tbd = DP_TEST_BIT_DEPTH_8; +- } +- + config |= tbd << DP_CONFIGURATION_CTRL_BPC_SHIFT; + + /* Num of Lanes */ +diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c +index 6375daaeb98e..487867979557 100644 +--- a/drivers/gpu/drm/msm/dp/dp_link.c ++++ b/drivers/gpu/drm/msm/dp/dp_link.c +@@ -1211,6 +1211,9 @@ void dp_link_reset_phy_params_vx_px(struct dp_link *dp_link) + u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) + { + u32 tbd; ++ struct dp_link_private *link; ++ ++ link = container_of(dp_link, struct dp_link_private, dp_link); + + /* + * Few simplistic rules and assumptions made here: +@@ -1228,12 +1231,13 @@ u32 dp_link_get_test_bits_depth(struct dp_link *dp_link, u32 bpp) + tbd = DP_TEST_BIT_DEPTH_10; + break; + default: +- tbd = DP_TEST_BIT_DEPTH_UNKNOWN; ++ drm_dbg_dp(link->drm_dev, "bpp=%d not supported, use bpc=8\n", ++ bpp); ++ tbd = DP_TEST_BIT_DEPTH_8; + break; + } + +- if (tbd != DP_TEST_BIT_DEPTH_UNKNOWN) +- tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); ++ tbd = (tbd >> DP_TEST_BIT_DEPTH_SHIFT); + + return tbd; + } +-- +2.43.0 + diff --git a/queue-6.6/fs-ntfs3-fix-an-null-dereference-bug.patch b/queue-6.6/fs-ntfs3-fix-an-null-dereference-bug.patch new file mode 100644 index 00000000000..21e6f764b33 --- /dev/null +++ b/queue-6.6/fs-ntfs3-fix-an-null-dereference-bug.patch @@ -0,0 +1,40 @@ +From b4ef169343a15887a80a00406da2fdb26ac2623f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 17 Oct 2023 17:04:39 +0300 +Subject: fs/ntfs3: Fix an NULL dereference bug + +From: Dan Carpenter + +[ Upstream commit b2dd7b953c25ffd5912dda17e980e7168bebcf6c ] + +The issue here is when this is called from ntfs_load_attr_list(). The +"size" comes from le32_to_cpu(attr->res.data_size) so it can't overflow +on a 64bit systems but on 32bit systems the "+ 1023" can overflow and +the result is zero. This means that the kmalloc will succeed by +returning the ZERO_SIZE_PTR and then the memcpy() will crash with an +Oops on the next line. + +Fixes: be71b5cba2e6 ("fs/ntfs3: Add attrib operations") +Signed-off-by: Dan Carpenter +Signed-off-by: Konstantin Komarov +Signed-off-by: Sasha Levin +--- + fs/ntfs3/ntfs_fs.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h +index 0e6a2777870c..29a9b0b29e4f 100644 +--- a/fs/ntfs3/ntfs_fs.h ++++ b/fs/ntfs3/ntfs_fs.h +@@ -473,7 +473,7 @@ bool al_delete_le(struct ntfs_inode *ni, enum ATTR_TYPE type, CLST vcn, + int al_update(struct ntfs_inode *ni, int sync); + static inline size_t al_aligned(size_t size) + { +- return (size + 1023) & ~(size_t)1023; ++ return size_add(size, 1023) & ~(size_t)1023; + } + + /* Globals from bitfunc.c */ +-- +2.43.0 + diff --git a/queue-6.6/hwmon-aspeed-pwm-tacho-mutex-for-tach-reading.patch b/queue-6.6/hwmon-aspeed-pwm-tacho-mutex-for-tach-reading.patch new file mode 100644 index 00000000000..7aaa7f8dd63 --- /dev/null +++ b/queue-6.6/hwmon-aspeed-pwm-tacho-mutex-for-tach-reading.patch @@ -0,0 +1,65 @@ +From 7228527802330668d26f651a70479d9fca9ba6f4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 3 Nov 2023 11:30:55 +0100 +Subject: hwmon: (aspeed-pwm-tacho) mutex for tach reading + +From: Loic Prylli + +[ Upstream commit 1168491e7f53581ba7b6014a39a49cfbbb722feb ] + +the ASPEED_PTCR_RESULT Register can only hold the result for a +single fan input. Adding a mutex to protect the register until the +reading is done. 
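+
+Illustrative interleaving of the race being closed (reconstructed from
+the description above, not from an observed log):
+
+  reader A (fan 1)                      reader B (fan 2)
+  ----------------------------------    ----------------------------------
+  write ASPEED_PTCR_TRIGGER for fan 1
+                                        write ASPEED_PTCR_TRIGGER for fan 2
+  poll RPM status until done
+  read ASPEED_PTCR_RESULT  <-- now holds fan 2's measurement
+
+With the mutex below, the whole trigger/poll/read sequence for one fan
+input completes before another reader can start.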
+ +Signed-off-by: Loic Prylli +Signed-off-by: Alexander Hansen +Fixes: 2d7a548a3eff ("drivers: hwmon: Support for ASPEED PWM/Fan tach") +Link: https://lore.kernel.org/r/121d888762a1232ef403cf35230ccf7b3887083a.1699007401.git.alexander.hansen@9elements.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/aspeed-pwm-tacho.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/hwmon/aspeed-pwm-tacho.c b/drivers/hwmon/aspeed-pwm-tacho.c +index 997df4b40509..b2ae2176f11f 100644 +--- a/drivers/hwmon/aspeed-pwm-tacho.c ++++ b/drivers/hwmon/aspeed-pwm-tacho.c +@@ -193,6 +193,8 @@ struct aspeed_pwm_tacho_data { + u8 fan_tach_ch_source[16]; + struct aspeed_cooling_device *cdev[8]; + const struct attribute_group *groups[3]; ++ /* protects access to shared ASPEED_PTCR_RESULT */ ++ struct mutex tach_lock; + }; + + enum type { TYPEM, TYPEN, TYPEO }; +@@ -527,6 +529,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, + u8 fan_tach_ch_source, type, mode, both; + int ret; + ++ mutex_lock(&priv->tach_lock); ++ + regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0); + regmap_write(priv->regmap, ASPEED_PTCR_TRIGGER, 0x1 << fan_tach_ch); + +@@ -544,6 +548,8 @@ static int aspeed_get_fan_tach_ch_rpm(struct aspeed_pwm_tacho_data *priv, + ASPEED_RPM_STATUS_SLEEP_USEC, + usec); + ++ mutex_unlock(&priv->tach_lock); ++ + /* return -ETIMEDOUT if we didn't get an answer. */ + if (ret) + return ret; +@@ -903,6 +909,7 @@ static int aspeed_pwm_tacho_probe(struct platform_device *pdev) + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; ++ mutex_init(&priv->tach_lock); + priv->regmap = devm_regmap_init(dev, NULL, (__force void *)regs, + &aspeed_pwm_tacho_regmap_config); + if (IS_ERR(priv->regmap)) +-- +2.43.0 + diff --git a/queue-6.6/hwmon-coretemp-fix-bogus-core_id-to-attr-name-mappin.patch b/queue-6.6/hwmon-coretemp-fix-bogus-core_id-to-attr-name-mappin.patch new file mode 100644 index 00000000000..44ebe0dc9c2 --- /dev/null +++ b/queue-6.6/hwmon-coretemp-fix-bogus-core_id-to-attr-name-mappin.patch @@ -0,0 +1,151 @@ +From dbd182f989d9ebc131040e86edce391e08672697 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Feb 2024 17:21:35 +0800 +Subject: hwmon: (coretemp) Fix bogus core_id to attr name mapping + +From: Zhang Rui + +[ Upstream commit fdaf0c8629d4524a168cb9e4ad4231875749b28c ] + +Before commit 7108b80a542b ("hwmon/coretemp: Handle large core ID +value"), there is a fixed mapping between +1. cpu_core_id +2. the index in pdata->core_data[] array +3. the sysfs attr name, aka "tempX_" +The later two always equal cpu_core_id + 2. + +After the commit, pdata->core_data[] index is got from ida so that it +can handle sparse core ids and support more cores within a package. + +However, the commit erroneously maps the sysfs attr name to +pdata->core_data[] index instead of cpu_core_id + 2. + +As a result, the code is not aligned with the comments, and brings user +visible changes in hwmon sysfs on systems with sparse core id. 
+ +For example, before commit 7108b80a542b ("hwmon/coretemp: Handle large +core ID value"), +/sys/class/hwmon/hwmon2/temp2_label:Core 0 +/sys/class/hwmon/hwmon2/temp3_label:Core 1 +/sys/class/hwmon/hwmon2/temp4_label:Core 2 +/sys/class/hwmon/hwmon2/temp5_label:Core 3 +/sys/class/hwmon/hwmon2/temp6_label:Core 4 +/sys/class/hwmon/hwmon3/temp10_label:Core 8 +/sys/class/hwmon/hwmon3/temp11_label:Core 9 +after commit, +/sys/class/hwmon/hwmon2/temp2_label:Core 0 +/sys/class/hwmon/hwmon2/temp3_label:Core 1 +/sys/class/hwmon/hwmon2/temp4_label:Core 2 +/sys/class/hwmon/hwmon2/temp5_label:Core 3 +/sys/class/hwmon/hwmon2/temp6_label:Core 4 +/sys/class/hwmon/hwmon2/temp7_label:Core 8 +/sys/class/hwmon/hwmon2/temp8_label:Core 9 + +Restore the previous behavior and rework the code, comments and variable +names to avoid future confusions. + +Fixes: 7108b80a542b ("hwmon/coretemp: Handle large core ID value") +Signed-off-by: Zhang Rui +Link: https://lore.kernel.org/r/20240202092144.71180-3-rui.zhang@intel.com +Signed-off-by: Guenter Roeck +Signed-off-by: Sasha Levin +--- + drivers/hwmon/coretemp.c | 32 +++++++++++++++++++------------- + 1 file changed, 19 insertions(+), 13 deletions(-) + +diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c +index e78c76919111..95f4c0b00b2d 100644 +--- a/drivers/hwmon/coretemp.c ++++ b/drivers/hwmon/coretemp.c +@@ -419,7 +419,7 @@ static ssize_t show_temp(struct device *dev, + } + + static int create_core_attrs(struct temp_data *tdata, struct device *dev, +- int attr_no) ++ int index) + { + int i; + static ssize_t (*const rd_ptr[TOTAL_ATTRS]) (struct device *dev, +@@ -431,13 +431,20 @@ static int create_core_attrs(struct temp_data *tdata, struct device *dev, + }; + + for (i = 0; i < tdata->attr_size; i++) { ++ /* ++ * We map the attr number to core id of the CPU ++ * The attr number is always core id + 2 ++ * The Pkgtemp will always show up as temp1_*, if available ++ */ ++ int attr_no = tdata->is_pkg_data ? 1 : tdata->cpu_core_id + 2; ++ + snprintf(tdata->attr_name[i], CORETEMP_NAME_LENGTH, + "temp%d_%s", attr_no, suffixes[i]); + sysfs_attr_init(&tdata->sd_attrs[i].dev_attr.attr); + tdata->sd_attrs[i].dev_attr.attr.name = tdata->attr_name[i]; + tdata->sd_attrs[i].dev_attr.attr.mode = 0444; + tdata->sd_attrs[i].dev_attr.show = rd_ptr[i]; +- tdata->sd_attrs[i].index = attr_no; ++ tdata->sd_attrs[i].index = index; + tdata->attrs[i] = &tdata->sd_attrs[i].dev_attr.attr; + } + tdata->attr_group.attrs = tdata->attrs; +@@ -495,26 +502,25 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, + struct platform_data *pdata = platform_get_drvdata(pdev); + struct cpuinfo_x86 *c = &cpu_data(cpu); + u32 eax, edx; +- int err, index, attr_no; ++ int err, index; + + if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) + return 0; + + /* +- * Find attr number for sysfs: +- * We map the attr number to core id of the CPU +- * The attr number is always core id + 2 +- * The Pkgtemp will always show up as temp1_*, if available ++ * Get the index of tdata in pdata->core_data[] ++ * tdata for package: pdata->core_data[1] ++ * tdata for core: pdata->core_data[2] .. 
pdata->core_data[NUM_REAL_CORES + 1] + */ + if (pkg_flag) { +- attr_no = PKG_SYSFS_ATTR_NO; ++ index = PKG_SYSFS_ATTR_NO; + } else { + index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL); + if (index < 0) + return index; + + pdata->cpu_map[index] = topology_core_id(cpu); +- attr_no = index + BASE_SYSFS_ATTR_NO; ++ index += BASE_SYSFS_ATTR_NO; + } + + tdata = init_temp_data(cpu, pkg_flag); +@@ -540,20 +546,20 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, + if (get_ttarget(tdata, &pdev->dev) >= 0) + tdata->attr_size++; + +- pdata->core_data[attr_no] = tdata; ++ pdata->core_data[index] = tdata; + + /* Create sysfs interfaces */ +- err = create_core_attrs(tdata, pdata->hwmon_dev, attr_no); ++ err = create_core_attrs(tdata, pdata->hwmon_dev, index); + if (err) + goto exit_free; + + return 0; + exit_free: +- pdata->core_data[attr_no] = NULL; ++ pdata->core_data[index] = NULL; + kfree(tdata); + ida_free: + if (!pkg_flag) +- ida_free(&pdata->ida, index); ++ ida_free(&pdata->ida, index - BASE_SYSFS_ATTR_NO); + return err; + } + +-- +2.43.0 + diff --git a/queue-6.6/hwmon-coretemp-fix-out-of-bounds-memory-access.patch b/queue-6.6/hwmon-coretemp-fix-out-of-bounds-memory-access.patch new file mode 100644 index 00000000000..2e2c2fa644b --- /dev/null +++ b/queue-6.6/hwmon-coretemp-fix-out-of-bounds-memory-access.patch @@ -0,0 +1,52 @@ +From d9ceec2b4487b5b01a3e70da574d68ed65f71983 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Feb 2024 17:21:34 +0800 +Subject: hwmon: (coretemp) Fix out-of-bounds memory access + +From: Zhang Rui + +[ Upstream commit 4e440abc894585a34c2904a32cd54af1742311b3 ] + +Fix a bug that pdata->cpu_map[] is set before out-of-bounds check. +The problem might be triggered on systems with more than 128 cores per +package. 
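+
+Condensed from the diff below, the problematic ordering was (a sketch,
+not verbatim kernel code):
+
+  /* before: the array write precedes the range check */
+  index = ida_alloc(&pdata->ida, GFP_KERNEL);
+  pdata->cpu_map[index] = topology_core_id(cpu); /* index may already  */
+  attr_no = index + BASE_SYSFS_ATTR_NO;          /* exceed the array   */
+  if (attr_no > MAX_CORE_DATA - 1)               /* checked too late   */
+          goto ida_free;
+
+  /* after: ida_alloc_max() bounds the id before the write happens */
+  index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL);
+  if (index < 0)
+          return index;
+  pdata->cpu_map[index] = topology_core_id(cpu);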
+ +Fixes: 7108b80a542b ("hwmon/coretemp: Handle large core ID value") +Signed-off-by: Zhang Rui +Cc: +Link: https://lore.kernel.org/r/20240202092144.71180-2-rui.zhang@intel.com +Signed-off-by: Guenter Roeck +Stable-dep-of: fdaf0c8629d4 ("hwmon: (coretemp) Fix bogus core_id to attr name mapping") +Signed-off-by: Sasha Levin +--- + drivers/hwmon/coretemp.c | 8 ++------ + 1 file changed, 2 insertions(+), 6 deletions(-) + +diff --git a/drivers/hwmon/coretemp.c b/drivers/hwmon/coretemp.c +index ba82d1e79c13..e78c76919111 100644 +--- a/drivers/hwmon/coretemp.c ++++ b/drivers/hwmon/coretemp.c +@@ -509,18 +509,14 @@ static int create_core_data(struct platform_device *pdev, unsigned int cpu, + if (pkg_flag) { + attr_no = PKG_SYSFS_ATTR_NO; + } else { +- index = ida_alloc(&pdata->ida, GFP_KERNEL); ++ index = ida_alloc_max(&pdata->ida, NUM_REAL_CORES - 1, GFP_KERNEL); + if (index < 0) + return index; ++ + pdata->cpu_map[index] = topology_core_id(cpu); + attr_no = index + BASE_SYSFS_ATTR_NO; + } + +- if (attr_no > MAX_CORE_DATA - 1) { +- err = -ERANGE; +- goto ida_free; +- } +- + tdata = init_temp_data(cpu, pkg_flag); + if (!tdata) { + err = -ENOMEM; +-- +2.43.0 + diff --git a/queue-6.6/inet-read-sk-sk_family-once-in-inet_recv_error.patch b/queue-6.6/inet-read-sk-sk_family-once-in-inet_recv_error.patch new file mode 100644 index 00000000000..7c3ad83a282 --- /dev/null +++ b/queue-6.6/inet-read-sk-sk_family-once-in-inet_recv_error.patch @@ -0,0 +1,46 @@ +From 54c3e640e59bea15e5230139024a1abfac7c804b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Feb 2024 09:54:04 +0000 +Subject: inet: read sk->sk_family once in inet_recv_error() + +From: Eric Dumazet + +[ Upstream commit eef00a82c568944f113f2de738156ac591bbd5cd ] + +inet_recv_error() is called without holding the socket lock. + +IPv6 socket could mutate to IPv4 with IPV6_ADDRFORM +socket option and trigger a KCSAN warning. + +Fixes: f4713a3dfad0 ("net-timestamp: make tcp_recvmsg call ipv6_recv_error for AF_INET6 socks") +Signed-off-by: Eric Dumazet +Cc: Willem de Bruijn +Reviewed-by: Willem de Bruijn +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/af_inet.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c +index 1c58bd72e124..e59962f34caa 100644 +--- a/net/ipv4/af_inet.c ++++ b/net/ipv4/af_inet.c +@@ -1628,10 +1628,12 @@ EXPORT_SYMBOL(inet_current_timestamp); + + int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) + { +- if (sk->sk_family == AF_INET) ++ unsigned int family = READ_ONCE(sk->sk_family); ++ ++ if (family == AF_INET) + return ip_recv_error(sk, msg, len, addr_len); + #if IS_ENABLED(CONFIG_IPV6) +- if (sk->sk_family == AF_INET6) ++ if (family == AF_INET6) + return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); + #endif + return -EINVAL; +-- +2.43.0 + diff --git a/queue-6.6/libceph-just-wait-for-more-data-to-be-available-on-t.patch b/queue-6.6/libceph-just-wait-for-more-data-to-be-available-on-t.patch new file mode 100644 index 00000000000..d08a3b2b945 --- /dev/null +++ b/queue-6.6/libceph-just-wait-for-more-data-to-be-available-on-t.patch @@ -0,0 +1,174 @@ +From ea2ac8c6352efe881b2783c4aa6c7e6e222e3304 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Dec 2023 16:01:03 +0800 +Subject: libceph: just wait for more data to be available on the socket + +From: Xiubo Li + +[ Upstream commit 8e46a2d068c92a905d01cbb018b00d66991585ab ] + +A short read may occur while reading the message footer from the +socket. Later, when the socket is ready for another read, the +messenger invokes all read_partial_*() handlers, including +read_partial_sparse_msg_data(). The expectation is that +read_partial_sparse_msg_data() would bail, allowing the messenger to +invoke read_partial() for the footer and pick up where it left off. + +However read_partial_sparse_msg_data() violates that and ends up +calling into the state machine in the OSD client. The sparse-read +state machine assumes that it's a new op and interprets some piece of +the footer as the sparse-read header and returns bogus extents/data +length, etc. + +To determine whether read_partial_sparse_msg_data() should bail, let's +reuse cursor->total_resid. Because once it reaches to zero that means +all the extents and data have been successfully received in last read, +else it could break out when partially reading any of the extents and +data. And then osd_sparse_read() could continue where it left off. 
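+
+Step by step, the failure mode described above (reconstructed from this
+changelog, not from a trace):
+
+ 1. the footer read from the socket comes up short, so the messenger
+    returns and waits for the socket to become readable again;
+ 2. on the next pass it re-invokes all the read_partial_*() handlers;
+ 3. read_partial_sparse_msg_data() fails to bail even though its data
+    area is already complete, and calls back into the OSD client;
+ 4. the sparse-read state machine treats footer bytes as a fresh
+    sparse-read header and returns bogus extent/data lengths.
+
+Keying the read loop on cursor->total_resid (see the diff below) makes
+the handler a no-op once all extents and data have been received.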
+ +[ idryomov: changelog ] + +Link: https://tracker.ceph.com/issues/63586 +Fixes: d396f89db39a ("libceph: add sparse read support to msgr1") +Signed-off-by: Xiubo Li +Reviewed-by: Jeff Layton +Signed-off-by: Ilya Dryomov +Signed-off-by: Sasha Levin +--- + include/linux/ceph/messenger.h | 2 +- + net/ceph/messenger_v1.c | 25 +++++++++++++------------ + net/ceph/messenger_v2.c | 4 ++-- + net/ceph/osd_client.c | 9 +++------ + 4 files changed, 19 insertions(+), 21 deletions(-) + +diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h +index 2eaaabbe98cb..1717cc57cdac 100644 +--- a/include/linux/ceph/messenger.h ++++ b/include/linux/ceph/messenger.h +@@ -283,7 +283,7 @@ struct ceph_msg { + struct kref kref; + bool more_to_follow; + bool needs_out_seq; +- bool sparse_read; ++ u64 sparse_read_total; + int front_alloc_len; + + struct ceph_msgpool *pool; +diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c +index 4cb60bacf5f5..0cb61c76b9b8 100644 +--- a/net/ceph/messenger_v1.c ++++ b/net/ceph/messenger_v1.c +@@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con) + static void prepare_message_data(struct ceph_msg *msg, u32 data_len) + { + /* Initialize data cursor if it's not a sparse read */ +- if (!msg->sparse_read) +- ceph_msg_data_cursor_init(&msg->cursor, msg, data_len); ++ u64 len = msg->sparse_read_total ? : data_len; ++ ++ ceph_msg_data_cursor_init(&msg->cursor, msg, len); + } + + /* +@@ -1036,7 +1037,7 @@ static int read_partial_sparse_msg_data(struct ceph_connection *con) + if (do_datacrc) + crc = con->in_data_crc; + +- do { ++ while (cursor->total_resid) { + if (con->v1.in_sr_kvec.iov_base) + ret = read_partial_message_chunk(con, + &con->v1.in_sr_kvec, +@@ -1044,23 +1045,23 @@ static int read_partial_sparse_msg_data(struct ceph_connection *con) + &crc); + else if (cursor->sr_resid > 0) + ret = read_partial_sparse_msg_extent(con, &crc); +- +- if (ret <= 0) { +- if (do_datacrc) +- con->in_data_crc = crc; +- return ret; +- } ++ if (ret <= 0) ++ break; + + memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec)); + ret = con->ops->sparse_read(con, cursor, + (char **)&con->v1.in_sr_kvec.iov_base); ++ if (ret <= 0) { ++ ret = ret ? ret : 1; /* must return > 0 to indicate success */ ++ break; ++ } + con->v1.in_sr_len = ret; +- } while (ret > 0); ++ } + + if (do_datacrc) + con->in_data_crc = crc; + +- return ret < 0 ? 
ret : 1; /* must return > 0 to indicate success */ ++ return ret; + } + + static int read_partial_msg_data(struct ceph_connection *con) +@@ -1253,7 +1254,7 @@ static int read_partial_message(struct ceph_connection *con) + if (!m->num_data_items) + return -EIO; + +- if (m->sparse_read) ++ if (m->sparse_read_total) + ret = read_partial_sparse_msg_data(con); + else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) + ret = read_partial_msg_data_bounce(con); +diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c +index d09a39ff2cf0..a901cae2f106 100644 +--- a/net/ceph/messenger_v2.c ++++ b/net/ceph/messenger_v2.c +@@ -1132,7 +1132,7 @@ static int decrypt_tail(struct ceph_connection *con) + struct sg_table enc_sgt = {}; + struct sg_table sgt = {}; + struct page **pages = NULL; +- bool sparse = con->in_msg->sparse_read; ++ bool sparse = !!con->in_msg->sparse_read_total; + int dpos = 0; + int tail_len; + int ret; +@@ -2064,7 +2064,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con) + } + + if (data_len(msg)) { +- if (msg->sparse_read) ++ if (msg->sparse_read_total) + con->v2.in_state = IN_S_PREPARE_SPARSE_DATA; + else + con->v2.in_state = IN_S_PREPARE_READ_DATA; +diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c +index d3a759e052c8..8d9760397b88 100644 +--- a/net/ceph/osd_client.c ++++ b/net/ceph/osd_client.c +@@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con, + } + + m = ceph_msg_get(req->r_reply); +- m->sparse_read = (bool)srlen; ++ m->sparse_read_total = srlen; + + dout("get_reply tid %lld %p\n", tid, m); + +@@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con, + } + + if (o->o_sparse_op_idx < 0) { +- u64 srlen = sparse_data_requested(req); +- +- dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n", +- __func__, o->o_osd, srlen); +- ceph_msg_data_cursor_init(cursor, con->in_msg, srlen); ++ dout("%s: [%d] starting new sparse read req\n", ++ __func__, o->o_osd); + } else { + u64 end; + +-- +2.43.0 + diff --git a/queue-6.6/libceph-rename-read_sparse_msg_-to-read_partial_spar.patch b/queue-6.6/libceph-rename-read_sparse_msg_-to-read_partial_spar.patch new file mode 100644 index 00000000000..9524752fb26 --- /dev/null +++ b/queue-6.6/libceph-rename-read_sparse_msg_-to-read_partial_spar.patch @@ -0,0 +1,68 @@ +From ac25c33671479a4f4557865ef4371824ca845da2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 14 Dec 2023 09:21:15 +0800 +Subject: libceph: rename read_sparse_msg_*() to read_partial_sparse_msg_*() + +From: Xiubo Li + +[ Upstream commit ee97302fbc0c98a25732d736fc73aaf4d62c4128 ] + +These functions are supposed to behave like other read_partial_*() +handlers: the contract with messenger v1 is that the handler bails if +the area of the message it's responsible for is already processed. +This comes up when handling short reads from the socket. 
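+
+The read_partial_*() naming encodes that contract; roughly (a sketch of
+the expected behavior, not actual messenger code):
+
+  /*
+   * A read_partial_*() handler may be re-invoked after a short socket
+   * read.  It must return > 0 immediately if the message area it owns
+   * has already been fully processed, and <= 0 to wait for more data;
+   * it must never re-drive state for an area that is already done.
+   */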
+ +[ idryomov: changelog ] + +Signed-off-by: Xiubo Li +Acked-by: Jeff Layton +Signed-off-by: Ilya Dryomov +Stable-dep-of: 8e46a2d068c9 ("libceph: just wait for more data to be available on the socket") +Signed-off-by: Sasha Levin +--- + net/ceph/messenger_v1.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c +index f9a50d7f0d20..4cb60bacf5f5 100644 +--- a/net/ceph/messenger_v1.c ++++ b/net/ceph/messenger_v1.c +@@ -991,7 +991,7 @@ static inline int read_partial_message_section(struct ceph_connection *con, + return read_partial_message_chunk(con, section, sec_len, crc); + } + +-static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc) ++static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc) + { + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; + bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE); +@@ -1026,7 +1026,7 @@ static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc) + return 1; + } + +-static int read_sparse_msg_data(struct ceph_connection *con) ++static int read_partial_sparse_msg_data(struct ceph_connection *con) + { + struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor; + bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC); +@@ -1043,7 +1043,7 @@ static int read_sparse_msg_data(struct ceph_connection *con) + con->v1.in_sr_len, + &crc); + else if (cursor->sr_resid > 0) +- ret = read_sparse_msg_extent(con, &crc); ++ ret = read_partial_sparse_msg_extent(con, &crc); + + if (ret <= 0) { + if (do_datacrc) +@@ -1254,7 +1254,7 @@ static int read_partial_message(struct ceph_connection *con) + return -EIO; + + if (m->sparse_read) +- ret = read_sparse_msg_data(con); ++ ret = read_partial_sparse_msg_data(con); + else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE)) + ret = read_partial_msg_data_bounce(con); + else +-- +2.43.0 + diff --git a/queue-6.6/mm-introduce-flush_cache_vmap_early.patch b/queue-6.6/mm-introduce-flush_cache_vmap_early.patch new file mode 100644 index 00000000000..7bccf547dd3 --- /dev/null +++ b/queue-6.6/mm-introduce-flush_cache_vmap_early.patch @@ -0,0 +1,288 @@ +From d5a931e006f0fb96611c76ddc07b343b4fe0542f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 12 Dec 2023 22:34:56 +0100 +Subject: mm: Introduce flush_cache_vmap_early() + +From: Alexandre Ghiti + +[ Upstream commit 7a92fc8b4d20680e4c20289a670d8fca2d1f2c1b ] + +The pcpu setup when using the page allocator sets up a new vmalloc +mapping very early in the boot process, so early that it cannot use the +flush_cache_vmap() function which may depend on structures not yet +initialized (for example in riscv, we currently send an IPI to flush +other cpus TLB). + +But on some architectures, we must call flush_cache_vmap(): for example, +in riscv, some uarchs can cache invalid TLB entries so we need to flush +the new established mapping to avoid taking an exception. + +So fix this by introducing a new function flush_cache_vmap_early() which +is called right after setting the new page table entry and before +accessing this new mapping. This new function implements a local flush +tlb on riscv and is no-op for other architectures (same as today). 
+ +Signed-off-by: Alexandre Ghiti +Acked-by: Geert Uytterhoeven +Signed-off-by: Dennis Zhou +Stable-dep-of: d9807d60c145 ("riscv: mm: execute local TLB flush after populating vmemmap") +Signed-off-by: Sasha Levin +--- + arch/arc/include/asm/cacheflush.h | 1 + + arch/arm/include/asm/cacheflush.h | 2 ++ + arch/csky/abiv1/inc/abi/cacheflush.h | 1 + + arch/csky/abiv2/inc/abi/cacheflush.h | 1 + + arch/m68k/include/asm/cacheflush_mm.h | 1 + + arch/mips/include/asm/cacheflush.h | 2 ++ + arch/nios2/include/asm/cacheflush.h | 1 + + arch/parisc/include/asm/cacheflush.h | 1 + + arch/riscv/include/asm/cacheflush.h | 3 ++- + arch/riscv/include/asm/tlbflush.h | 1 + + arch/riscv/mm/tlbflush.c | 5 +++++ + arch/sh/include/asm/cacheflush.h | 1 + + arch/sparc/include/asm/cacheflush_32.h | 1 + + arch/sparc/include/asm/cacheflush_64.h | 1 + + arch/xtensa/include/asm/cacheflush.h | 6 ++++-- + include/asm-generic/cacheflush.h | 6 ++++++ + mm/percpu.c | 8 +------- + 17 files changed, 32 insertions(+), 10 deletions(-) + +diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h +index bd5b1a9a0544..6fc74500a9f5 100644 +--- a/arch/arc/include/asm/cacheflush.h ++++ b/arch/arc/include/asm/cacheflush.h +@@ -40,6 +40,7 @@ void dma_cache_wback(phys_addr_t start, unsigned long sz); + + /* TBD: optimize this */ + #define flush_cache_vmap(start, end) flush_cache_all() ++#define flush_cache_vmap_early(start, end) do { } while (0) + #define flush_cache_vunmap(start, end) flush_cache_all() + + #define flush_cache_dup_mm(mm) /* called on fork (VIVT only) */ +diff --git a/arch/arm/include/asm/cacheflush.h b/arch/arm/include/asm/cacheflush.h +index f6181f69577f..1075534b0a2e 100644 +--- a/arch/arm/include/asm/cacheflush.h ++++ b/arch/arm/include/asm/cacheflush.h +@@ -340,6 +340,8 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) + dsb(ishst); + } + ++#define flush_cache_vmap_early(start, end) do { } while (0) ++ + static inline void flush_cache_vunmap(unsigned long start, unsigned long end) + { + if (!cache_is_vipt_nonaliasing()) +diff --git a/arch/csky/abiv1/inc/abi/cacheflush.h b/arch/csky/abiv1/inc/abi/cacheflush.h +index 908d8b0bc4fd..d011a81575d2 100644 +--- a/arch/csky/abiv1/inc/abi/cacheflush.h ++++ b/arch/csky/abiv1/inc/abi/cacheflush.h +@@ -43,6 +43,7 @@ static inline void flush_anon_page(struct vm_area_struct *vma, + */ + extern void flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end); + #define flush_cache_vmap(start, end) cache_wbinv_all() ++#define flush_cache_vmap_early(start, end) do { } while (0) + #define flush_cache_vunmap(start, end) cache_wbinv_all() + + #define flush_icache_range(start, end) cache_wbinv_range(start, end) +diff --git a/arch/csky/abiv2/inc/abi/cacheflush.h b/arch/csky/abiv2/inc/abi/cacheflush.h +index 40be16907267..6513ac5d2578 100644 +--- a/arch/csky/abiv2/inc/abi/cacheflush.h ++++ b/arch/csky/abiv2/inc/abi/cacheflush.h +@@ -41,6 +41,7 @@ void flush_icache_mm_range(struct mm_struct *mm, + void flush_icache_deferred(struct mm_struct *mm); + + #define flush_cache_vmap(start, end) do { } while (0) ++#define flush_cache_vmap_early(start, end) do { } while (0) + #define flush_cache_vunmap(start, end) do { } while (0) + + #define copy_to_user_page(vma, page, vaddr, dst, src, len) \ +diff --git a/arch/m68k/include/asm/cacheflush_mm.h b/arch/m68k/include/asm/cacheflush_mm.h +index ed12358c4783..9a71b0148461 100644 +--- a/arch/m68k/include/asm/cacheflush_mm.h ++++ b/arch/m68k/include/asm/cacheflush_mm.h +@@ -191,6 
+191,7 @@ extern void cache_push_v(unsigned long vaddr, int len); + #define flush_cache_all() __flush_cache_all() + + #define flush_cache_vmap(start, end) flush_cache_all() ++#define flush_cache_vmap_early(start, end) do { } while (0) + #define flush_cache_vunmap(start, end) flush_cache_all() + + static inline void flush_cache_mm(struct mm_struct *mm) +diff --git a/arch/mips/include/asm/cacheflush.h b/arch/mips/include/asm/cacheflush.h +index f36c2519ed97..1f14132b3fc9 100644 +--- a/arch/mips/include/asm/cacheflush.h ++++ b/arch/mips/include/asm/cacheflush.h +@@ -97,6 +97,8 @@ static inline void flush_cache_vmap(unsigned long start, unsigned long end) + __flush_cache_vmap(); + } + ++#define flush_cache_vmap_early(start, end) do { } while (0) ++ + extern void (*__flush_cache_vunmap)(void); + + static inline void flush_cache_vunmap(unsigned long start, unsigned long end) +diff --git a/arch/nios2/include/asm/cacheflush.h b/arch/nios2/include/asm/cacheflush.h +index 348cea097792..81484a776b33 100644 +--- a/arch/nios2/include/asm/cacheflush.h ++++ b/arch/nios2/include/asm/cacheflush.h +@@ -38,6 +38,7 @@ void flush_icache_pages(struct vm_area_struct *vma, struct page *page, + #define flush_icache_pages flush_icache_pages + + #define flush_cache_vmap(start, end) flush_dcache_range(start, end) ++#define flush_cache_vmap_early(start, end) do { } while (0) + #define flush_cache_vunmap(start, end) flush_dcache_range(start, end) + + extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page, +diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h +index b4006f2a9705..ba4c05bc24d6 100644 +--- a/arch/parisc/include/asm/cacheflush.h ++++ b/arch/parisc/include/asm/cacheflush.h +@@ -41,6 +41,7 @@ void flush_kernel_vmap_range(void *vaddr, int size); + void invalidate_kernel_vmap_range(void *vaddr, int size); + + #define flush_cache_vmap(start, end) flush_cache_all() ++#define flush_cache_vmap_early(start, end) do { } while (0) + #define flush_cache_vunmap(start, end) flush_cache_all() + + void flush_dcache_folio(struct folio *folio); +diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h +index 3cb53c4df27c..a129dac4521d 100644 +--- a/arch/riscv/include/asm/cacheflush.h ++++ b/arch/riscv/include/asm/cacheflush.h +@@ -37,7 +37,8 @@ static inline void flush_dcache_page(struct page *page) + flush_icache_mm(vma->vm_mm, 0) + + #ifdef CONFIG_64BIT +-#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end) ++#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end) ++#define flush_cache_vmap_early(start, end) local_flush_tlb_kernel_range(start, end) + #endif + + #ifndef CONFIG_SMP +diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h +index 8f3418c5f172..a60416bbe190 100644 +--- a/arch/riscv/include/asm/tlbflush.h ++++ b/arch/riscv/include/asm/tlbflush.h +@@ -41,6 +41,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); + void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); + void flush_tlb_kernel_range(unsigned long start, unsigned long end); ++void local_flush_tlb_kernel_range(unsigned long start, unsigned long end); + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE + void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, +diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c +index 8723adc884c7..b1ab6cf78e9e 100644 +--- a/arch/riscv/mm/tlbflush.c ++++ 
b/arch/riscv/mm/tlbflush.c +@@ -65,6 +65,11 @@ static inline void local_flush_tlb_range_asid(unsigned long start, + local_flush_tlb_range_threshold_asid(start, size, stride, asid); + } + ++void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) ++{ ++ local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID); ++} ++ + static void __ipi_flush_tlb_all(void *info) + { + local_flush_tlb_all(); +diff --git a/arch/sh/include/asm/cacheflush.h b/arch/sh/include/asm/cacheflush.h +index 878b6b551bd2..51112f54552b 100644 +--- a/arch/sh/include/asm/cacheflush.h ++++ b/arch/sh/include/asm/cacheflush.h +@@ -90,6 +90,7 @@ extern void copy_from_user_page(struct vm_area_struct *vma, + unsigned long len); + + #define flush_cache_vmap(start, end) local_flush_cache_all(NULL) ++#define flush_cache_vmap_early(start, end) do { } while (0) + #define flush_cache_vunmap(start, end) local_flush_cache_all(NULL) + + #define flush_dcache_mmap_lock(mapping) do { } while (0) +diff --git a/arch/sparc/include/asm/cacheflush_32.h b/arch/sparc/include/asm/cacheflush_32.h +index f3b7270bf71b..9fee0ccfccb8 100644 +--- a/arch/sparc/include/asm/cacheflush_32.h ++++ b/arch/sparc/include/asm/cacheflush_32.h +@@ -48,6 +48,7 @@ static inline void flush_dcache_page(struct page *page) + #define flush_dcache_mmap_unlock(mapping) do { } while (0) + + #define flush_cache_vmap(start, end) flush_cache_all() ++#define flush_cache_vmap_early(start, end) do { } while (0) + #define flush_cache_vunmap(start, end) flush_cache_all() + + /* When a context switch happens we must flush all user windows so that +diff --git a/arch/sparc/include/asm/cacheflush_64.h b/arch/sparc/include/asm/cacheflush_64.h +index 0e879004efff..2b1261b77ecd 100644 +--- a/arch/sparc/include/asm/cacheflush_64.h ++++ b/arch/sparc/include/asm/cacheflush_64.h +@@ -75,6 +75,7 @@ void flush_ptrace_access(struct vm_area_struct *, struct page *, + #define flush_dcache_mmap_unlock(mapping) do { } while (0) + + #define flush_cache_vmap(start, end) do { } while (0) ++#define flush_cache_vmap_early(start, end) do { } while (0) + #define flush_cache_vunmap(start, end) do { } while (0) + + #endif /* !__ASSEMBLY__ */ +diff --git a/arch/xtensa/include/asm/cacheflush.h b/arch/xtensa/include/asm/cacheflush.h +index 785a00ce83c1..38bcecb0e457 100644 +--- a/arch/xtensa/include/asm/cacheflush.h ++++ b/arch/xtensa/include/asm/cacheflush.h +@@ -116,8 +116,9 @@ void flush_cache_page(struct vm_area_struct*, + #define flush_cache_mm(mm) flush_cache_all() + #define flush_cache_dup_mm(mm) flush_cache_mm(mm) + +-#define flush_cache_vmap(start,end) flush_cache_all() +-#define flush_cache_vunmap(start,end) flush_cache_all() ++#define flush_cache_vmap(start,end) flush_cache_all() ++#define flush_cache_vmap_early(start,end) do { } while (0) ++#define flush_cache_vunmap(start,end) flush_cache_all() + + void flush_dcache_folio(struct folio *folio); + #define flush_dcache_folio flush_dcache_folio +@@ -140,6 +141,7 @@ void local_flush_cache_page(struct vm_area_struct *vma, + #define flush_cache_dup_mm(mm) do { } while (0) + + #define flush_cache_vmap(start,end) do { } while (0) ++#define flush_cache_vmap_early(start,end) do { } while (0) + #define flush_cache_vunmap(start,end) do { } while (0) + + #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +diff --git a/include/asm-generic/cacheflush.h b/include/asm-generic/cacheflush.h +index 84ec53ccc450..7ee8a179d103 100644 +--- a/include/asm-generic/cacheflush.h ++++ b/include/asm-generic/cacheflush.h +@@ -91,6 +91,12 @@ static inline void 
flush_cache_vmap(unsigned long start, unsigned long end) + } + #endif + ++#ifndef flush_cache_vmap_early ++static inline void flush_cache_vmap_early(unsigned long start, unsigned long end) ++{ ++} ++#endif ++ + #ifndef flush_cache_vunmap + static inline void flush_cache_vunmap(unsigned long start, unsigned long end) + { +diff --git a/mm/percpu.c b/mm/percpu.c +index a7665de8485f..d287cebd58ca 100644 +--- a/mm/percpu.c ++++ b/mm/percpu.c +@@ -3306,13 +3306,7 @@ int __init pcpu_page_first_chunk(size_t reserved_size, pcpu_fc_cpu_to_node_fn_t + if (rc < 0) + panic("failed to map percpu area, err=%d\n", rc); + +- /* +- * FIXME: Archs with virtual cache should flush local +- * cache for the linear mapping here - something +- * equivalent to flush_cache_vmap() on the local cpu. +- * flush_cache_vmap() can't be used as most supporting +- * data structures are not set up yet. +- */ ++ flush_cache_vmap_early(unit_addr, unit_addr + ai->unit_size); + + /* copy static data */ + memcpy((void *)unit_addr, __per_cpu_load, ai->static_size); +-- +2.43.0 + diff --git a/queue-6.6/net-atlantic-fix-dma-mapping-for-ptp-hwts-ring.patch b/queue-6.6/net-atlantic-fix-dma-mapping-for-ptp-hwts-ring.patch new file mode 100644 index 00000000000..f23eee6e15a --- /dev/null +++ b/queue-6.6/net-atlantic-fix-dma-mapping-for-ptp-hwts-ring.patch @@ -0,0 +1,122 @@ +From a6b3afacd540edd114d7174adc26a8688cb6e11c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Feb 2024 10:47:51 +0100 +Subject: net: atlantic: Fix DMA mapping for PTP hwts ring + +From: Ivan Vecera + +[ Upstream commit 2e7d3b67630dfd8f178c41fa2217aa00e79a5887 ] + +Function aq_ring_hwts_rx_alloc() maps extra AQ_CFG_RXDS_DEF bytes +for PTP HWTS ring but then generic aq_ring_free() does not take this +into account. +Create and use a specific function to free HWTS ring to fix this +issue. + +Trace: +[ 215.351607] ------------[ cut here ]------------ +[ 215.351612] DMA-API: atlantic 0000:4b:00.0: device driver frees DMA memory with different size [device address=0x00000000fbdd0000] [map size=34816 bytes] [unmap size=32768 bytes] +[ 215.351635] WARNING: CPU: 33 PID: 10759 at kernel/dma/debug.c:988 check_unmap+0xa6f/0x2360 +... +[ 215.581176] Call Trace: +[ 215.583632] +[ 215.585745] ? show_trace_log_lvl+0x1c4/0x2df +[ 215.590114] ? show_trace_log_lvl+0x1c4/0x2df +[ 215.594497] ? debug_dma_free_coherent+0x196/0x210 +[ 215.599305] ? check_unmap+0xa6f/0x2360 +[ 215.603147] ? __warn+0xca/0x1d0 +[ 215.606391] ? check_unmap+0xa6f/0x2360 +[ 215.610237] ? report_bug+0x1ef/0x370 +[ 215.613921] ? handle_bug+0x3c/0x70 +[ 215.617423] ? exc_invalid_op+0x14/0x50 +[ 215.621269] ? asm_exc_invalid_op+0x16/0x20 +[ 215.625480] ? check_unmap+0xa6f/0x2360 +[ 215.629331] ? mark_lock.part.0+0xca/0xa40 +[ 215.633445] debug_dma_free_coherent+0x196/0x210 +[ 215.638079] ? __pfx_debug_dma_free_coherent+0x10/0x10 +[ 215.643242] ? slab_free_freelist_hook+0x11d/0x1d0 +[ 215.648060] dma_free_attrs+0x6d/0x130 +[ 215.651834] aq_ring_free+0x193/0x290 [atlantic] +[ 215.656487] aq_ptp_ring_free+0x67/0x110 [atlantic] +... 
+[ 216.127540] ---[ end trace 6467e5964dd2640b ]--- +[ 216.132160] DMA-API: Mapped at: +[ 216.132162] debug_dma_alloc_coherent+0x66/0x2f0 +[ 216.132165] dma_alloc_attrs+0xf5/0x1b0 +[ 216.132168] aq_ring_hwts_rx_alloc+0x150/0x1f0 [atlantic] +[ 216.132193] aq_ptp_ring_alloc+0x1bb/0x540 [atlantic] +[ 216.132213] aq_nic_init+0x4a1/0x760 [atlantic] + +Fixes: 94ad94558b0f ("net: aquantia: add PTP rings infrastructure") +Signed-off-by: Ivan Vecera +Reviewed-by: Jiri Pirko +Link: https://lore.kernel.org/r/20240201094752.883026-1-ivecera@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ethernet/aquantia/atlantic/aq_ptp.c | 4 ++-- + drivers/net/ethernet/aquantia/atlantic/aq_ring.c | 13 +++++++++++++ + drivers/net/ethernet/aquantia/atlantic/aq_ring.h | 1 + + 3 files changed, 16 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +index abd4832e4ed2..5acb3e16b567 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c +@@ -993,7 +993,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic) + return 0; + + err_exit_hwts_rx: +- aq_ring_free(&aq_ptp->hwts_rx); ++ aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); + err_exit_ptp_rx: + aq_ring_free(&aq_ptp->ptp_rx); + err_exit_ptp_tx: +@@ -1011,7 +1011,7 @@ void aq_ptp_ring_free(struct aq_nic_s *aq_nic) + + aq_ring_free(&aq_ptp->ptp_tx); + aq_ring_free(&aq_ptp->ptp_rx); +- aq_ring_free(&aq_ptp->hwts_rx); ++ aq_ring_hwts_rx_free(&aq_ptp->hwts_rx); + + aq_ptp_skb_ring_release(&aq_ptp->skb_ring); + } +diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +index cda8597b4e14..f7433abd6591 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c +@@ -919,6 +919,19 @@ void aq_ring_free(struct aq_ring_s *self) + } + } + ++void aq_ring_hwts_rx_free(struct aq_ring_s *self) ++{ ++ if (!self) ++ return; ++ ++ if (self->dx_ring) { ++ dma_free_coherent(aq_nic_get_dev(self->aq_nic), ++ self->size * self->dx_size + AQ_CFG_RXDS_DEF, ++ self->dx_ring, self->dx_ring_pa); ++ self->dx_ring = NULL; ++ } ++} ++ + unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data) + { + unsigned int count; +diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +index 52847310740a..d627ace850ff 100644 +--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.h ++++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.h +@@ -210,6 +210,7 @@ int aq_ring_rx_fill(struct aq_ring_s *self); + int aq_ring_hwts_rx_alloc(struct aq_ring_s *self, + struct aq_nic_s *aq_nic, unsigned int idx, + unsigned int size, unsigned int dx_size); ++void aq_ring_hwts_rx_free(struct aq_ring_s *self); + void aq_ring_hwts_rx_clean(struct aq_ring_s *self, struct aq_nic_s *aq_nic); + + unsigned int aq_ring_fill_stats_data(struct aq_ring_s *self, u64 *data); +-- +2.43.0 + diff --git a/queue-6.6/net-stmmac-xgmac-fix-handling-of-dpp-safety-error-fo.patch b/queue-6.6/net-stmmac-xgmac-fix-handling-of-dpp-safety-error-fo.patch new file mode 100644 index 00000000000..c1e0bff162e --- /dev/null +++ b/queue-6.6/net-stmmac-xgmac-fix-handling-of-dpp-safety-error-fo.patch @@ -0,0 +1,158 @@ +From efa4b64539844abd5c8f86c24d48c9629f43d1d9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 31 Jan 2024 10:08:28 +0800 +Subject: net: stmmac: xgmac: fix handling of DPP safety 
error for DMA channels
+
+From: Furong Xu <0x1207@gmail.com>
+
+[ Upstream commit 46eba193d04f8bd717e525eb4110f3c46c12aec3 ]
+
+Commit 56e58d6c8a56 ("net: stmmac: Implement Safety Features in
+XGMAC core") checks and reports safety errors, but leaves the
+Data Path Parity Errors for each channel in DMA entirely unhandled,
+leading to an interrupt storm.
+Fix it by checking and clearing the DMA_DPP_Interrupt_Status register.
+
+Fixes: 56e58d6c8a56 ("net: stmmac: Implement Safety Features in XGMAC core")
+Signed-off-by: Furong Xu <0x1207@gmail.com>
+Reviewed-by: Simon Horman
+Signed-off-by: David S. Miller
+Signed-off-by: Sasha Levin
+---
+ drivers/net/ethernet/stmicro/stmmac/common.h | 1 +
+ .../net/ethernet/stmicro/stmmac/dwxgmac2.h | 3 +
+ .../ethernet/stmicro/stmmac/dwxgmac2_core.c | 57 ++++++++++++++++++-
+ 3 files changed, 60 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
+index 1e996c29043d..3d4f34e178a8 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/common.h
++++ b/drivers/net/ethernet/stmicro/stmmac/common.h
+@@ -216,6 +216,7 @@ struct stmmac_safety_stats {
+ unsigned long mac_errors[32];
+ unsigned long mtl_errors[32];
+ unsigned long dma_errors[32];
++ unsigned long dma_dpp_errors[32];
+ };
+ 
+ /* Number of fields in Safety Stats */
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+index a4e8b498dea9..7d7133ef4994 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
++++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h
+@@ -319,6 +319,8 @@
+ #define XGMAC_RXCEIE BIT(4)
+ #define XGMAC_TXCEIE BIT(0)
+ #define XGMAC_MTL_ECC_INT_STATUS 0x000010cc
++#define XGMAC_MTL_DPP_CONTROL 0x000010e0
++#define XGMAC_DDPP_DISABLE BIT(0)
+ #define XGMAC_MTL_TXQ_OPMODE(x) (0x00001100 + (0x80 * (x)))
+ #define XGMAC_TQS GENMASK(25, 16)
+ #define XGMAC_TQS_SHIFT 16
+@@ -401,6 +403,7 @@
+ #define XGMAC_DCEIE BIT(1)
+ #define XGMAC_TCEIE BIT(0)
+ #define XGMAC_DMA_ECC_INT_STATUS 0x0000306c
++#define XGMAC_DMA_DPP_INT_STATUS 0x00003074
+ #define XGMAC_DMA_CH_CONTROL(x) (0x00003100 + (0x80 * (x)))
+ #define XGMAC_SPH BIT(24)
+ #define XGMAC_PBLx8 BIT(16)
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+index a74e71db79f9..e7eccc0c406f 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c
+@@ -830,6 +830,43 @@ static const struct dwxgmac3_error_desc dwxgmac3_dma_errors[32]= {
+ { false, "UNKNOWN", "Unknown Error" }, /* 31 */
+ };
+ 
++static const char * const dpp_rx_err = "Read Rx Descriptor Parity checker Error";
++static const char * const dpp_tx_err = "Read Tx Descriptor Parity checker Error";
++static const struct dwxgmac3_error_desc dwxgmac3_dma_dpp_errors[32] = {
++ { true, "TDPES0", dpp_tx_err },
++ { true, "TDPES1", dpp_tx_err },
++ { true, "TDPES2", dpp_tx_err },
++ { true, "TDPES3", dpp_tx_err },
++ { true, "TDPES4", dpp_tx_err },
++ { true, "TDPES5", dpp_tx_err },
++ { true, "TDPES6", dpp_tx_err },
++ { true, "TDPES7", dpp_tx_err },
++ { true, "TDPES8", dpp_tx_err },
++ { true, "TDPES9", dpp_tx_err },
++ { true, "TDPES10", dpp_tx_err },
++ { true, "TDPES11", dpp_tx_err },
++ { true, "TDPES12", dpp_tx_err },
++ { true, "TDPES13", dpp_tx_err },
++ { true, "TDPES14", dpp_tx_err },
++ { true, "TDPES15", dpp_tx_err },
++ { true, "RDPES0", dpp_rx_err },
++ { true, "RDPES1", dpp_rx_err },
++ { true, 
"RDPES2", dpp_rx_err }, ++ { true, "RDPES3", dpp_rx_err }, ++ { true, "RDPES4", dpp_rx_err }, ++ { true, "RDPES5", dpp_rx_err }, ++ { true, "RDPES6", dpp_rx_err }, ++ { true, "RDPES7", dpp_rx_err }, ++ { true, "RDPES8", dpp_rx_err }, ++ { true, "RDPES9", dpp_rx_err }, ++ { true, "RDPES10", dpp_rx_err }, ++ { true, "RDPES11", dpp_rx_err }, ++ { true, "RDPES12", dpp_rx_err }, ++ { true, "RDPES13", dpp_rx_err }, ++ { true, "RDPES14", dpp_rx_err }, ++ { true, "RDPES15", dpp_rx_err }, ++}; ++ + static void dwxgmac3_handle_dma_err(struct net_device *ndev, + void __iomem *ioaddr, bool correctable, + struct stmmac_safety_stats *stats) +@@ -841,6 +878,13 @@ static void dwxgmac3_handle_dma_err(struct net_device *ndev, + + dwxgmac3_log_error(ndev, value, correctable, "DMA", + dwxgmac3_dma_errors, STAT_OFF(dma_errors), stats); ++ ++ value = readl(ioaddr + XGMAC_DMA_DPP_INT_STATUS); ++ writel(value, ioaddr + XGMAC_DMA_DPP_INT_STATUS); ++ ++ dwxgmac3_log_error(ndev, value, false, "DMA_DPP", ++ dwxgmac3_dma_dpp_errors, ++ STAT_OFF(dma_dpp_errors), stats); + } + + static int +@@ -881,6 +925,12 @@ dwxgmac3_safety_feat_config(void __iomem *ioaddr, unsigned int asp, + value |= XGMAC_TMOUTEN; /* FSM Timeout Feature */ + writel(value, ioaddr + XGMAC_MAC_FSM_CONTROL); + ++ /* 5. Enable Data Path Parity Protection */ ++ value = readl(ioaddr + XGMAC_MTL_DPP_CONTROL); ++ /* already enabled by default, explicit enable it again */ ++ value &= ~XGMAC_DDPP_DISABLE; ++ writel(value, ioaddr + XGMAC_MTL_DPP_CONTROL); ++ + return 0; + } + +@@ -914,7 +964,11 @@ static int dwxgmac3_safety_feat_irq_status(struct net_device *ndev, + ret |= !corr; + } + +- err = dma & (XGMAC_DEUIS | XGMAC_DECIS); ++ /* DMA_DPP_Interrupt_Status is indicated by MCSIS bit in ++ * DMA_Safety_Interrupt_Status, so we handle DMA Data Path ++ * Parity Errors here ++ */ ++ err = dma & (XGMAC_DEUIS | XGMAC_DECIS | XGMAC_MCSIS); + corr = dma & XGMAC_DECIS; + if (err) { + dwxgmac3_handle_dma_err(ndev, ioaddr, corr, stats); +@@ -930,6 +984,7 @@ static const struct dwxgmac3_error { + { dwxgmac3_mac_errors }, + { dwxgmac3_mtl_errors }, + { dwxgmac3_dma_errors }, ++ { dwxgmac3_dma_dpp_errors }, + }; + + static int dwxgmac3_safety_feat_dump(struct stmmac_safety_stats *stats, +-- +2.43.0 + diff --git a/queue-6.6/netdevsim-avoid-potential-loop-in-nsim_dev_trap_repo.patch b/queue-6.6/netdevsim-avoid-potential-loop-in-nsim_dev_trap_repo.patch new file mode 100644 index 00000000000..40d3ab1d8ec --- /dev/null +++ b/queue-6.6/netdevsim-avoid-potential-loop-in-nsim_dev_trap_repo.patch @@ -0,0 +1,103 @@ +From a58bc391a4adb97e7ee7bdbde9bc50f27e9f82e6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Feb 2024 17:53:24 +0000 +Subject: netdevsim: avoid potential loop in nsim_dev_trap_report_work() + +From: Eric Dumazet + +[ Upstream commit ba5e1272142d051dcc57ca1d3225ad8a089f9858 ] + +Many syzbot reports include the following trace [1] + +If nsim_dev_trap_report_work() can not grab the mutex, +it should rearm itself at least one jiffie later. 
+ +[1] +Sending NMI from CPU 1 to CPUs 0: +NMI backtrace for cpu 0 +CPU: 0 PID: 32383 Comm: kworker/0:2 Not tainted 6.8.0-rc2-syzkaller-00031-g861c0981648f #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 +Workqueue: events nsim_dev_trap_report_work + RIP: 0010:bytes_is_nonzero mm/kasan/generic.c:89 [inline] + RIP: 0010:memory_is_nonzero mm/kasan/generic.c:104 [inline] + RIP: 0010:memory_is_poisoned_n mm/kasan/generic.c:129 [inline] + RIP: 0010:memory_is_poisoned mm/kasan/generic.c:161 [inline] + RIP: 0010:check_region_inline mm/kasan/generic.c:180 [inline] + RIP: 0010:kasan_check_range+0x101/0x190 mm/kasan/generic.c:189 +Code: 07 49 39 d1 75 0a 45 3a 11 b8 01 00 00 00 7c 0b 44 89 c2 e8 21 ed ff ff 83 f0 01 5b 5d 41 5c c3 48 85 d2 74 4f 48 01 ea eb 09 <48> 83 c0 01 48 39 d0 74 41 80 38 00 74 f2 eb b6 41 bc 08 00 00 00 +RSP: 0018:ffffc90012dcf998 EFLAGS: 00000046 +RAX: fffffbfff258af1e RBX: fffffbfff258af1f RCX: ffffffff8168eda3 +RDX: fffffbfff258af1f RSI: 0000000000000004 RDI: ffffffff92c578f0 +RBP: fffffbfff258af1e R08: 0000000000000000 R09: fffffbfff258af1e +R10: ffffffff92c578f3 R11: ffffffff8acbcbc0 R12: 0000000000000002 +R13: ffff88806db38400 R14: 1ffff920025b9f42 R15: ffffffff92c578e8 +FS: 0000000000000000(0000) GS:ffff8880b9800000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: 000000c00994e078 CR3: 000000002c250000 CR4: 00000000003506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + + + instrument_atomic_read include/linux/instrumented.h:68 [inline] + atomic_read include/linux/atomic/atomic-instrumented.h:32 [inline] + queued_spin_is_locked include/asm-generic/qspinlock.h:57 [inline] + debug_spin_unlock kernel/locking/spinlock_debug.c:101 [inline] + do_raw_spin_unlock+0x53/0x230 kernel/locking/spinlock_debug.c:141 + __raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:150 [inline] + _raw_spin_unlock_irqrestore+0x22/0x70 kernel/locking/spinlock.c:194 + debug_object_activate+0x349/0x540 lib/debugobjects.c:726 + debug_work_activate kernel/workqueue.c:578 [inline] + insert_work+0x30/0x230 kernel/workqueue.c:1650 + __queue_work+0x62e/0x11d0 kernel/workqueue.c:1802 + __queue_delayed_work+0x1bf/0x270 kernel/workqueue.c:1953 + queue_delayed_work_on+0x106/0x130 kernel/workqueue.c:1989 + queue_delayed_work include/linux/workqueue.h:563 [inline] + schedule_delayed_work include/linux/workqueue.h:677 [inline] + nsim_dev_trap_report_work+0x9c0/0xc80 drivers/net/netdevsim/dev.c:842 + process_one_work+0x886/0x15d0 kernel/workqueue.c:2633 + process_scheduled_works kernel/workqueue.c:2706 [inline] + worker_thread+0x8b9/0x1290 kernel/workqueue.c:2787 + kthread+0x2c6/0x3a0 kernel/kthread.c:388 + ret_from_fork+0x45/0x80 arch/x86/kernel/process.c:147 + ret_from_fork_asm+0x11/0x20 arch/x86/entry/entry_64.S:242 + + +Fixes: 012ec02ae441 ("netdevsim: convert driver to use unlocked devlink API during init/fini") +Reported-by: syzbot +Signed-off-by: Eric Dumazet +Reviewed-by: Jiri Pirko +Link: https://lore.kernel.org/r/20240201175324.3752746-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/netdevsim/dev.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c +index b4d3b9cde8bd..92a7a36b93ac 100644 +--- a/drivers/net/netdevsim/dev.c ++++ b/drivers/net/netdevsim/dev.c +@@ -835,14 +835,14 
@@ static void nsim_dev_trap_report_work(struct work_struct *work)
+ trap_report_dw.work);
+ nsim_dev = nsim_trap_data->nsim_dev;
+ 
+- /* For each running port and enabled packet trap, generate a UDP
+- * packet with a random 5-tuple and report it.
+- */
+ if (!devl_trylock(priv_to_devlink(nsim_dev))) {
+- schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 0);
++ schedule_delayed_work(&nsim_dev->trap_data->trap_report_dw, 1);
+ return;
+ }
+ 
++ /* For each running port and enabled packet trap, generate a UDP
++ * packet with a random 5-tuple and report it.
++ */
+ list_for_each_entry(nsim_dev_port, &nsim_dev->port_list, list) {
+ if (!netif_running(nsim_dev_port->ns->netdev))
+ continue;
+-- 
+2.43.0
+
diff --git a/queue-6.6/netfilter-nft_compat-narrow-down-revision-to-unsigne.patch b/queue-6.6/netfilter-nft_compat-narrow-down-revision-to-unsigne.patch
new file mode 100644
index 00000000000..d1fa3792b4a
--- /dev/null
+++ b/queue-6.6/netfilter-nft_compat-narrow-down-revision-to-unsigne.patch
@@ -0,0 +1,52 @@
+From 653db229abf64bd4c0bfe2ea0418774dcb84a288 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 1 Feb 2024 22:58:36 +0100
+Subject: netfilter: nft_compat: narrow down revision to unsigned 8-bits
+
+From: Pablo Neira Ayuso
+
+[ Upstream commit 36fa8d697132b4bed2312d700310e8a78b000c84 ]
+
+xt_find_revision() expects u8; restrict the attribute to that datatype.
+
+Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables")
+Signed-off-by: Pablo Neira Ayuso
+Signed-off-by: Sasha Levin
+---
+ net/netfilter/nft_compat.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
+index f0eeda97bfcd..001b6841a4b6 100644
+--- a/net/netfilter/nft_compat.c
++++ b/net/netfilter/nft_compat.c
+@@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
+ 
+ static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
+ [NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
+- [NFTA_TARGET_REV] = { .type = NLA_U32 },
++ [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_TARGET_INFO] = { .type = NLA_BINARY },
+ };
+ 
+@@ -419,7 +419,7 @@ static void nft_match_eval(const struct nft_expr *expr,
+ 
+ static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
+ [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
+- [NFTA_MATCH_REV] = { .type = NLA_U32 },
++ [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_MATCH_INFO] = { .type = NLA_BINARY },
+ };
+ 
+@@ -724,7 +724,7 @@ static int nfnl_compat_get_rcu(struct sk_buff *skb,
+ static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
+ [NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
+ .len = NFT_COMPAT_NAME_MAX-1 },
+- [NFTA_COMPAT_REV] = { .type = NLA_U32 },
++ [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
+ };
+ 
+-- 
+2.43.0
+
diff --git a/queue-6.6/netfilter-nft_compat-reject-unused-compat-flag.patch b/queue-6.6/netfilter-nft_compat-reject-unused-compat-flag.patch
new file mode 100644
index 00000000000..56853c56ef6
--- /dev/null
+++ b/queue-6.6/netfilter-nft_compat-reject-unused-compat-flag.patch
@@ -0,0 +1,53 @@
+From d91114e21c684075a923508674d4df6469093bc6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 1 Feb 2024 23:33:29 +0100
+Subject: netfilter: nft_compat: reject unused compat flag
+
+From: Pablo Neira Ayuso
+
+[ Upstream commit 292781c3c5485ce33bd22b2ef1b2bed709b4d672 ]
+
+Flag (1 << 0) is ignored if set and never used; reject it with EINVAL
+instead.
+
+Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables")
+Signed-off-by: Pablo Neira Ayuso
+Signed-off-by: Sasha Levin
+---
+ include/uapi/linux/netfilter/nf_tables.h | 2 ++
+ net/netfilter/nft_compat.c | 3 ++-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
+index ca30232b7bc8..117c6a9b845b 100644
+--- a/include/uapi/linux/netfilter/nf_tables.h
++++ b/include/uapi/linux/netfilter/nf_tables.h
+@@ -285,9 +285,11 @@ enum nft_rule_attributes {
+ /**
+ * enum nft_rule_compat_flags - nf_tables rule compat flags
+ *
++ * @NFT_RULE_COMPAT_F_UNUSED: unused
+ * @NFT_RULE_COMPAT_F_INV: invert the check result
+ */
+ enum nft_rule_compat_flags {
++ NFT_RULE_COMPAT_F_UNUSED = (1 << 0),
+ NFT_RULE_COMPAT_F_INV = (1 << 1),
+ NFT_RULE_COMPAT_F_MASK = NFT_RULE_COMPAT_F_INV,
+ };
+diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
+index 001b6841a4b6..ed71d5ecbe0a 100644
+--- a/net/netfilter/nft_compat.c
++++ b/net/netfilter/nft_compat.c
+@@ -212,7 +212,8 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
+ return -EINVAL;
+ 
+ flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
+- if (flags & ~NFT_RULE_COMPAT_F_MASK)
++ if (flags & NFT_RULE_COMPAT_F_UNUSED ||
++ flags & ~NFT_RULE_COMPAT_F_MASK)
+ return -EINVAL;
+ if (flags & NFT_RULE_COMPAT_F_INV)
+ *inv = true;
+-- 
+2.43.0
+
diff --git a/queue-6.6/netfilter-nft_compat-restrict-match-target-protocol-.patch b/queue-6.6/netfilter-nft_compat-restrict-match-target-protocol-.patch
new file mode 100644
index 00000000000..ecd78007770
--- /dev/null
+++ b/queue-6.6/netfilter-nft_compat-restrict-match-target-protocol-.patch
@@ -0,0 +1,51 @@
+From b4a94f7e594e451e2fa0bcf3c80305546fef8b60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 2 Feb 2024 00:05:23 +0100
+Subject: netfilter: nft_compat: restrict match/target protocol to u16
+
+From: Pablo Neira Ayuso
+
+[ Upstream commit d694b754894c93fb4d71a7f3699439dec111decc ]
+
+xt_check_{match,target} expects u16, but NFTA_RULE_COMPAT_PROTO is u32.
+
+NLA_POLICY_MAX(NLA_BE32, 65535) cannot be used because .max in
+nla_policy is s16, see 3e48be05f3c7 ("netlink: add attribute range
+validation to policy").
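+
+The resulting manual bound check is roughly (sketch of the hunk below):
+
+	u32 l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+
+	if (l4proto > U16_MAX)	/* nla_policy cannot express this bound */
+		return -EINVAL;
+	*proto = l4proto;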
+ +Fixes: 0ca743a55991 ("netfilter: nf_tables: add compatibility layer for x_tables") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_compat.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c +index ed71d5ecbe0a..1f9474fefe84 100644 +--- a/net/netfilter/nft_compat.c ++++ b/net/netfilter/nft_compat.c +@@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] + static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) + { + struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1]; ++ u32 l4proto; + u32 flags; + int err; + +@@ -218,7 +219,12 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) + if (flags & NFT_RULE_COMPAT_F_INV) + *inv = true; + +- *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); ++ l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO])); ++ if (l4proto > U16_MAX) ++ return -EINVAL; ++ ++ *proto = l4proto; ++ + return 0; + } + +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nft_ct-reject-direction-for-ct-id.patch b/queue-6.6/netfilter-nft_ct-reject-direction-for-ct-id.patch new file mode 100644 index 00000000000..3e9cf3a202b --- /dev/null +++ b/queue-6.6/netfilter-nft_ct-reject-direction-for-ct-id.patch @@ -0,0 +1,36 @@ +From 66630f1b77832ead06b2b55e899ebea930f77c69 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Feb 2024 14:59:24 +0100 +Subject: netfilter: nft_ct: reject direction for ct id + +From: Pablo Neira Ayuso + +[ Upstream commit 38ed1c7062ada30d7c11e7a7acc749bf27aa14aa ] + +Direction attribute is ignored, reject it in case this ever needs to be +supported + +Fixes: 3087c3f7c23b ("netfilter: nft_ct: Add ct id support") +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_ct.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c +index aac98a3c966e..bfd3e5a14dab 100644 +--- a/net/netfilter/nft_ct.c ++++ b/net/netfilter/nft_ct.c +@@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, + break; + #endif + case NFT_CT_ID: ++ if (tb[NFTA_CT_DIRECTION]) ++ return -EINVAL; ++ + len = sizeof(u32); + break; + default: +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nft_set_pipapo-add-helper-to-release-pcpu-.patch b/queue-6.6/netfilter-nft_set_pipapo-add-helper-to-release-pcpu-.patch new file mode 100644 index 00000000000..8f280736efc --- /dev/null +++ b/queue-6.6/netfilter-nft_set_pipapo-add-helper-to-release-pcpu-.patch @@ -0,0 +1,98 @@ +From 0b2e7293d56d0ddd2342247d2a54f8d8f9e30dbe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Feb 2024 21:52:47 +0100 +Subject: netfilter: nft_set_pipapo: add helper to release pcpu scratch area + +From: Florian Westphal + +[ Upstream commit 47b1c03c3c1a119435480a1e73f27197dc59131d ] + +After next patch simple kfree() is not enough anymore, so add +a helper for it. 
+ +Reviewed-by: Stefano Brivio +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Stable-dep-of: 5a8cdf6fd860 ("netfilter: nft_set_pipapo: remove scratch_aligned pointer") +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_set_pipapo.c | 28 +++++++++++++++++++++++----- + 1 file changed, 23 insertions(+), 5 deletions(-) + +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index 58e595a84cd0..b6bca59b7ba6 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -1101,6 +1101,24 @@ static void pipapo_map(struct nft_pipapo_match *m, + f->mt[map[i].to + j].e = e; + } + ++/** ++ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address ++ * @m: Matching data ++ * @cpu: CPU number ++ */ ++static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu) ++{ ++ struct nft_pipapo_scratch *s; ++ void *mem; ++ ++ s = *per_cpu_ptr(m->scratch, cpu); ++ if (!s) ++ return; ++ ++ mem = s; ++ kfree(mem); ++} ++ + /** + * pipapo_realloc_scratch() - Reallocate scratch maps for partial match results + * @clone: Copy of matching data with pending insertions and deletions +@@ -1133,7 +1151,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, + return -ENOMEM; + } + +- kfree(*per_cpu_ptr(clone->scratch, i)); ++ pipapo_free_scratch(clone, i); + + *per_cpu_ptr(clone->scratch, i) = scratch; + +@@ -1358,7 +1376,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) + } + out_scratch_realloc: + for_each_possible_cpu(i) +- kfree(*per_cpu_ptr(new->scratch, i)); ++ pipapo_free_scratch(new, i); + #ifdef NFT_PIPAPO_ALIGN + free_percpu(new->scratch_aligned); + #endif +@@ -1646,7 +1664,7 @@ static void pipapo_free_match(struct nft_pipapo_match *m) + int i; + + for_each_possible_cpu(i) +- kfree(*per_cpu_ptr(m->scratch, i)); ++ pipapo_free_scratch(m, i); + + #ifdef NFT_PIPAPO_ALIGN + free_percpu(m->scratch_aligned); +@@ -2247,7 +2265,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, + free_percpu(m->scratch_aligned); + #endif + for_each_possible_cpu(cpu) +- kfree(*per_cpu_ptr(m->scratch, cpu)); ++ pipapo_free_scratch(m, cpu); + free_percpu(m->scratch); + pipapo_free_fields(m); + kfree(m); +@@ -2264,7 +2282,7 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, + free_percpu(priv->clone->scratch_aligned); + #endif + for_each_possible_cpu(cpu) +- kfree(*per_cpu_ptr(priv->clone->scratch, cpu)); ++ pipapo_free_scratch(priv->clone, cpu); + free_percpu(priv->clone->scratch); + + pipapo_free_fields(priv->clone); +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nft_set_pipapo-remove-scratch_aligned-poin.patch b/queue-6.6/netfilter-nft_set_pipapo-remove-scratch_aligned-poin.patch new file mode 100644 index 00000000000..0c083fedd20 --- /dev/null +++ b/queue-6.6/netfilter-nft_set_pipapo-remove-scratch_aligned-poin.patch @@ -0,0 +1,202 @@ +From 6ec271047e28b075c64d2e953781e6257ca23d51 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 Feb 2024 10:31:29 +0100 +Subject: netfilter: nft_set_pipapo: remove scratch_aligned pointer + +From: Florian Westphal + +[ Upstream commit 5a8cdf6fd860ac5e6d08d72edbcecee049a7fec4 ] + +use ->scratch for both avx2 and the generic implementation. + +After previous change the scratch->map member is always aligned properly +for AVX2, so we can just use scratch->map in AVX2 too. + +The alignoff delta is stored in the scratchpad so we can reconstruct +the correct address to free the area again. 
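+
+In outline, the bookkeeping this describes looks like the following
+(simplified sketch; size and node are stand-ins for the real
+arguments, and error handling is omitted):
+
+	struct nft_pipapo_scratch *s;
+	void *mem, *aligned;
+
+	/* allocate with headroom, then shift so that ->map is aligned */
+	mem = kzalloc_node(size + NFT_PIPAPO_ALIGN_HEADROOM, GFP_KERNEL, node);
+	aligned = NFT_PIPAPO_LT_ALIGN(mem + offsetof(struct nft_pipapo_scratch, map));
+	s = aligned - offsetof(struct nft_pipapo_scratch, map);
+	s->align_off = (void *)s - mem;
+
+	/* on free, walk back to the address the allocator returned */
+	kfree((void *)s - s->align_off);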
+ +Fixes: 7400b063969b ("nft_set_pipapo: Introduce AVX2-based lookup implementation") +Reviewed-by: Stefano Brivio +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_set_pipapo.c | 41 +++++------------------------ + net/netfilter/nft_set_pipapo.h | 6 ++--- + net/netfilter/nft_set_pipapo_avx2.c | 2 +- + 3 files changed, 10 insertions(+), 39 deletions(-) + +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index b6bca59b7ba6..8e9b20077966 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -1116,6 +1116,7 @@ static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int c + return; + + mem = s; ++ mem -= s->align_off; + kfree(mem); + } + +@@ -1135,6 +1136,7 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, + struct nft_pipapo_scratch *scratch; + #ifdef NFT_PIPAPO_ALIGN + void *scratch_aligned; ++ u32 align_off; + #endif + scratch = kzalloc_node(struct_size(scratch, map, + bsize_max * 2) + +@@ -1153,8 +1155,6 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, + + pipapo_free_scratch(clone, i); + +- *per_cpu_ptr(clone->scratch, i) = scratch; +- + #ifdef NFT_PIPAPO_ALIGN + /* Align &scratch->map (not the struct itself): the extra + * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() +@@ -1166,8 +1166,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, + + scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); + scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); +- *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; ++ align_off = scratch_aligned - (void *)scratch; ++ ++ scratch = scratch_aligned; ++ scratch->align_off = align_off; + #endif ++ *per_cpu_ptr(clone->scratch, i) = scratch; + } + + return 0; +@@ -1320,11 +1324,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) + if (!new->scratch) + goto out_scratch; + +-#ifdef NFT_PIPAPO_ALIGN +- new->scratch_aligned = alloc_percpu(*new->scratch_aligned); +- if (!new->scratch_aligned) +- goto out_scratch; +-#endif + for_each_possible_cpu(i) + *per_cpu_ptr(new->scratch, i) = NULL; + +@@ -1377,9 +1376,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old) + out_scratch_realloc: + for_each_possible_cpu(i) + pipapo_free_scratch(new, i); +-#ifdef NFT_PIPAPO_ALIGN +- free_percpu(new->scratch_aligned); +-#endif + out_scratch: + free_percpu(new->scratch); + kfree(new); +@@ -1666,11 +1662,7 @@ static void pipapo_free_match(struct nft_pipapo_match *m) + for_each_possible_cpu(i) + pipapo_free_scratch(m, i); + +-#ifdef NFT_PIPAPO_ALIGN +- free_percpu(m->scratch_aligned); +-#endif + free_percpu(m->scratch); +- + pipapo_free_fields(m); + + kfree(m); +@@ -2165,16 +2157,6 @@ static int nft_pipapo_init(const struct nft_set *set, + for_each_possible_cpu(i) + *per_cpu_ptr(m->scratch, i) = NULL; + +-#ifdef NFT_PIPAPO_ALIGN +- m->scratch_aligned = alloc_percpu(struct nft_pipapo_scratch *); +- if (!m->scratch_aligned) { +- err = -ENOMEM; +- goto out_free; +- } +- for_each_possible_cpu(i) +- *per_cpu_ptr(m->scratch_aligned, i) = NULL; +-#endif +- + rcu_head_init(&m->rcu); + + nft_pipapo_for_each_field(f, i, m) { +@@ -2205,9 +2187,6 @@ static int nft_pipapo_init(const struct nft_set *set, + return 0; + + out_free: +-#ifdef NFT_PIPAPO_ALIGN +- free_percpu(m->scratch_aligned); +-#endif + free_percpu(m->scratch); + out_scratch: + kfree(m); +@@ -2261,9 +2240,6 @@ static void nft_pipapo_destroy(const 
struct nft_ctx *ctx, + + nft_set_pipapo_match_destroy(ctx, set, m); + +-#ifdef NFT_PIPAPO_ALIGN +- free_percpu(m->scratch_aligned); +-#endif + for_each_possible_cpu(cpu) + pipapo_free_scratch(m, cpu); + free_percpu(m->scratch); +@@ -2278,9 +2254,6 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx, + if (priv->dirty) + nft_set_pipapo_match_destroy(ctx, set, m); + +-#ifdef NFT_PIPAPO_ALIGN +- free_percpu(priv->clone->scratch_aligned); +-#endif + for_each_possible_cpu(cpu) + pipapo_free_scratch(priv->clone, cpu); + free_percpu(priv->clone->scratch); +diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h +index 75b1340c6335..a4a58812c108 100644 +--- a/net/netfilter/nft_set_pipapo.h ++++ b/net/netfilter/nft_set_pipapo.h +@@ -133,10 +133,12 @@ struct nft_pipapo_field { + /** + * struct nft_pipapo_scratch - percpu data used for lookup and matching + * @map_index: Current working bitmap index, toggled between field matches ++ * @align_off: Offset to get the originally allocated address + * @map: store partial matching results during lookup + */ + struct nft_pipapo_scratch { + u8 map_index; ++ u32 align_off; + unsigned long map[]; + }; + +@@ -144,16 +146,12 @@ struct nft_pipapo_scratch { + * struct nft_pipapo_match - Data used for lookup and matching + * @field_count Amount of fields in set + * @scratch: Preallocated per-CPU maps for partial matching results +- * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes + * @bsize_max: Maximum lookup table bucket size of all fields, in longs + * @rcu Matching data is swapped on commits + * @f: Fields, with lookup and mapping tables + */ + struct nft_pipapo_match { + int field_count; +-#ifdef NFT_PIPAPO_ALIGN +- struct nft_pipapo_scratch * __percpu *scratch_aligned; +-#endif + struct nft_pipapo_scratch * __percpu *scratch; + size_t bsize_max; + struct rcu_head rcu; +diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c +index 78213c73af2e..90e275bb3e5d 100644 +--- a/net/netfilter/nft_set_pipapo_avx2.c ++++ b/net/netfilter/nft_set_pipapo_avx2.c +@@ -1139,7 +1139,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + */ + kernel_fpu_begin_mask(0); + +- scratch = *raw_cpu_ptr(m->scratch_aligned); ++ scratch = *raw_cpu_ptr(m->scratch); + if (unlikely(!scratch)) { + kernel_fpu_end(); + return false; +-- +2.43.0 + diff --git a/queue-6.6/netfilter-nft_set_pipapo-store-index-in-scratch-maps.patch b/queue-6.6/netfilter-nft_set_pipapo-store-index-in-scratch-maps.patch new file mode 100644 index 00000000000..7dae1e33842 --- /dev/null +++ b/queue-6.6/netfilter-nft_set_pipapo-store-index-in-scratch-maps.patch @@ -0,0 +1,266 @@ +From e7b25c9e294a67190a07a8a8a4662e28b23eea6a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Feb 2024 21:52:46 +0100 +Subject: netfilter: nft_set_pipapo: store index in scratch maps + +From: Florian Westphal + +[ Upstream commit 76313d1a4aa9e30d5b43dee5efd8bcd4d8250006 ] + +Pipapo needs a scratchpad area to keep state during matching. +This state can be large and thus cannot reside on stack. + +Each set preallocates percpu areas for this. + +On each match stage, one scratchpad half starts with all-zero and the other +is inited to all-ones. + +At the end of each stage, the half that starts with all-ones is +always zero. Before next field is tested, pointers to the two halves +are swapped, i.e. resmap pointer turns into fill pointer and vice versa. 
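+
+Schematically, per lookup (illustrative pseudo-C, names shortened):
+
+	res  = scratch->map + (idx ? bsize_max : 0);
+	fill = scratch->map + (idx ? 0 : bsize_max);
+	for_each_field(f) {
+		/* refill fill[] from res[], clearing res[] as it goes */
+		swap(res, fill);
+		idx = !idx;
+	}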
+ +After the last field has been processed, pipapo stashes the +index toggle in a percpu variable, with assumption that next packet +will start with the all-zero half and sets all bits in the other to 1. + +This isn't reliable. + +There can be multiple sets and we can't be sure that the upper +and lower half of all set scratch map is always in sync (lookups +can be conditional), so one set might have swapped, but other might +not have been queried. + +Thus we need to keep the index per-set-and-cpu, just like the +scratchpad. + +Note that this bug fix is incomplete, there is a related issue. + +avx2 and normal implementation might use slightly different areas of the +map array space due to the avx2 alignment requirements, so +m->scratch (generic/fallback implementation) and ->scratch_aligned +(avx) may partially overlap. scratch and scratch_aligned are not distinct +objects, the latter is just the aligned address of the former. + +After this change, write to scratch_align->map_index may write to +scratch->map, so this issue becomes more prominent, we can set to 1 +a bit in the supposedly-all-zero area of scratch->map[]. + +A followup patch will remove the scratch_aligned and makes generic and +avx code use the same (aligned) area. + +Its done in a separate change to ease review. + +Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges") +Reviewed-by: Stefano Brivio +Signed-off-by: Florian Westphal +Signed-off-by: Pablo Neira Ayuso +Signed-off-by: Sasha Levin +--- + net/netfilter/nft_set_pipapo.c | 41 ++++++++++++++++++----------- + net/netfilter/nft_set_pipapo.h | 14 ++++++++-- + net/netfilter/nft_set_pipapo_avx2.c | 15 +++++------ + 3 files changed, 44 insertions(+), 26 deletions(-) + +diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c +index 3ff31043f714..58e595a84cd0 100644 +--- a/net/netfilter/nft_set_pipapo.c ++++ b/net/netfilter/nft_set_pipapo.c +@@ -342,9 +342,6 @@ + #include "nft_set_pipapo_avx2.h" + #include "nft_set_pipapo.h" + +-/* Current working bitmap index, toggled between field matches */ +-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); +- + /** + * pipapo_refill() - For each set bit, set bits from selected mapping table item + * @map: Bitmap to be scanned for set bits +@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) + { + struct nft_pipapo *priv = nft_set_priv(set); ++ struct nft_pipapo_scratch *scratch; + unsigned long *res_map, *fill_map; + u8 genmask = nft_genmask_cur(net); + const u8 *rp = (const u8 *)key; +@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + + local_bh_disable(); + +- map_index = raw_cpu_read(nft_pipapo_scratch_index); +- + m = rcu_dereference(priv->match); + + if (unlikely(!m || !*raw_cpu_ptr(m->scratch))) + goto out; + +- res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0); +- fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max); ++ scratch = *raw_cpu_ptr(m->scratch); ++ ++ map_index = scratch->map_index; ++ ++ res_map = scratch->map + (map_index ? m->bsize_max : 0); ++ fill_map = scratch->map + (map_index ? 
0 : m->bsize_max); + + memset(res_map, 0xff, m->bsize_max * sizeof(*res_map)); + +@@ -460,7 +460,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt, + last); + if (b < 0) { +- raw_cpu_write(nft_pipapo_scratch_index, map_index); ++ scratch->map_index = map_index; + local_bh_enable(); + + return false; +@@ -477,7 +477,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, + * current inactive bitmap is clean and can be reused as + * *next* bitmap (not initial) for the next packet. + */ +- raw_cpu_write(nft_pipapo_scratch_index, map_index); ++ scratch->map_index = map_index; + local_bh_enable(); + + return true; +@@ -1114,12 +1114,12 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, + int i; + + for_each_possible_cpu(i) { +- unsigned long *scratch; ++ struct nft_pipapo_scratch *scratch; + #ifdef NFT_PIPAPO_ALIGN +- unsigned long *scratch_aligned; ++ void *scratch_aligned; + #endif +- +- scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 + ++ scratch = kzalloc_node(struct_size(scratch, map, ++ bsize_max * 2) + + NFT_PIPAPO_ALIGN_HEADROOM, + GFP_KERNEL, cpu_to_node(i)); + if (!scratch) { +@@ -1138,7 +1138,16 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone, + *per_cpu_ptr(clone->scratch, i) = scratch; + + #ifdef NFT_PIPAPO_ALIGN +- scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch); ++ /* Align &scratch->map (not the struct itself): the extra ++ * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node() ++ * above guarantee we can waste up to those bytes in order ++ * to align the map field regardless of its offset within ++ * the struct. ++ */ ++ BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM); ++ ++ scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map); ++ scratch_aligned -= offsetof(struct nft_pipapo_scratch, map); + *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned; + #endif + } +@@ -2130,7 +2139,7 @@ static int nft_pipapo_init(const struct nft_set *set, + m->field_count = field_count; + m->bsize_max = 0; + +- m->scratch = alloc_percpu(unsigned long *); ++ m->scratch = alloc_percpu(struct nft_pipapo_scratch *); + if (!m->scratch) { + err = -ENOMEM; + goto out_scratch; +@@ -2139,7 +2148,7 @@ static int nft_pipapo_init(const struct nft_set *set, + *per_cpu_ptr(m->scratch, i) = NULL; + + #ifdef NFT_PIPAPO_ALIGN +- m->scratch_aligned = alloc_percpu(unsigned long *); ++ m->scratch_aligned = alloc_percpu(struct nft_pipapo_scratch *); + if (!m->scratch_aligned) { + err = -ENOMEM; + goto out_free; +diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h +index 2e164a319945..75b1340c6335 100644 +--- a/net/netfilter/nft_set_pipapo.h ++++ b/net/netfilter/nft_set_pipapo.h +@@ -130,6 +130,16 @@ struct nft_pipapo_field { + union nft_pipapo_map_bucket *mt; + }; + ++/** ++ * struct nft_pipapo_scratch - percpu data used for lookup and matching ++ * @map_index: Current working bitmap index, toggled between field matches ++ * @map: store partial matching results during lookup ++ */ ++struct nft_pipapo_scratch { ++ u8 map_index; ++ unsigned long map[]; ++}; ++ + /** + * struct nft_pipapo_match - Data used for lookup and matching + * @field_count Amount of fields in set +@@ -142,9 +152,9 @@ struct nft_pipapo_field { + struct nft_pipapo_match { + int field_count; + #ifdef NFT_PIPAPO_ALIGN +- unsigned long * __percpu *scratch_aligned; ++ struct nft_pipapo_scratch * __percpu *scratch_aligned; + 
#endif +- unsigned long * __percpu *scratch; ++ struct nft_pipapo_scratch * __percpu *scratch; + size_t bsize_max; + struct rcu_head rcu; + struct nft_pipapo_field f[] __counted_by(field_count); +diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c +index 52e0d026d30a..78213c73af2e 100644 +--- a/net/netfilter/nft_set_pipapo_avx2.c ++++ b/net/netfilter/nft_set_pipapo_avx2.c +@@ -71,9 +71,6 @@ + #define NFT_PIPAPO_AVX2_ZERO(reg) \ + asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg) + +-/* Current working bitmap index, toggled between field matches */ +-static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index); +- + /** + * nft_pipapo_avx2_prepare() - Prepare before main algorithm body + * +@@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) + { + struct nft_pipapo *priv = nft_set_priv(set); +- unsigned long *res, *fill, *scratch; ++ struct nft_pipapo_scratch *scratch; + u8 genmask = nft_genmask_cur(net); + const u8 *rp = (const u8 *)key; + struct nft_pipapo_match *m; + struct nft_pipapo_field *f; ++ unsigned long *res, *fill; + bool map_index; + int i, ret = 0; + +@@ -1146,10 +1144,11 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + kernel_fpu_end(); + return false; + } +- map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index); + +- res = scratch + (map_index ? m->bsize_max : 0); +- fill = scratch + (map_index ? 0 : m->bsize_max); ++ map_index = scratch->map_index; ++ ++ res = scratch->map + (map_index ? m->bsize_max : 0); ++ fill = scratch->map + (map_index ? 0 : m->bsize_max); + + /* Starting map doesn't need to be set for this implementation */ + +@@ -1221,7 +1220,7 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set, + + out: + if (i % 2) +- raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index); ++ scratch->map_index = !map_index; + kernel_fpu_end(); + + return ret >= 0; +-- +2.43.0 + diff --git a/queue-6.6/octeontx2-pf-fix-a-memleak-otx2_sq_init.patch b/queue-6.6/octeontx2-pf-fix-a-memleak-otx2_sq_init.patch new file mode 100644 index 00000000000..94368b435db --- /dev/null +++ b/queue-6.6/octeontx2-pf-fix-a-memleak-otx2_sq_init.patch @@ -0,0 +1,57 @@ +From f48ec9009147809e6ec38ad4df5005d804d7ba79 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Feb 2024 20:47:13 +0800 +Subject: octeontx2-pf: Fix a memleak otx2_sq_init + +From: Zhipeng Lu + +[ Upstream commit b09b58e31b0f43d76f79b9943da3fb7c2843dcbb ] + +When qmem_alloc and pfvf->hw_ops->sq_aq_init fails, sq->sg should be +freed to prevent memleak. + +Fixes: c9c12d339d93 ("octeontx2-pf: Add support for PTP clock") +Signed-off-by: Zhipeng Lu +Acked-by: Jiri Pirko +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + .../ethernet/marvell/octeontx2/nic/otx2_common.c | 14 ++++++++++++-- + 1 file changed, 12 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +index 629cf1659e5f..e6df4e6a78ab 100644 +--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c ++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +@@ -951,8 +951,11 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) + if (pfvf->ptp && qidx < pfvf->hw.tx_queues) { + err = qmem_alloc(pfvf->dev, &sq->timestamps, qset->sqe_cnt, + sizeof(*sq->timestamps)); +- if (err) ++ if (err) { ++ kfree(sq->sg); ++ sq->sg = NULL; + return err; ++ } + } + + sq->head = 0; +@@ -968,7 +971,14 @@ int otx2_sq_init(struct otx2_nic *pfvf, u16 qidx, u16 sqb_aura) + sq->stats.bytes = 0; + sq->stats.pkts = 0; + +- return pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); ++ err = pfvf->hw_ops->sq_aq_init(pfvf, qidx, sqb_aura); ++ if (err) { ++ kfree(sq->sg); ++ sq->sg = NULL; ++ return err; ++ } ++ ++ return 0; + + } + +-- +2.43.0 + diff --git a/queue-6.6/ppp_async-limit-mru-to-64k.patch b/queue-6.6/ppp_async-limit-mru-to-64k.patch new file mode 100644 index 00000000000..beb71610bd0 --- /dev/null +++ b/queue-6.6/ppp_async-limit-mru-to-64k.patch @@ -0,0 +1,91 @@ +From 94a7a5275ee2b155c60aafb87fb0e4ff66ee62a9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 Feb 2024 17:10:04 +0000 +Subject: ppp_async: limit MRU to 64K + +From: Eric Dumazet + +[ Upstream commit cb88cb53badb8aeb3955ad6ce80b07b598e310b8 ] + +syzbot triggered a warning [1] in __alloc_pages(): + +WARN_ON_ONCE_GFP(order > MAX_PAGE_ORDER, gfp) + +Willem fixed a similar issue in commit c0a2a1b0d631 ("ppp: limit MRU to 64K") + +Adopt the same sanity check for ppp_async_ioctl(PPPIOCSMRU) + +[1]: + + WARNING: CPU: 1 PID: 11 at mm/page_alloc.c:4543 __alloc_pages+0x308/0x698 mm/page_alloc.c:4543 +Modules linked in: +CPU: 1 PID: 11 Comm: kworker/u4:0 Not tainted 6.8.0-rc2-syzkaller-g41bccc98fb79 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 11/17/2023 +Workqueue: events_unbound flush_to_ldisc +pstate: 204000c5 (nzCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--) + pc : __alloc_pages+0x308/0x698 mm/page_alloc.c:4543 + lr : __alloc_pages+0xc8/0x698 mm/page_alloc.c:4537 +sp : ffff800093967580 +x29: ffff800093967660 x28: ffff8000939675a0 x27: dfff800000000000 +x26: ffff70001272ceb4 x25: 0000000000000000 x24: ffff8000939675c0 +x23: 0000000000000000 x22: 0000000000060820 x21: 1ffff0001272ceb8 +x20: ffff8000939675e0 x19: 0000000000000010 x18: ffff800093967120 +x17: ffff800083bded5c x16: ffff80008ac97500 x15: 0000000000000005 +x14: 1ffff0001272cebc x13: 0000000000000000 x12: 0000000000000000 +x11: ffff70001272cec1 x10: 1ffff0001272cec0 x9 : 0000000000000001 +x8 : ffff800091c91000 x7 : 0000000000000000 x6 : 000000000000003f +x5 : 00000000ffffffff x4 : 0000000000000000 x3 : 0000000000000020 +x2 : 0000000000000008 x1 : 0000000000000000 x0 : ffff8000939675e0 +Call trace: + __alloc_pages+0x308/0x698 mm/page_alloc.c:4543 + __alloc_pages_node include/linux/gfp.h:238 [inline] + alloc_pages_node include/linux/gfp.h:261 [inline] + __kmalloc_large_node+0xbc/0x1fc mm/slub.c:3926 + __do_kmalloc_node mm/slub.c:3969 [inline] + __kmalloc_node_track_caller+0x418/0x620 mm/slub.c:4001 + kmalloc_reserve+0x17c/0x23c net/core/skbuff.c:590 + __alloc_skb+0x1c8/0x3d8 net/core/skbuff.c:651 + __netdev_alloc_skb+0xb8/0x3e8 
net/core/skbuff.c:715 + netdev_alloc_skb include/linux/skbuff.h:3235 [inline] + dev_alloc_skb include/linux/skbuff.h:3248 [inline] + ppp_async_input drivers/net/ppp/ppp_async.c:863 [inline] + ppp_asynctty_receive+0x588/0x186c drivers/net/ppp/ppp_async.c:341 + tty_ldisc_receive_buf+0x12c/0x15c drivers/tty/tty_buffer.c:390 + tty_port_default_receive_buf+0x74/0xac drivers/tty/tty_port.c:37 + receive_buf drivers/tty/tty_buffer.c:444 [inline] + flush_to_ldisc+0x284/0x6e4 drivers/tty/tty_buffer.c:494 + process_one_work+0x694/0x1204 kernel/workqueue.c:2633 + process_scheduled_works kernel/workqueue.c:2706 [inline] + worker_thread+0x938/0xef4 kernel/workqueue.c:2787 + kthread+0x288/0x310 kernel/kthread.c:388 + ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:860 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-and-tested-by: syzbot+c5da1f087c9e4ec6c933@syzkaller.appspotmail.com +Signed-off-by: Eric Dumazet +Reviewed-by: Willem de Bruijn +Link: https://lore.kernel.org/r/20240205171004.1059724-1-edumazet@google.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + drivers/net/ppp/ppp_async.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/drivers/net/ppp/ppp_async.c b/drivers/net/ppp/ppp_async.c +index fbaaa8c102a1..e94a4b08fd63 100644 +--- a/drivers/net/ppp/ppp_async.c ++++ b/drivers/net/ppp/ppp_async.c +@@ -460,6 +460,10 @@ ppp_async_ioctl(struct ppp_channel *chan, unsigned int cmd, unsigned long arg) + case PPPIOCSMRU: + if (get_user(val, p)) + break; ++ if (val > U16_MAX) { ++ err = -EINVAL; ++ break; ++ } + if (val < PPP_MRU) + val = PPP_MRU; + ap->mru = val; +-- +2.43.0 + diff --git a/queue-6.6/riscv-declare-overflow_stack-as-exported-from-traps..patch b/queue-6.6/riscv-declare-overflow_stack-as-exported-from-traps..patch new file mode 100644 index 00000000000..3b3ab9254a3 --- /dev/null +++ b/queue-6.6/riscv-declare-overflow_stack-as-exported-from-traps..patch @@ -0,0 +1,47 @@ +From 963d9c8921a05bb13ced78224e8812b1873dcf00 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 23 Nov 2023 13:42:14 +0000 +Subject: riscv: declare overflow_stack as exported from traps.c + +From: Ben Dooks + +[ Upstream commit 2cf963787529f615f7c93bdcf13a5e82029e7f38 ] + +The percpu area overflow_stacks is exported from arch/riscv/kernel/traps.c +for use in the entry code, but is not declared anywhere. Add the relevant +declaration to arch/riscv/include/asm/stacktrace.h to silence the following +sparse warning: + +arch/riscv/kernel/traps.c:395:1: warning: symbol '__pcpu_scope_overflow_stack' was not declared. Should it be static? + +We don't add the stackinfo_get_overflow() call as for some of the other +architectures as this doesn't seem to be used yet, so just silence the +warning. 
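+
+For reference, the declaration pairs with the existing definition
+roughly as follows (sketch; any alignment attributes on the
+definition are omitted):
+
+	/* arch/riscv/kernel/traps.c (already present) */
+	DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+
+	/* arch/riscv/include/asm/stacktrace.h (this patch) */
+	DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);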
+ +Signed-off-by: Ben Dooks +Reviewed-by: Conor Dooley +Fixes: be97d0db5f44 ("riscv: VMAP_STACK overflow detection thread-safe") +Link: https://lore.kernel.org/r/20231123134214.81481-1-ben.dooks@codethink.co.uk +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/stacktrace.h | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h +index f7e8ef2418b9..b1495a7e06ce 100644 +--- a/arch/riscv/include/asm/stacktrace.h ++++ b/arch/riscv/include/asm/stacktrace.h +@@ -21,4 +21,9 @@ static inline bool on_thread_stack(void) + return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1)); + } + ++ ++#ifdef CONFIG_VMAP_STACK ++DECLARE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack); ++#endif /* CONFIG_VMAP_STACK */ ++ + #endif /* _ASM_RISCV_STACKTRACE_H */ +-- +2.43.0 + diff --git a/queue-6.6/riscv-fix-arch_hugetlb_migration_supported-for-napot.patch b/queue-6.6/riscv-fix-arch_hugetlb_migration_supported-for-napot.patch new file mode 100644 index 00000000000..e7e4f829c64 --- /dev/null +++ b/queue-6.6/riscv-fix-arch_hugetlb_migration_supported-for-napot.patch @@ -0,0 +1,87 @@ +From 06549748622b3cbbd58b9f2071c0bca2f8fec52a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Jan 2024 13:01:14 +0100 +Subject: riscv: Fix arch_hugetlb_migration_supported() for NAPOT + +From: Alexandre Ghiti + +[ Upstream commit ce68c035457bdd025a9961e0ba2157323090c581 ] + +arch_hugetlb_migration_supported() must be reimplemented to add support +for NAPOT hugepages, which is done here. + +Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") +Signed-off-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20240130120114.106003-1-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/hugetlb.h | 3 +++ + arch/riscv/mm/hugetlbpage.c | 16 +++++++++++++--- + 2 files changed, 16 insertions(+), 3 deletions(-) + +diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h +index 4c5b0e929890..20f9c3ba2341 100644 +--- a/arch/riscv/include/asm/hugetlb.h ++++ b/arch/riscv/include/asm/hugetlb.h +@@ -11,6 +11,9 @@ static inline void arch_clear_hugepage_flags(struct page *page) + } + #define arch_clear_hugepage_flags arch_clear_hugepage_flags + ++bool arch_hugetlb_migration_supported(struct hstate *h); ++#define arch_hugetlb_migration_supported arch_hugetlb_migration_supported ++ + #ifdef CONFIG_RISCV_ISA_SVNAPOT + #define __HAVE_ARCH_HUGE_PTE_CLEAR + void huge_pte_clear(struct mm_struct *mm, unsigned long addr, +diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c +index 87af75ee7186..e7b69281875b 100644 +--- a/arch/riscv/mm/hugetlbpage.c ++++ b/arch/riscv/mm/hugetlbpage.c +@@ -364,7 +364,7 @@ void huge_pte_clear(struct mm_struct *mm, + pte_clear(mm, addr, ptep); + } + +-static __init bool is_napot_size(unsigned long size) ++static bool is_napot_size(unsigned long size) + { + unsigned long order; + +@@ -392,7 +392,7 @@ arch_initcall(napot_hugetlbpages_init); + + #else + +-static __init bool is_napot_size(unsigned long size) ++static bool is_napot_size(unsigned long size) + { + return false; + } +@@ -409,7 +409,7 @@ int pmd_huge(pmd_t pmd) + return pmd_leaf(pmd); + } + +-bool __init arch_hugetlb_valid_size(unsigned long size) ++static bool __hugetlb_valid_size(unsigned long size) + { + if (size == HPAGE_SIZE) + return true; +@@ -421,6 +421,16 @@ bool __init 
arch_hugetlb_valid_size(unsigned long size) + return false; + } + ++bool __init arch_hugetlb_valid_size(unsigned long size) ++{ ++ return __hugetlb_valid_size(size); ++} ++ ++bool arch_hugetlb_migration_supported(struct hstate *h) ++{ ++ return __hugetlb_valid_size(huge_page_size(h)); ++} ++ + #ifdef CONFIG_CONTIG_ALLOC + static __init int gigantic_pages_init(void) + { +-- +2.43.0 + diff --git a/queue-6.6/riscv-fix-hugetlb_mask_last_page-when-napot-is-enabl.patch b/queue-6.6/riscv-fix-hugetlb_mask_last_page-when-napot-is-enabl.patch new file mode 100644 index 00000000000..58353f9b412 --- /dev/null +++ b/queue-6.6/riscv-fix-hugetlb_mask_last_page-when-napot-is-enabl.patch @@ -0,0 +1,55 @@ +From fcc29f99f98b28cf003f3855eb63ea29aaa08147 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 17 Jan 2024 20:57:41 +0100 +Subject: riscv: Fix hugetlb_mask_last_page() when NAPOT is enabled + +From: Alexandre Ghiti + +[ Upstream commit a179a4bfb694f80f2709a1d0398469e787acb974 ] + +When NAPOT is enabled, a new hugepage size is available and then we need +to make hugetlb_mask_last_page() aware of that. + +Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") +Signed-off-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20240117195741.1926459-3-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/mm/hugetlbpage.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c +index 24c0179565d8..87af75ee7186 100644 +--- a/arch/riscv/mm/hugetlbpage.c ++++ b/arch/riscv/mm/hugetlbpage.c +@@ -125,6 +125,26 @@ pte_t *huge_pte_offset(struct mm_struct *mm, + return pte; + } + ++unsigned long hugetlb_mask_last_page(struct hstate *h) ++{ ++ unsigned long hp_size = huge_page_size(h); ++ ++ switch (hp_size) { ++#ifndef __PAGETABLE_PMD_FOLDED ++ case PUD_SIZE: ++ return P4D_SIZE - PUD_SIZE; ++#endif ++ case PMD_SIZE: ++ return PUD_SIZE - PMD_SIZE; ++ case napot_cont_size(NAPOT_CONT64KB_ORDER): ++ return PMD_SIZE - napot_cont_size(NAPOT_CONT64KB_ORDER); ++ default: ++ break; ++ } ++ ++ return 0UL; ++} ++ + static pte_t get_clear_contig(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, +-- +2.43.0 + diff --git a/queue-6.6/riscv-fix-set_huge_pte_at-for-napot-mapping.patch b/queue-6.6/riscv-fix-set_huge_pte_at-for-napot-mapping.patch new file mode 100644 index 00000000000..020ec69fefe --- /dev/null +++ b/queue-6.6/riscv-fix-set_huge_pte_at-for-napot-mapping.patch @@ -0,0 +1,90 @@ +From c5383af86f844d30c9dd23a18ead2e8a238ad447 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 17 Jan 2024 20:57:40 +0100 +Subject: riscv: Fix set_huge_pte_at() for NAPOT mapping + +From: Alexandre Ghiti + +[ Upstream commit 1458eb2c9d88ad4b35eb6d6a4aa1d43d8fbf7f62 ] + +As stated by the privileged specification, we must clear a NAPOT +mapping and emit a sfence.vma before setting a new translation. 
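+
+In outline, the update sequence becomes (illustrative sketch of the
+clear_flush() helper and set loop added below):
+
+	/* 1. invalidate every PTE backing the NAPOT mapping */
+	for (i = 0; i < pte_num; i++)
+		ptep_get_and_clear(mm, addr + i * pgsize, ptep + i);
+	/* 2. sfence.vma over the range */
+	flush_tlb_range(&vma, addr, addr + pte_num * pgsize);
+	/* 3. only then install the new translation */
+	for (i = 0; i < pte_num; i++)
+		set_pte_at(mm, addr + i * pgsize, ptep + i, pte);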
+ +Fixes: 82a1a1f3bfb6 ("riscv: mm: support Svnapot in hugetlb page") +Signed-off-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20240117195741.1926459-2-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/mm/hugetlbpage.c | 42 +++++++++++++++++++++++++++++++++++-- + 1 file changed, 40 insertions(+), 2 deletions(-) + +diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c +index b52f0210481f..24c0179565d8 100644 +--- a/arch/riscv/mm/hugetlbpage.c ++++ b/arch/riscv/mm/hugetlbpage.c +@@ -177,13 +177,36 @@ pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags) + return entry; + } + ++static void clear_flush(struct mm_struct *mm, ++ unsigned long addr, ++ pte_t *ptep, ++ unsigned long pgsize, ++ unsigned long ncontig) ++{ ++ struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0); ++ unsigned long i, saddr = addr; ++ ++ for (i = 0; i < ncontig; i++, addr += pgsize, ptep++) ++ ptep_get_and_clear(mm, addr, ptep); ++ ++ flush_tlb_range(&vma, saddr, addr); ++} ++ ++/* ++ * When dealing with NAPOT mappings, the privileged specification indicates that ++ * "if an update needs to be made, the OS generally should first mark all of the ++ * PTEs invalid, then issue SFENCE.VMA instruction(s) covering all 4 KiB regions ++ * within the range, [...] then update the PTE(s), as described in Section ++ * 4.2.1.". That's the equivalent of the Break-Before-Make approach used by ++ * arm64. ++ */ + void set_huge_pte_at(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, + pte_t pte, + unsigned long sz) + { +- unsigned long hugepage_shift; ++ unsigned long hugepage_shift, pgsize; + int i, pte_num; + + if (sz >= PGDIR_SIZE) +@@ -198,7 +221,22 @@ void set_huge_pte_at(struct mm_struct *mm, + hugepage_shift = PAGE_SHIFT; + + pte_num = sz >> hugepage_shift; +- for (i = 0; i < pte_num; i++, ptep++, addr += (1 << hugepage_shift)) ++ pgsize = 1 << hugepage_shift; ++ ++ if (!pte_present(pte)) { ++ for (i = 0; i < pte_num; i++, ptep++, addr += pgsize) ++ set_ptes(mm, addr, ptep, pte, 1); ++ return; ++ } ++ ++ if (!pte_napot(pte)) { ++ set_ptes(mm, addr, ptep, pte, 1); ++ return; ++ } ++ ++ clear_flush(mm, addr, ptep, pgsize, pte_num); ++ ++ for (i = 0; i < pte_num; i++, ptep++, addr += pgsize) + set_pte_at(mm, addr, ptep, pte); + } + +-- +2.43.0 + diff --git a/queue-6.6/riscv-flush-the-tlb-when-a-page-directory-is-freed.patch b/queue-6.6/riscv-flush-the-tlb-when-a-page-directory-is-freed.patch new file mode 100644 index 00000000000..8f684f83b46 --- /dev/null +++ b/queue-6.6/riscv-flush-the-tlb-when-a-page-directory-is-freed.patch @@ -0,0 +1,38 @@ +From bdbcb6164494d98c2e4e83c857737f141855e14b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 28 Jan 2024 13:04:05 +0100 +Subject: riscv: Flush the tlb when a page directory is freed + +From: Alexandre Ghiti + +[ Upstream commit 97cf301fa42e8ea6e0a24de97bc0abcdc87d9504 ] + +The riscv privileged specification mandates to flush the TLB whenever a +page directory is modified, so add that to tlb_flush(). 
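+
+For context, a condensed sketch (simplified from the generic mmu_gather
+code in include/asm-generic/tlb.h and the hunk below; not the literal
+call sites): the pXd_free_tlb() helpers record the table teardown, and
+tlb_flush() must then widen its flush accordingly:
+
+	/* free_pmd_range() and friends mark the gather: */
+	pmd_free_tlb(tlb, pmd, addr);	/* sets tlb->freed_tables = 1 */
+
+	/* ...so tlb_flush() can no longer rely on a leaf-range flush: */
+	if (tlb->fullmm || tlb->need_flush_all || tlb->freed_tables)
+		flush_tlb_mm(tlb->mm);	/* sfence.vma for the whole mm */
+	else
+		flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end,
+				   tlb_get_unmap_size(tlb));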
+
+Fixes: c5e9b2c2ae82 ("riscv: Improve tlb_flush()")
+Signed-off-by: Alexandre Ghiti
+Reviewed-by: Charlie Jenkins
+Link: https://lore.kernel.org/r/20240128120405.25876-1-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt
+Signed-off-by: Sasha Levin
+---
+ arch/riscv/include/asm/tlb.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h
+index 1eb5682b2af6..50b63b5c15bd 100644
+--- a/arch/riscv/include/asm/tlb.h
++++ b/arch/riscv/include/asm/tlb.h
+@@ -16,7 +16,7 @@ static void tlb_flush(struct mmu_gather *tlb);
+ static inline void tlb_flush(struct mmu_gather *tlb)
+ {
+ #ifdef CONFIG_MMU
+-	if (tlb->fullmm || tlb->need_flush_all)
++	if (tlb->fullmm || tlb->need_flush_all || tlb->freed_tables)
+ 		flush_tlb_mm(tlb->mm);
+ 	else
+ 		flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end,
+-- 
+2.43.0
+
diff --git a/queue-6.6/riscv-improve-flush_tlb_kernel_range.patch b/queue-6.6/riscv-improve-flush_tlb_kernel_range.patch
new file mode 100644
index 00000000000..c62ed4398da
--- /dev/null
+++ b/queue-6.6/riscv-improve-flush_tlb_kernel_range.patch
@@ -0,0 +1,130 @@
+From d68ebd8c13fa7dbb70d5b0b57a2fbcadb7c20c2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 30 Oct 2023 14:30:28 +0100
+Subject: riscv: Improve flush_tlb_kernel_range()
+
+From: Alexandre Ghiti
+
+[ Upstream commit 5e22bfd520ea8740e9a20314d2a890baf304c9d2 ]
+
+This function used to simply flush the whole tlb of all harts; be more
+subtle and try to only flush the range.
+
+The problem is that we can only use PAGE_SIZE as stride since we don't know
+the size of the underlying mapping and then this function will be improved
+only if the size of the region to flush is < threshold * PAGE_SIZE.
+
+Signed-off-by: Alexandre Ghiti
+Reviewed-by: Andrew Jones
+Tested-by: Lad Prabhakar # On RZ/Five SMARC
+Reviewed-by: Samuel Holland
+Tested-by: Samuel Holland
+Link: https://lore.kernel.org/r/20231030133027.19542-5-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt
+Stable-dep-of: d9807d60c145 ("riscv: mm: execute local TLB flush after populating vmemmap")
+Signed-off-by: Sasha Levin
+---
+ arch/riscv/include/asm/tlbflush.h | 11 +++++-----
+ arch/riscv/mm/tlbflush.c | 34 ++++++++++++++++++++++---------
+ 2 files changed, 30 insertions(+), 15 deletions(-)
+
+diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
+index 170a49c531c6..8f3418c5f172 100644
+--- a/arch/riscv/include/asm/tlbflush.h
++++ b/arch/riscv/include/asm/tlbflush.h
+@@ -40,6 +40,7 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
+ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+ 		     unsigned long end);
++void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ #define __HAVE_ARCH_FLUSH_PMD_TLB_RANGE
+ void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,
+@@ -56,15 +57,15 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
+ 	local_flush_tlb_all();
+ }
+ 
+-#define flush_tlb_mm(mm) flush_tlb_all()
+-#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all()
+-#endif /* !CONFIG_SMP || !CONFIG_MMU */
+-
+ /* Flush a range of kernel pages */
+ static inline void flush_tlb_kernel_range(unsigned long start,
+ 	unsigned long end)
+ {
+-	flush_tlb_all();
++	local_flush_tlb_all();
+ }
+ 
++#define flush_tlb_mm(mm) flush_tlb_all()
++#define flush_tlb_mm_range(mm, start, end, 
page_size) flush_tlb_all() ++#endif /* !CONFIG_SMP || !CONFIG_MMU */ ++ + #endif /* _ASM_RISCV_TLBFLUSH_H */ +diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c +index 88fa8b18ca22..8723adc884c7 100644 +--- a/arch/riscv/mm/tlbflush.c ++++ b/arch/riscv/mm/tlbflush.c +@@ -96,20 +96,27 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, + unsigned long size, unsigned long stride) + { + struct flush_tlb_range_data ftd; +- struct cpumask *cmask = mm_cpumask(mm); ++ const struct cpumask *cmask; + unsigned long asid = FLUSH_TLB_NO_ASID; +- unsigned int cpuid; + bool broadcast; + +- if (cpumask_empty(cmask)) +- return; ++ if (mm) { ++ unsigned int cpuid; ++ ++ cmask = mm_cpumask(mm); ++ if (cpumask_empty(cmask)) ++ return; + +- cpuid = get_cpu(); +- /* check if the tlbflush needs to be sent to other CPUs */ +- broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; ++ cpuid = get_cpu(); ++ /* check if the tlbflush needs to be sent to other CPUs */ ++ broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids; + +- if (static_branch_unlikely(&use_asid_allocator)) +- asid = atomic_long_read(&mm->context.id) & asid_mask; ++ if (static_branch_unlikely(&use_asid_allocator)) ++ asid = atomic_long_read(&mm->context.id) & asid_mask; ++ } else { ++ cmask = cpu_online_mask; ++ broadcast = true; ++ } + + if (broadcast) { + if (riscv_use_ipi_for_rfence()) { +@@ -127,7 +134,8 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, + local_flush_tlb_range_asid(start, size, stride, asid); + } + +- put_cpu(); ++ if (mm) ++ put_cpu(); + } + + void flush_tlb_mm(struct mm_struct *mm) +@@ -152,6 +160,12 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + { + __flush_tlb_range(vma->vm_mm, start, end - start, PAGE_SIZE); + } ++ ++void flush_tlb_kernel_range(unsigned long start, unsigned long end) ++{ ++ __flush_tlb_range(NULL, start, end - start, PAGE_SIZE); ++} ++ + #ifdef CONFIG_TRANSPARENT_HUGEPAGE + void flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end) +-- +2.43.0 + diff --git a/queue-6.6/riscv-improve-tlb_flush.patch b/queue-6.6/riscv-improve-tlb_flush.patch new file mode 100644 index 00000000000..dca0dc7b15b --- /dev/null +++ b/queue-6.6/riscv-improve-tlb_flush.patch @@ -0,0 +1,88 @@ +From e95d1ca9afe35fd9626410867ffe478ce6359061 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Oct 2023 14:30:25 +0100 +Subject: riscv: Improve tlb_flush() + +From: Alexandre Ghiti + +[ Upstream commit c5e9b2c2ae82231d85d9650854e7b3e97dde33da ] + +For now, tlb_flush() simply calls flush_tlb_mm() which results in a +flush of the whole TLB. So let's use mmu_gather fields to provide a more +fine-grained flush of the TLB. 
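+
+Concretely (an illustrative example, not part of the patch): after an
+munmap() of a single 2M hugepage, the gather ends up with tlb->start and
+tlb->end bracketing those 2M and tlb_get_unmap_size() returning PMD_SIZE,
+so the new code boils down to:
+
+	/* one sfence.vma per 2M entry instead of a full-TLB flush */
+	flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, PMD_SIZE);
+
+while tlb->fullmm (process exit) and tlb->need_flush_all still take the
+whole-mm flush_tlb_mm() path.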
+ +Signed-off-by: Alexandre Ghiti +Reviewed-by: Andrew Jones +Reviewed-by: Samuel Holland +Tested-by: Lad Prabhakar # On RZ/Five SMARC +Link: https://lore.kernel.org/r/20231030133027.19542-2-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Stable-dep-of: d9807d60c145 ("riscv: mm: execute local TLB flush after populating vmemmap") +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/tlb.h | 8 +++++++- + arch/riscv/include/asm/tlbflush.h | 3 +++ + arch/riscv/mm/tlbflush.c | 7 +++++++ + 3 files changed, 17 insertions(+), 1 deletion(-) + +diff --git a/arch/riscv/include/asm/tlb.h b/arch/riscv/include/asm/tlb.h +index 120bcf2ed8a8..1eb5682b2af6 100644 +--- a/arch/riscv/include/asm/tlb.h ++++ b/arch/riscv/include/asm/tlb.h +@@ -15,7 +15,13 @@ static void tlb_flush(struct mmu_gather *tlb); + + static inline void tlb_flush(struct mmu_gather *tlb) + { +- flush_tlb_mm(tlb->mm); ++#ifdef CONFIG_MMU ++ if (tlb->fullmm || tlb->need_flush_all) ++ flush_tlb_mm(tlb->mm); ++ else ++ flush_tlb_mm_range(tlb->mm, tlb->start, tlb->end, ++ tlb_get_unmap_size(tlb)); ++#endif + } + + #endif /* _ASM_RISCV_TLB_H */ +diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h +index a09196f8de68..f5c4fb0ae642 100644 +--- a/arch/riscv/include/asm/tlbflush.h ++++ b/arch/riscv/include/asm/tlbflush.h +@@ -32,6 +32,8 @@ static inline void local_flush_tlb_page(unsigned long addr) + #if defined(CONFIG_SMP) && defined(CONFIG_MMU) + void flush_tlb_all(void); + void flush_tlb_mm(struct mm_struct *mm); ++void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, ++ unsigned long end, unsigned int page_size); + void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr); + void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, + unsigned long end); +@@ -52,6 +54,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, + } + + #define flush_tlb_mm(mm) flush_tlb_all() ++#define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() + #endif /* !CONFIG_SMP || !CONFIG_MMU */ + + /* Flush a range of kernel pages */ +diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c +index 77be59aadc73..fa03289853d8 100644 +--- a/arch/riscv/mm/tlbflush.c ++++ b/arch/riscv/mm/tlbflush.c +@@ -132,6 +132,13 @@ void flush_tlb_mm(struct mm_struct *mm) + __flush_tlb_range(mm, 0, -1, PAGE_SIZE); + } + ++void flush_tlb_mm_range(struct mm_struct *mm, ++ unsigned long start, unsigned long end, ++ unsigned int page_size) ++{ ++ __flush_tlb_range(mm, start, end - start, page_size); ++} ++ + void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr) + { + __flush_tlb_range(vma->vm_mm, addr, PAGE_SIZE, PAGE_SIZE); +-- +2.43.0 + diff --git a/queue-6.6/riscv-make-__flush_tlb_range-loop-over-pte-instead-o.patch b/queue-6.6/riscv-make-__flush_tlb_range-loop-over-pte-instead-o.patch new file mode 100644 index 00000000000..6acc918b20c --- /dev/null +++ b/queue-6.6/riscv-make-__flush_tlb_range-loop-over-pte-instead-o.patch @@ -0,0 +1,305 @@ +From d3d678ebd1f3c23607c172dd70ae83483cc458c2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 30 Oct 2023 14:30:27 +0100 +Subject: riscv: Make __flush_tlb_range() loop over pte instead of flushing the + whole tlb + +From: Alexandre Ghiti + +[ Upstream commit 9d4e8d5fa7dbbb606b355f40d918a1feef821bc5 ] + +Currently, when the range to flush covers more than one page (a 4K page or +a hugepage), __flush_tlb_range() flushes the whole tlb. 
Flushing the whole +tlb comes with a greater cost than flushing a single entry so we should +flush single entries up to a certain threshold so that: +threshold * cost of flushing a single entry < cost of flushing the whole +tlb. + +Co-developed-by: Mayuresh Chitale +Signed-off-by: Mayuresh Chitale +Signed-off-by: Alexandre Ghiti +Reviewed-by: Andrew Jones +Tested-by: Lad Prabhakar # On RZ/Five SMARC +Reviewed-by: Samuel Holland +Tested-by: Samuel Holland +Link: https://lore.kernel.org/r/20231030133027.19542-4-alexghiti@rivosinc.com +Signed-off-by: Palmer Dabbelt +Stable-dep-of: d9807d60c145 ("riscv: mm: execute local TLB flush after populating vmemmap") +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/sbi.h | 3 - + arch/riscv/include/asm/tlbflush.h | 3 + + arch/riscv/kernel/sbi.c | 32 +++------ + arch/riscv/mm/tlbflush.c | 115 +++++++++++++++--------------- + 4 files changed, 72 insertions(+), 81 deletions(-) + +diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h +index 5b4a1bf5f439..b79d0228144f 100644 +--- a/arch/riscv/include/asm/sbi.h ++++ b/arch/riscv/include/asm/sbi.h +@@ -273,9 +273,6 @@ void sbi_set_timer(uint64_t stime_value); + void sbi_shutdown(void); + void sbi_send_ipi(unsigned int cpu); + int sbi_remote_fence_i(const struct cpumask *cpu_mask); +-int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, +- unsigned long start, +- unsigned long size); + + int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, + unsigned long start, +diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h +index f5c4fb0ae642..170a49c531c6 100644 +--- a/arch/riscv/include/asm/tlbflush.h ++++ b/arch/riscv/include/asm/tlbflush.h +@@ -11,6 +11,9 @@ + #include + #include + ++#define FLUSH_TLB_MAX_SIZE ((unsigned long)-1) ++#define FLUSH_TLB_NO_ASID ((unsigned long)-1) ++ + #ifdef CONFIG_MMU + extern unsigned long asid_mask; + +diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c +index c672c8ba9a2a..5a62ed1da453 100644 +--- a/arch/riscv/kernel/sbi.c ++++ b/arch/riscv/kernel/sbi.c +@@ -11,6 +11,7 @@ + #include + #include + #include ++#include + + /* default SBI version is 0.1 */ + unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; +@@ -376,32 +377,15 @@ int sbi_remote_fence_i(const struct cpumask *cpu_mask) + } + EXPORT_SYMBOL(sbi_remote_fence_i); + +-/** +- * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote +- * harts for the specified virtual address range. +- * @cpu_mask: A cpu mask containing all the target harts. +- * @start: Start of the virtual address +- * @size: Total size of the virtual address range. +- * +- * Return: 0 on success, appropriate linux error code otherwise. +- */ +-int sbi_remote_sfence_vma(const struct cpumask *cpu_mask, +- unsigned long start, +- unsigned long size) +-{ +- return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, +- cpu_mask, start, size, 0, 0); +-} +-EXPORT_SYMBOL(sbi_remote_sfence_vma); +- + /** + * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given +- * remote harts for a virtual address range belonging to a specific ASID. ++ * remote harts for a virtual address range belonging to a specific ASID or not. + * + * @cpu_mask: A cpu mask containing all the target harts. + * @start: Start of the virtual address + * @size: Total size of the virtual address range. +- * @asid: The value of address space identifier (ASID). 
++ * @asid: The value of address space identifier (ASID), or FLUSH_TLB_NO_ASID ++ * for flushing all address spaces. + * + * Return: 0 on success, appropriate linux error code otherwise. + */ +@@ -410,8 +394,12 @@ int sbi_remote_sfence_vma_asid(const struct cpumask *cpu_mask, + unsigned long size, + unsigned long asid) + { +- return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, +- cpu_mask, start, size, asid, 0); ++ if (asid == FLUSH_TLB_NO_ASID) ++ return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, ++ cpu_mask, start, size, 0, 0); ++ else ++ return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, ++ cpu_mask, start, size, asid, 0); + } + EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); + +diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c +index fa03289853d8..88fa8b18ca22 100644 +--- a/arch/riscv/mm/tlbflush.c ++++ b/arch/riscv/mm/tlbflush.c +@@ -8,28 +8,50 @@ + + static inline void local_flush_tlb_all_asid(unsigned long asid) + { +- __asm__ __volatile__ ("sfence.vma x0, %0" +- : +- : "r" (asid) +- : "memory"); ++ if (asid != FLUSH_TLB_NO_ASID) ++ __asm__ __volatile__ ("sfence.vma x0, %0" ++ : ++ : "r" (asid) ++ : "memory"); ++ else ++ local_flush_tlb_all(); + } + + static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) + { +- __asm__ __volatile__ ("sfence.vma %0, %1" +- : +- : "r" (addr), "r" (asid) +- : "memory"); ++ if (asid != FLUSH_TLB_NO_ASID) ++ __asm__ __volatile__ ("sfence.vma %0, %1" ++ : ++ : "r" (addr), "r" (asid) ++ : "memory"); ++ else ++ local_flush_tlb_page(addr); + } + +-static inline void local_flush_tlb_range(unsigned long start, +- unsigned long size, unsigned long stride) ++/* ++ * Flush entire TLB if number of entries to be flushed is greater ++ * than the threshold below. ++ */ ++static unsigned long tlb_flush_all_threshold __read_mostly = 64; ++ ++static void local_flush_tlb_range_threshold_asid(unsigned long start, ++ unsigned long size, ++ unsigned long stride, ++ unsigned long asid) + { +- if (size <= stride) +- local_flush_tlb_page(start); +- else +- local_flush_tlb_all(); ++ unsigned long nr_ptes_in_range = DIV_ROUND_UP(size, stride); ++ int i; ++ ++ if (nr_ptes_in_range > tlb_flush_all_threshold) { ++ local_flush_tlb_all_asid(asid); ++ return; ++ } ++ ++ for (i = 0; i < nr_ptes_in_range; ++i) { ++ local_flush_tlb_page_asid(start, asid); ++ start += stride; ++ } + } + + static inline void local_flush_tlb_range_asid(unsigned long start, +@@ -37,8 +59,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start, + { + if (size <= stride) + local_flush_tlb_page_asid(start, asid); +- else ++ else if (size == FLUSH_TLB_MAX_SIZE) + local_flush_tlb_all_asid(asid); ++ else ++ local_flush_tlb_range_threshold_asid(start, size, stride, asid); + } + + static void __ipi_flush_tlb_all(void *info) +@@ -51,7 +75,7 @@ void flush_tlb_all(void) + if (riscv_use_ipi_for_rfence()) + on_each_cpu(__ipi_flush_tlb_all, NULL, 1); + else +- sbi_remote_sfence_vma(NULL, 0, -1); ++ sbi_remote_sfence_vma_asid(NULL, 0, FLUSH_TLB_MAX_SIZE, FLUSH_TLB_NO_ASID); + } + + struct flush_tlb_range_data { +@@ -68,18 +92,12 @@ static void __ipi_flush_tlb_range_asid(void *info) + local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); + } + +-static void __ipi_flush_tlb_range(void *info) +-{ +- struct flush_tlb_range_data *d = info; +- +- local_flush_tlb_range(d->start, d->size, d->stride); +-} +- + static void __flush_tlb_range(struct mm_struct *mm, unsigned long start, + unsigned long size, unsigned long stride) + { + struct 
flush_tlb_range_data ftd;
+ 	struct cpumask *cmask = mm_cpumask(mm);
++	unsigned long asid = FLUSH_TLB_NO_ASID;
+ 	unsigned int cpuid;
+ 	bool broadcast;
+ 
+@@ -89,39 +107,24 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
+ 	cpuid = get_cpu();
+ 	/* check if the tlbflush needs to be sent to other CPUs */
+ 	broadcast = cpumask_any_but(cmask, cpuid) < nr_cpu_ids;
+-	if (static_branch_unlikely(&use_asid_allocator)) {
+-		unsigned long asid = atomic_long_read(&mm->context.id) & asid_mask;
+-
+-		if (broadcast) {
+-			if (riscv_use_ipi_for_rfence()) {
+-				ftd.asid = asid;
+-				ftd.start = start;
+-				ftd.size = size;
+-				ftd.stride = stride;
+-				on_each_cpu_mask(cmask,
+-						 __ipi_flush_tlb_range_asid,
+-						 &ftd, 1);
+-			} else
+-				sbi_remote_sfence_vma_asid(cmask,
+-							   start, size, asid);
+-		} else {
+-			local_flush_tlb_range_asid(start, size, stride, asid);
+-		}
++
++	if (static_branch_unlikely(&use_asid_allocator))
++		asid = atomic_long_read(&mm->context.id) & asid_mask;
++
++	if (broadcast) {
++		if (riscv_use_ipi_for_rfence()) {
++			ftd.asid = asid;
++			ftd.start = start;
++			ftd.size = size;
++			ftd.stride = stride;
++			on_each_cpu_mask(cmask,
++					 __ipi_flush_tlb_range_asid,
++					 &ftd, 1);
++		} else
++			sbi_remote_sfence_vma_asid(cmask,
++						   start, size, asid);
+ 	} else {
+-		if (broadcast) {
+-			if (riscv_use_ipi_for_rfence()) {
+-				ftd.asid = 0;
+-				ftd.start = start;
+-				ftd.size = size;
+-				ftd.stride = stride;
+-				on_each_cpu_mask(cmask,
+-						 __ipi_flush_tlb_range,
+-						 &ftd, 1);
+-			} else
+-				sbi_remote_sfence_vma(cmask, start, size);
+-		} else {
+-			local_flush_tlb_range(start, size, stride);
+-		}
++		local_flush_tlb_range_asid(start, size, stride, asid);
+ 	}
+ 
+ 	put_cpu();
+@@ -129,7 +132,7 @@ static void __flush_tlb_range(struct mm_struct *mm, unsigned long start,
+ 
+ void flush_tlb_mm(struct mm_struct *mm)
+ {
+-	__flush_tlb_range(mm, 0, -1, PAGE_SIZE);
++	__flush_tlb_range(mm, 0, FLUSH_TLB_MAX_SIZE, PAGE_SIZE);
+ }
+ 
+ void flush_tlb_mm_range(struct mm_struct *mm,
+-- 
+2.43.0
+
diff --git a/queue-6.6/riscv-mm-execute-local-tlb-flush-after-populating-vm.patch b/queue-6.6/riscv-mm-execute-local-tlb-flush-after-populating-vm.patch
new file mode 100644
index 00000000000..4632e27b356
--- /dev/null
+++ b/queue-6.6/riscv-mm-execute-local-tlb-flush-after-populating-vm.patch
@@ -0,0 +1,80 @@
+From a76b2f246a5410b0d6292c8c6b516de724097bf1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Wed, 17 Jan 2024 22:03:33 +0800
+Subject: riscv: mm: execute local TLB flush after populating vmemmap
+
+From: Vincent Chen
+
+[ Upstream commit d9807d60c145836043ffa602328ea1d66dc458b1 ]
+
+sparse_init() calls vmemmap_populate() many times to create VA to PA
+mapping for the VMEMMAP area, where all "struct page" are located once
+CONFIG_SPARSEMEM_VMEMMAP is defined. These "struct page" are later
+initialized in the zone_sizes_init() function. However, during this
+process, no sfence.vma instruction is executed for this VMEMMAP area.
+This omission may cause the hart to fail to perform a page table walk
+because some data related to the address translation is invisible to the
+hart. To solve this issue, local_flush_tlb_kernel_range() is called
+right after sparse_init() to execute a sfence.vma instruction for this
+VMEMMAP area, ensuring that all data related to the address translation
+is visible to the hart. 
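+
+Put differently, boot now does the following (condensed from the init.c
+hunk below; VMEMMAP_START/VMEMMAP_END bound the populated region):
+
+	sparse_init();		/* populates the vmemmap page tables */
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+	/* one sfence.vma pass so that later "struct page" accesses use
+	 * up-to-date translation data
+	 */
+	local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END);
+#endif
+	zone_sizes_init();	/* first initialization of struct page */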
+ +Fixes: d95f1a542c3d ("RISC-V: Implement sparsemem") +Signed-off-by: Vincent Chen +Reviewed-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20240117140333.2479667-1-vincent.chen@sifive.com +Fixes: 7a92fc8b4d20 ("mm: Introduce flush_cache_vmap_early()") +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/tlbflush.h | 1 + + arch/riscv/mm/init.c | 4 ++++ + arch/riscv/mm/tlbflush.c | 3 ++- + 3 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h +index a60416bbe190..51664ae4852e 100644 +--- a/arch/riscv/include/asm/tlbflush.h ++++ b/arch/riscv/include/asm/tlbflush.h +@@ -67,6 +67,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, + + #define flush_tlb_mm(mm) flush_tlb_all() + #define flush_tlb_mm_range(mm, start, end, page_size) flush_tlb_all() ++#define local_flush_tlb_kernel_range(start, end) flush_tlb_all() + #endif /* !CONFIG_SMP || !CONFIG_MMU */ + + #endif /* _ASM_RISCV_TLBFLUSH_H */ +diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c +index e71dd19ac801..b50faa232b5e 100644 +--- a/arch/riscv/mm/init.c ++++ b/arch/riscv/mm/init.c +@@ -1502,6 +1502,10 @@ void __init misc_mem_init(void) + early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); + arch_numa_init(); + sparse_init(); ++#ifdef CONFIG_SPARSEMEM_VMEMMAP ++ /* The entire VMEMMAP region has been populated. Flush TLB for this region */ ++ local_flush_tlb_kernel_range(VMEMMAP_START, VMEMMAP_END); ++#endif + zone_sizes_init(); + reserve_crashkernel(); + memblock_dump_all(); +diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c +index b1ab6cf78e9e..bdee5de918e0 100644 +--- a/arch/riscv/mm/tlbflush.c ++++ b/arch/riscv/mm/tlbflush.c +@@ -65,9 +65,10 @@ static inline void local_flush_tlb_range_asid(unsigned long start, + local_flush_tlb_range_threshold_asid(start, size, stride, asid); + } + ++/* Flush a range of kernel pages without broadcasting */ + void local_flush_tlb_kernel_range(unsigned long start, unsigned long end) + { +- local_flush_tlb_range_asid(start, end, PAGE_SIZE, FLUSH_TLB_NO_ASID); ++ local_flush_tlb_range_asid(start, end - start, PAGE_SIZE, FLUSH_TLB_NO_ASID); + } + + static void __ipi_flush_tlb_all(void *info) +-- +2.43.0 + diff --git a/queue-6.6/rxrpc-fix-counting-of-new-acks-and-nacks.patch b/queue-6.6/rxrpc-fix-counting-of-new-acks-and-nacks.patch new file mode 100644 index 00000000000..be933980e36 --- /dev/null +++ b/queue-6.6/rxrpc-fix-counting-of-new-acks-and-nacks.patch @@ -0,0 +1,385 @@ +From 19f4b5e0a39f7cdd0e1d90fba4e3bd896436efc4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Feb 2024 15:19:16 +0000 +Subject: rxrpc: Fix counting of new acks and nacks + +From: David Howells + +[ Upstream commit 41b7fa157ea1c8c3a575ca7f5f32034de9bee3ae ] + +Fix the counting of new acks and nacks when parsing a packet - something +that is used in congestion control. + +As the code stands, it merely notes if there are any nacks whereas what we +really should do is compare the previous SACK table to the new one, +assuming we get two successive ACK packets with nacks in them. However, we +really don't want to do that if we can avoid it as the tables might not +correspond directly as one may be shifted from the other - something that +will only get harder to deal with once extended ACK tables come into full +use (with a capacity of up to 8192). 
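+
+(For illustration, the practical comparison boils down to the sketch
+below - the names old_acks/old_first/new_first and the two counters are
+local to this example, while before() is the usual sequence-number
+helper. This is what rxrpc_input_check_prev_ack() in the hunk
+implements:)
+
+	for (i = 0; i < old_nr_acks; i++) {
+		if (old_acks[i] != RXRPC_ACK_TYPE_NACK)
+			continue;
+		if (before(old_first + i, new_first))
+			new_acks++;		/* shifted out of the window */
+		else
+			retained_nacks++;	/* still covered by new ACK */
+	}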
+ +Instead, count the number of nacks shifted out of the old SACK, the number +of nacks retained in the portion still active and the number of new acks +and nacks in the new table then calculate what we need. + +Note this ends up a bit of an estimate as the Rx protocol allows acks to be +withdrawn by the receiver and packets requested to be retransmitted. + +Fixes: d57a3a151660 ("rxrpc: Save last ACK's SACK table rather than marking txbufs") +Signed-off-by: David Howells +cc: Marc Dionne +cc: "David S. Miller" +cc: Eric Dumazet +cc: Jakub Kicinski +cc: Paolo Abeni +cc: linux-afs@lists.infradead.org +cc: netdev@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + include/trace/events/rxrpc.h | 8 ++- + net/rxrpc/ar-internal.h | 20 ++++-- + net/rxrpc/call_event.c | 6 +- + net/rxrpc/call_object.c | 1 + + net/rxrpc/input.c | 115 +++++++++++++++++++++++++++++------ + 5 files changed, 122 insertions(+), 28 deletions(-) + +diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h +index f7e537f64db4..0dd4a21d172d 100644 +--- a/include/trace/events/rxrpc.h ++++ b/include/trace/events/rxrpc.h +@@ -128,6 +128,7 @@ + EM(rxrpc_skb_eaten_by_unshare_nomem, "ETN unshar-nm") \ + EM(rxrpc_skb_get_conn_secured, "GET conn-secd") \ + EM(rxrpc_skb_get_conn_work, "GET conn-work") \ ++ EM(rxrpc_skb_get_last_nack, "GET last-nack") \ + EM(rxrpc_skb_get_local_work, "GET locl-work") \ + EM(rxrpc_skb_get_reject_work, "GET rej-work ") \ + EM(rxrpc_skb_get_to_recvmsg, "GET to-recv ") \ +@@ -141,6 +142,7 @@ + EM(rxrpc_skb_put_error_report, "PUT error-rep") \ + EM(rxrpc_skb_put_input, "PUT input ") \ + EM(rxrpc_skb_put_jumbo_subpacket, "PUT jumbo-sub") \ ++ EM(rxrpc_skb_put_last_nack, "PUT last-nack") \ + EM(rxrpc_skb_put_purge, "PUT purge ") \ + EM(rxrpc_skb_put_rotate, "PUT rotate ") \ + EM(rxrpc_skb_put_unknown, "PUT unknown ") \ +@@ -1549,7 +1551,7 @@ TRACE_EVENT(rxrpc_congest, + memcpy(&__entry->sum, summary, sizeof(__entry->sum)); + ), + +- TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u r=%u b=%u u=%u d=%u l=%x%s%s%s", ++ TP_printk("c=%08x r=%08x %s q=%08x %s cw=%u ss=%u nA=%u,%u+%u,%u b=%u u=%u d=%u l=%x%s%s%s", + __entry->call, + __entry->ack_serial, + __print_symbolic(__entry->sum.ack_reason, rxrpc_ack_names), +@@ -1557,9 +1559,9 @@ TRACE_EVENT(rxrpc_congest, + __print_symbolic(__entry->sum.mode, rxrpc_congest_modes), + __entry->sum.cwnd, + __entry->sum.ssthresh, +- __entry->sum.nr_acks, __entry->sum.saw_nacks, ++ __entry->sum.nr_acks, __entry->sum.nr_retained_nacks, + __entry->sum.nr_new_acks, +- __entry->sum.nr_rot_new_acks, ++ __entry->sum.nr_new_nacks, + __entry->top - __entry->hard_ack, + __entry->sum.cumulative_acks, + __entry->sum.dup_acks, +diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h +index f6375772fa93..bda3f6690b32 100644 +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -198,11 +198,19 @@ struct rxrpc_host_header { + */ + struct rxrpc_skb_priv { + struct rxrpc_connection *conn; /* Connection referred to (poke packet) */ +- u16 offset; /* Offset of data */ +- u16 len; /* Length of data */ +- u8 flags; ++ union { ++ struct { ++ u16 offset; /* Offset of data */ ++ u16 len; /* Length of data */ ++ u8 flags; + #define RXRPC_RX_VERIFIED 0x01 +- ++ }; ++ struct { ++ rxrpc_seq_t first_ack; /* First packet in acks table */ ++ u8 nr_acks; /* Number of acks+nacks */ ++ u8 nr_nacks; /* Number of nacks */ ++ }; ++ }; + struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ + }; + +@@ -688,6 +696,7 @@ 
struct rxrpc_call { + u8 cong_dup_acks; /* Count of ACKs showing missing packets */ + u8 cong_cumul_acks; /* Cumulative ACK count */ + ktime_t cong_tstamp; /* Last time cwnd was changed */ ++ struct sk_buff *cong_last_nack; /* Last ACK with nacks received */ + + /* Receive-phase ACK management (ACKs we send). */ + u8 ackr_reason; /* reason to ACK */ +@@ -725,7 +734,8 @@ struct rxrpc_call { + struct rxrpc_ack_summary { + u16 nr_acks; /* Number of ACKs in packet */ + u16 nr_new_acks; /* Number of new ACKs in packet */ +- u16 nr_rot_new_acks; /* Number of rotated new ACKs */ ++ u16 nr_new_nacks; /* Number of new nacks in packet */ ++ u16 nr_retained_nacks; /* Number of nacks retained between ACKs */ + u8 ack_reason; + bool saw_nacks; /* Saw NACKs in packet */ + bool new_low_nack; /* T if new low NACK found */ +diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c +index c61efe08695d..0f78544d043b 100644 +--- a/net/rxrpc/call_event.c ++++ b/net/rxrpc/call_event.c +@@ -112,6 +112,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) + void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) + { + struct rxrpc_ackpacket *ack = NULL; ++ struct rxrpc_skb_priv *sp; + struct rxrpc_txbuf *txb; + unsigned long resend_at; + rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted); +@@ -139,14 +140,15 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb) + * explicitly NAK'd packets. + */ + if (ack_skb) { ++ sp = rxrpc_skb(ack_skb); + ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header); + +- for (i = 0; i < ack->nAcks; i++) { ++ for (i = 0; i < sp->nr_acks; i++) { + rxrpc_seq_t seq; + + if (ack->acks[i] & 1) + continue; +- seq = ntohl(ack->firstPacket) + i; ++ seq = sp->first_ack + i; + if (after(txb->seq, transmitted)) + break; + if (after(txb->seq, seq)) +diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c +index f10b37c14772..0a50341d920a 100644 +--- a/net/rxrpc/call_object.c ++++ b/net/rxrpc/call_object.c +@@ -685,6 +685,7 @@ static void rxrpc_destroy_call(struct work_struct *work) + + del_timer_sync(&call->timer); + ++ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); + rxrpc_cleanup_ring(call); + while ((txb = list_first_entry_or_null(&call->tx_sendmsg, + struct rxrpc_txbuf, call_link))) { +diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c +index 92495e73b869..9691de00ade7 100644 +--- a/net/rxrpc/input.c ++++ b/net/rxrpc/input.c +@@ -45,11 +45,9 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, + } + + cumulative_acks += summary->nr_new_acks; +- cumulative_acks += summary->nr_rot_new_acks; + if (cumulative_acks > 255) + cumulative_acks = 255; + +- summary->mode = call->cong_mode; + summary->cwnd = call->cong_cwnd; + summary->ssthresh = call->cong_ssthresh; + summary->cumulative_acks = cumulative_acks; +@@ -151,6 +149,7 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, + cwnd = RXRPC_TX_MAX_WINDOW; + call->cong_cwnd = cwnd; + call->cong_cumul_acks = cumulative_acks; ++ summary->mode = call->cong_mode; + trace_rxrpc_congest(call, summary, acked_serial, change); + if (resend) + rxrpc_resend(call, skb); +@@ -213,7 +212,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, + list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { + if (before_eq(txb->seq, call->acks_hard_ack)) + continue; +- summary->nr_rot_new_acks++; + if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) { + set_bit(RXRPC_CALL_TX_LAST, &call->flags); + rot_last = true; +@@ -254,6 
+252,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + { + ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); + ++ if (unlikely(call->cong_last_nack)) { ++ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); ++ call->cong_last_nack = NULL; ++ } ++ + switch (__rxrpc_call_state(call)) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: +@@ -702,6 +705,43 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, + wake_up(&call->waitq); + } + ++/* ++ * Determine how many nacks from the previous ACK have now been satisfied. ++ */ ++static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, ++ struct rxrpc_ack_summary *summary, ++ rxrpc_seq_t seq) ++{ ++ struct sk_buff *skb = call->cong_last_nack; ++ struct rxrpc_ackpacket ack; ++ struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ++ unsigned int i, new_acks = 0, retained_nacks = 0; ++ rxrpc_seq_t old_seq = sp->first_ack; ++ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack); ++ ++ if (after_eq(seq, old_seq + sp->nr_acks)) { ++ summary->nr_new_acks += sp->nr_nacks; ++ summary->nr_new_acks += seq - (old_seq + sp->nr_acks); ++ summary->nr_retained_nacks = 0; ++ } else if (seq == old_seq) { ++ summary->nr_retained_nacks = sp->nr_nacks; ++ } else { ++ for (i = 0; i < sp->nr_acks; i++) { ++ if (acks[i] == RXRPC_ACK_TYPE_NACK) { ++ if (before(old_seq + i, seq)) ++ new_acks++; ++ else ++ retained_nacks++; ++ } ++ } ++ ++ summary->nr_new_acks += new_acks; ++ summary->nr_retained_nacks = retained_nacks; ++ } ++ ++ return old_seq + sp->nr_acks; ++} ++ + /* + * Process individual soft ACKs. + * +@@ -711,25 +751,51 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, + * the timer on the basis that the peer might just not have processed them at + * the time the ACK was sent. + */ +-static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, +- rxrpc_seq_t seq, int nr_acks, +- struct rxrpc_ack_summary *summary) ++static void rxrpc_input_soft_acks(struct rxrpc_call *call, ++ struct rxrpc_ack_summary *summary, ++ struct sk_buff *skb, ++ rxrpc_seq_t seq, ++ rxrpc_seq_t since) + { +- unsigned int i; ++ struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ++ unsigned int i, old_nacks = 0; ++ rxrpc_seq_t lowest_nak = seq + sp->nr_acks; ++ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); + +- for (i = 0; i < nr_acks; i++) { ++ for (i = 0; i < sp->nr_acks; i++) { + if (acks[i] == RXRPC_ACK_TYPE_ACK) { + summary->nr_acks++; +- summary->nr_new_acks++; ++ if (after_eq(seq, since)) ++ summary->nr_new_acks++; + } else { +- if (!summary->saw_nacks && +- call->acks_lowest_nak != seq + i) { +- call->acks_lowest_nak = seq + i; +- summary->new_low_nack = true; +- } + summary->saw_nacks = true; ++ if (before(seq, since)) { ++ /* Overlap with previous ACK */ ++ old_nacks++; ++ } else { ++ summary->nr_new_nacks++; ++ sp->nr_nacks++; ++ } ++ ++ if (before(seq, lowest_nak)) ++ lowest_nak = seq; + } ++ seq++; ++ } ++ ++ if (lowest_nak != call->acks_lowest_nak) { ++ call->acks_lowest_nak = lowest_nak; ++ summary->new_low_nack = true; + } ++ ++ /* We *can* have more nacks than we did - the peer is permitted to drop ++ * packets it has soft-acked and re-request them. Further, it is ++ * possible for the nack distribution to change whilst the number of ++ * nacks stays the same or goes down. 
++ */ ++ if (old_nacks < summary->nr_retained_nacks) ++ summary->nr_new_acks += summary->nr_retained_nacks - old_nacks; ++ summary->nr_retained_nacks = old_nacks; + } + + /* +@@ -773,7 +839,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_ackinfo info; + rxrpc_serial_t ack_serial, acked_serial; +- rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; ++ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; + int nr_acks, offset, ioffset; + + _enter(""); +@@ -789,6 +855,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) + prev_pkt = ntohl(ack.previousPacket); + hard_ack = first_soft_ack - 1; + nr_acks = ack.nAcks; ++ sp->first_ack = first_soft_ack; ++ sp->nr_acks = nr_acks; + summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ? + ack.reason : RXRPC_ACK__INVALID); + +@@ -858,6 +926,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) + if (nr_acks > 0) + skb_condense(skb); + ++ if (call->cong_last_nack) { ++ since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack); ++ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); ++ call->cong_last_nack = NULL; ++ } else { ++ summary.nr_new_acks = first_soft_ack - call->acks_first_seq; ++ call->acks_lowest_nak = first_soft_ack + nr_acks; ++ since = first_soft_ack; ++ } ++ + call->acks_latest_ts = skb->tstamp; + call->acks_first_seq = first_soft_ack; + call->acks_prev_seq = prev_pkt; +@@ -866,7 +944,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) + case RXRPC_ACK_PING: + break; + default: +- if (after(acked_serial, call->acks_highest_serial)) ++ if (acked_serial && after(acked_serial, call->acks_highest_serial)) + call->acks_highest_serial = acked_serial; + break; + } +@@ -905,8 +983,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) + if (nr_acks > 0) { + if (offset > (int)skb->len - nr_acks) + return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); +- rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack, +- nr_acks, &summary); ++ rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since); ++ rxrpc_get_skb(skb, rxrpc_skb_get_last_nack); ++ call->cong_last_nack = skb; + } + + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && +-- +2.43.0 + diff --git a/queue-6.6/rxrpc-fix-delayed-acks-to-not-set-the-reference-seri.patch b/queue-6.6/rxrpc-fix-delayed-acks-to-not-set-the-reference-seri.patch new file mode 100644 index 00000000000..b1e0a8fc0d7 --- /dev/null +++ b/queue-6.6/rxrpc-fix-delayed-acks-to-not-set-the-reference-seri.patch @@ -0,0 +1,74 @@ +From c0e33001a35a68d232f588d79e619466db313cd1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Feb 2024 15:19:14 +0000 +Subject: rxrpc: Fix delayed ACKs to not set the reference serial number + +From: David Howells + +[ Upstream commit e7870cf13d20f56bfc19f9c3e89707c69cf104ef ] + +Fix the construction of delayed ACKs to not set the reference serial number +as they can't be used as an RTT reference. + +Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") +Signed-off-by: David Howells +cc: Marc Dionne +cc: "David S. Miller" +cc: Eric Dumazet +cc: Jakub Kicinski +cc: Paolo Abeni +cc: linux-afs@lists.infradead.org +cc: netdev@vger.kernel.org +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/rxrpc/ar-internal.h | 1 - + net/rxrpc/call_event.c | 6 +----- + 2 files changed, 1 insertion(+), 6 deletions(-) + +diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h +index 668fdc94b299..f6375772fa93 100644 +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -692,7 +692,6 @@ struct rxrpc_call { + /* Receive-phase ACK management (ACKs we send). */ + u8 ackr_reason; /* reason to ACK */ + u16 ackr_sack_base; /* Starting slot in SACK table ring */ +- rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ + rxrpc_seq_t ackr_window; /* Base of SACK window */ + rxrpc_seq_t ackr_wtop; /* Base of SACK window */ + unsigned int ackr_nr_unacked; /* Number of unacked packets */ +diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c +index e363f21a2014..c61efe08695d 100644 +--- a/net/rxrpc/call_event.c ++++ b/net/rxrpc/call_event.c +@@ -43,8 +43,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial, + unsigned long expiry = rxrpc_soft_ack_delay; + unsigned long now = jiffies, ack_at; + +- call->ackr_serial = serial; +- + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_soft_ack_delay; + if (call->peer->srtt_us != 0) +@@ -373,7 +371,6 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call) + bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) + { + unsigned long now, next, t; +- rxrpc_serial_t ackr_serial; + bool resend = false, expired = false; + s32 abort_code; + +@@ -423,8 +420,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) + if (time_after_eq(now, t)) { + trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now); + cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET); +- ackr_serial = xchg(&call->ackr_serial, 0); +- rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial, ++ rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0, + rxrpc_propose_ack_ping_for_lost_ack); + } + +-- +2.43.0 + diff --git a/queue-6.6/rxrpc-fix-generation-of-serial-numbers-to-skip-zero.patch b/queue-6.6/rxrpc-fix-generation-of-serial-numbers-to-skip-zero.patch new file mode 100644 index 00000000000..e68d7164dd9 --- /dev/null +++ b/queue-6.6/rxrpc-fix-generation-of-serial-numbers-to-skip-zero.patch @@ -0,0 +1,169 @@ +From de46391086f403a2041e5b9e2dcab4bef84569a2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Feb 2024 15:19:13 +0000 +Subject: rxrpc: Fix generation of serial numbers to skip zero + +From: David Howells + +[ Upstream commit f31041417bf7f4a4df8b3bfb52cb31bbe805b934 ] + +In the Rx protocol, every packet generated is marked with a per-connection +monotonically increasing serial number. This number can be referenced in +an ACK packet generated in response to an incoming packet - thereby +allowing the sender to use this for RTT determination, amongst other +things. + +However, if the reference field in the ACK is zero, it doesn't refer to any +incoming packet (it could be a ping to find out if a packet got lost, for +example) - so we shouldn't generate zero serial numbers. + +Fix the generation of serial numbers to retry if it comes up with a zero. + +Furthermore, since the serial numbers are only ever allocated within the +I/O thread this connection is bound to, there's no need for atomics so +remove that too. + +Fixes: 17926a79320a ("[AF_RXRPC]: Provide secure RxRPC sockets for use by userspace and kernel both") +Signed-off-by: David Howells +cc: Marc Dionne +cc: "David S. 
Miller" +cc: Eric Dumazet +cc: Jakub Kicinski +cc: Paolo Abeni +cc: linux-afs@lists.infradead.org +cc: netdev@vger.kernel.org +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/rxrpc/ar-internal.h | 16 +++++++++++++++- + net/rxrpc/conn_event.c | 2 +- + net/rxrpc/output.c | 8 ++++---- + net/rxrpc/proc.c | 2 +- + net/rxrpc/rxkad.c | 4 ++-- + 5 files changed, 23 insertions(+), 9 deletions(-) + +diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h +index e8b43408136a..668fdc94b299 100644 +--- a/net/rxrpc/ar-internal.h ++++ b/net/rxrpc/ar-internal.h +@@ -506,7 +506,7 @@ struct rxrpc_connection { + enum rxrpc_call_completion completion; /* Completion condition */ + s32 abort_code; /* Abort code of connection abort */ + int debug_id; /* debug ID for printks */ +- atomic_t serial; /* packet serial number counter */ ++ rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */ + unsigned int hi_serial; /* highest serial number received */ + u32 service_id; /* Service ID, possibly upgraded */ + u32 security_level; /* Security level selected */ +@@ -818,6 +818,20 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb) + + #include + ++/* ++ * Allocate the next serial number on a connection. 0 must be skipped. ++ */ ++static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn) ++{ ++ rxrpc_serial_t serial; ++ ++ serial = conn->tx_serial; ++ if (serial == 0) ++ serial = 1; ++ conn->tx_serial = serial + 1; ++ return serial; ++} ++ + /* + * af_rxrpc.c + */ +diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c +index 95f4bc206b3d..ec5eae60ab0c 100644 +--- a/net/rxrpc/conn_event.c ++++ b/net/rxrpc/conn_event.c +@@ -117,7 +117,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, + iov[2].iov_base = &ack_info; + iov[2].iov_len = sizeof(ack_info); + +- serial = atomic_inc_return(&conn->serial); ++ serial = rxrpc_get_next_serial(conn); + + pkt.whdr.epoch = htonl(conn->proto.epoch); + pkt.whdr.cid = htonl(conn->proto.cid | channel); +diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c +index a0906145e829..4a292f860ae3 100644 +--- a/net/rxrpc/output.c ++++ b/net/rxrpc/output.c +@@ -216,7 +216,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) + iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n; + len = iov[0].iov_len; + +- serial = atomic_inc_return(&conn->serial); ++ serial = rxrpc_get_next_serial(conn); + txb->wire.serial = htonl(serial); + trace_rxrpc_tx_ack(call->debug_id, serial, + ntohl(txb->ack.firstPacket), +@@ -302,7 +302,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) + iov[0].iov_base = &pkt; + iov[0].iov_len = sizeof(pkt); + +- serial = atomic_inc_return(&conn->serial); ++ serial = rxrpc_get_next_serial(conn); + pkt.whdr.serial = htonl(serial); + + iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt)); +@@ -334,7 +334,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) + _enter("%x,{%d}", txb->seq, txb->len); + + /* Each transmission of a Tx packet needs a new serial number */ +- serial = atomic_inc_return(&conn->serial); ++ serial = rxrpc_get_next_serial(conn); + txb->wire.serial = htonl(serial); + + if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) && +@@ -558,7 +558,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn) + + len = iov[0].iov_len + iov[1].iov_len; + +- serial = atomic_inc_return(&conn->serial); ++ serial = rxrpc_get_next_serial(conn); + whdr.serial = htonl(serial); + + 
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len); +diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c +index 682636d3b060..208312c244f6 100644 +--- a/net/rxrpc/proc.c ++++ b/net/rxrpc/proc.c +@@ -181,7 +181,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) + atomic_read(&conn->active), + state, + key_serial(conn->key), +- atomic_read(&conn->serial), ++ conn->tx_serial, + conn->hi_serial, + conn->channels[0].call_id, + conn->channels[1].call_id, +diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c +index b52dedcebce0..6b32d61d4cdc 100644 +--- a/net/rxrpc/rxkad.c ++++ b/net/rxrpc/rxkad.c +@@ -664,7 +664,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) + + len = iov[0].iov_len + iov[1].iov_len; + +- serial = atomic_inc_return(&conn->serial); ++ serial = rxrpc_get_next_serial(conn); + whdr.serial = htonl(serial); + + ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len); +@@ -721,7 +721,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn, + + len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len; + +- serial = atomic_inc_return(&conn->serial); ++ serial = rxrpc_get_next_serial(conn); + whdr.serial = htonl(serial); + + rxrpc_local_dont_fragment(conn->local, false); +-- +2.43.0 + diff --git a/queue-6.6/rxrpc-fix-response-to-ping-response-acks-to-a-dead-c.patch b/queue-6.6/rxrpc-fix-response-to-ping-response-acks-to-a-dead-c.patch new file mode 100644 index 00000000000..a50cae0808c --- /dev/null +++ b/queue-6.6/rxrpc-fix-response-to-ping-response-acks-to-a-dead-c.patch @@ -0,0 +1,50 @@ +From 3a2a90749fb0144dd42dd4c9a3906a3548afe7b2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Feb 2024 15:19:15 +0000 +Subject: rxrpc: Fix response to PING RESPONSE ACKs to a dead call + +From: David Howells + +[ Upstream commit 6f769f22822aa4124b556339781b04d810f0e038 ] + +Stop rxrpc from sending a DUP ACK in response to a PING RESPONSE ACK on a +dead call. We may have initiated the ping but the call may have beaten the +response to completion. + +Fixes: 18bfeba50dfd ("rxrpc: Perform terminal call ACK/ABORT retransmission from conn processor") +Signed-off-by: David Howells +cc: Marc Dionne +cc: "David S. Miller" +cc: Eric Dumazet +cc: Jakub Kicinski +cc: Paolo Abeni +cc: linux-afs@lists.infradead.org +cc: netdev@vger.kernel.org +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + net/rxrpc/conn_event.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c +index ec5eae60ab0c..1f251d758cb9 100644 +--- a/net/rxrpc/conn_event.c ++++ b/net/rxrpc/conn_event.c +@@ -95,6 +95,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, + + _enter("%d", conn->debug_id); + ++ if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) { ++ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), ++ &pkt.ack, sizeof(pkt.ack)) < 0) ++ return; ++ if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE) ++ return; ++ } ++ + chan = &conn->channels[channel]; + + /* If the last call got moved on whilst we were waiting to run, just +-- +2.43.0 + diff --git a/queue-6.6/scsi-core-move-scsi_host_busy-out-of-host-lock-if-it.patch b/queue-6.6/scsi-core-move-scsi_host_busy-out-of-host-lock-if-it.patch new file mode 100644 index 00000000000..746bbfbe19e --- /dev/null +++ b/queue-6.6/scsi-core-move-scsi_host_busy-out-of-host-lock-if-it.patch @@ -0,0 +1,69 @@ +From 046204ad6253f309a4c17ef111de7dc530275da3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 3 Feb 2024 10:45:21 +0800 +Subject: scsi: core: Move scsi_host_busy() out of host lock if it is for + per-command + +From: Ming Lei + +[ Upstream commit 4e6c9011990726f4d175e2cdfebe5b0b8cce4839 ] + +Commit 4373534a9850 ("scsi: core: Move scsi_host_busy() out of host lock +for waking up EH handler") intended to fix a hard lockup issue triggered by +EH. The core idea was to move scsi_host_busy() out of the host lock when +processing individual commands for EH. However, a suggested style change +inadvertently caused scsi_host_busy() to remain under the host lock. Fix +this by calling scsi_host_busy() outside the lock. + +Fixes: 4373534a9850 ("scsi: core: Move scsi_host_busy() out of host lock for waking up EH handler") +Cc: Sathya Prakash Veerichetty +Cc: Bart Van Assche +Cc: Ewan D. Milne +Signed-off-by: Ming Lei +Link: https://lore.kernel.org/r/20240203024521.2006455-1-ming.lei@redhat.com +Reviewed-by: Bart Van Assche +Signed-off-by: Martin K. 
Petersen
+Signed-off-by: Sasha Levin
+---
+ drivers/scsi/scsi_error.c | 3 ++-
+ drivers/scsi/scsi_lib.c | 4 +++-
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
+index 3328b175a832..43eff1107038 100644
+--- a/drivers/scsi/scsi_error.c
++++ b/drivers/scsi/scsi_error.c
+@@ -282,11 +282,12 @@ static void scsi_eh_inc_host_failed(struct rcu_head *head)
+ {
+ 	struct scsi_cmnd *scmd = container_of(head, typeof(*scmd), rcu);
+ 	struct Scsi_Host *shost = scmd->device->host;
++	unsigned int busy = scsi_host_busy(shost);
+ 	unsigned long flags;
+ 
+ 	spin_lock_irqsave(shost->host_lock, flags);
+ 	shost->host_failed++;
+-	scsi_eh_wakeup(shost, scsi_host_busy(shost));
++	scsi_eh_wakeup(shost, busy);
+ 	spin_unlock_irqrestore(shost->host_lock, flags);
+ }
+ 
+diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
+index dfdffe55c5a6..552809bca350 100644
+--- a/drivers/scsi/scsi_lib.c
++++ b/drivers/scsi/scsi_lib.c
+@@ -278,9 +278,11 @@ static void scsi_dec_host_busy(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
+ 	rcu_read_lock();
+ 	__clear_bit(SCMD_STATE_INFLIGHT, &cmd->state);
+ 	if (unlikely(scsi_host_in_recovery(shost))) {
++		unsigned int busy = scsi_host_busy(shost);
++
+ 		spin_lock_irqsave(shost->host_lock, flags);
+ 		if (shost->host_failed || shost->host_eh_scheduled)
+-			scsi_eh_wakeup(shost, scsi_host_busy(shost));
++			scsi_eh_wakeup(shost, busy);
+ 		spin_unlock_irqrestore(shost->host_lock, flags);
+ 	}
+ 	rcu_read_unlock();
+-- 
+2.43.0
+
diff --git a/queue-6.6/selftests-cmsg_ipv6-repeat-the-exact-packet.patch b/queue-6.6/selftests-cmsg_ipv6-repeat-the-exact-packet.patch
new file mode 100644
index 00000000000..5b24358adc6
--- /dev/null
+++ b/queue-6.6/selftests-cmsg_ipv6-repeat-the-exact-packet.patch
@@ -0,0 +1,56 @@
+From 2951bb2bae45a8a0b61254131f8815798a77fcae Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Sun, 4 Feb 2024 08:56:18 -0800
+Subject: selftests: cmsg_ipv6: repeat the exact packet
+
+From: Jakub Kicinski
+
+[ Upstream commit 4b00d0c513da58b68df015968721b11396fe4ab3 ]
+
+cmsg_ipv6 test requests tcpdump to capture 4 packets,
+and sends until tcpdump quits. Only the first packet
+is "real", however, and the rest are basic UDP packets.
+So if tcpdump doesn't start in time it will miss
+the real packet and only capture the UDP ones.
+
+This makes the test fail on slow machines (no KVM, or with
+debug enabled) 100% of the time, while it passes in fast
+environments.
+
+Repeat the "real" / expected packet.
+
+Fixes: 9657ad09e1fa ("selftests: net: test IPV6_TCLASS")
+Fixes: 05ae83d5a4a2 ("selftests: net: test IPV6_HOPLIMIT")
+Signed-off-by: Jakub Kicinski
+Reviewed-by: Simon Horman
+Signed-off-by: David S. Miller
+Signed-off-by: Sasha Levin
+---
+ tools/testing/selftests/net/cmsg_ipv6.sh | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
+index 330d0b1ceced..c921750ca118 100755
+--- a/tools/testing/selftests/net/cmsg_ipv6.sh
++++ b/tools/testing/selftests/net/cmsg_ipv6.sh
+@@ -91,7 +91,7 @@ for ovr in setsock cmsg both diff; do
+ 	check_result $? 0 "TCLASS $prot $ovr - pass"
+ 
+ 	while [ -d /proc/$BG ]; do
+-		$NSEXE ./cmsg_sender -6 -p u $TGT6 1234
++		$NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
+ 	done
+ 
+ 	tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
+@@ -128,7 +128,7 @@ for ovr in setsock cmsg both diff; do
+ 	check_result $? 
0 "HOPLIMIT $prot $ovr - pass"
+ 
+ 	while [ -d /proc/$BG ]; do
+-		$NSEXE ./cmsg_sender -6 -p u $TGT6 1234
++		$NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
+ 	done
+ 
+ 	tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
+-- 
+2.43.0
+
diff --git a/queue-6.6/selftests-net-avoid-just-another-constant-wait.patch b/queue-6.6/selftests-net-avoid-just-another-constant-wait.patch
new file mode 100644
index 00000000000..7012c07d15b
--- /dev/null
+++ b/queue-6.6/selftests-net-avoid-just-another-constant-wait.patch
@@ -0,0 +1,69 @@
+From 3de90db3edd150900512e41cd408a46fca882b90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 1 Feb 2024 19:42:41 +0100
+Subject: selftests: net: avoid just another constant wait
+
+From: Paolo Abeni
+
+[ Upstream commit 691bb4e49c98a47bc643dd808453136ce78b15b4 ]
+
+Using a hard-coded constant timeout to wait for some expected
+event is doomed to fail sooner or later, especially in slow
+environments.
+
+Our CI has spotted another such race:
+  # TEST: ipv6: cleanup of cached exceptions - nexthop objects  [FAIL]
+  #   can't delete veth device in a timely manner, PMTU dst likely leaked
+
+Replace the crude sleep with a loop looking for the expected condition
+at a short interval over a much longer period.
+
+Fixes: b3cc4f8a8a41 ("selftests: pmtu: add explicit tests for PMTU exceptions cleanup")
+Signed-off-by: Paolo Abeni
+Reviewed-by: David Ahern
+Link: https://lore.kernel.org/r/fd5c745e9bb665b724473af6a9373a8c2a62b247.1706812005.git.pabeni@redhat.com
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Sasha Levin
+---
+ tools/testing/selftests/net/pmtu.sh | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
+index f0febc19baae..d65fdd407d73 100755
+--- a/tools/testing/selftests/net/pmtu.sh
++++ b/tools/testing/selftests/net/pmtu.sh
+@@ -1957,6 +1957,13 @@ check_command() {
+ 	return 0
+ }
+ 
++check_running() {
++	pid=${1}
++	cmd=${2}
++
++	[ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "${cmd}" ]
++}
++
+ test_cleanup_vxlanX_exception() {
+ 	outer="${1}"
+ 	encap="vxlan"
+@@ -1987,11 +1994,12 @@ test_cleanup_vxlanX_exception() {
+ 
+ 	${ns_a} ip link del dev veth_A-R1 &
+ 	iplink_pid=$!
+-	sleep 1
+-	if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
+-		err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
+-		return 1
+-	fi
++	for i in $(seq 1 20); do
++		check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0
++		sleep 0.1
++	done
++	err "  can't delete veth device in a timely manner, PMTU dst likely leaked"
++	return 1
+ }
+ 
+ test_cleanup_ipv6_exception() {
+-- 
+2.43.0
+
diff --git a/queue-6.6/selftests-net-change-shebang-to-bash-to-support-sour.patch b/queue-6.6/selftests-net-change-shebang-to-bash-to-support-sour.patch
new file mode 100644
index 00000000000..5b2eacd2e88
--- /dev/null
+++ b/queue-6.6/selftests-net-change-shebang-to-bash-to-support-sour.patch
@@ -0,0 +1,89 @@
+From 70d77c9ec77a2262e13223a5afce85ce68520576 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 29 Dec 2023 21:19:31 +0800
+Subject: selftests/net: change shebang to bash to support "source"
+
+From: Yujie Liu
+
+[ Upstream commit 05d92cb0e919239c29b3a26da1f76f1e18fed7d3 ]
+
+The patch set [1] added a general lib.sh in net selftests, and converted
+several test scripts to source the lib.sh. 
+ +unicast_extensions.sh (converted in [1]) and pmtu.sh (converted in [2]) +have a /bin/sh shebang which may point to various shells in different +distributions, but "source" is only available in some of them. For +example, "source" is a built-it function in bash, but it cannot be +used in dash. + +Refer to other scripts that were converted together, simply change the +shebang to bash to fix the following issues when the default /bin/sh +points to other shells. + +not ok 51 selftests: net: unicast_extensions.sh # exit=1 + +v1 -> v2: + - Fix pmtu.sh which has the same issue as unicast_extensions.sh, + suggested by Hangbin + - Change the style of the "source" line to be consistent with other + tests, suggested by Hangbin + +Link: https://lore.kernel.org/all/20231202020110.362433-1-liuhangbin@gmail.com/ [1] +Link: https://lore.kernel.org/all/20231219094856.1740079-1-liuhangbin@gmail.com/ [2] +Reported-by: kernel test robot +Fixes: 378f082eaf37 ("selftests/net: convert pmtu.sh to run it in unique namespace") +Fixes: 0f4765d0b48d ("selftests/net: convert unicast_extensions.sh to run it in unique namespace") +Signed-off-by: Yujie Liu +Reviewed-by: Przemek Kitszel +Reviewed-by: Hangbin Liu +Reviewed-by: Muhammad Usama Anjum +Link: https://lore.kernel.org/r/20231229131931.3961150-1-yujie.liu@intel.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: e71e016ad0f6 ("selftests: net: fix tcp listener handling in pmtu.sh") +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/pmtu.sh | 4 ++-- + tools/testing/selftests/net/unicast_extensions.sh | 4 ++-- + 2 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh +index 8518eaacf4b5..3f118e3f1c66 100755 +--- a/tools/testing/selftests/net/pmtu.sh ++++ b/tools/testing/selftests/net/pmtu.sh +@@ -1,4 +1,4 @@ +-#!/bin/sh ++#!/bin/bash + # SPDX-License-Identifier: GPL-2.0 + # + # Check that route PMTU values match expectations, and that initial device MTU +@@ -198,7 +198,7 @@ + # - pmtu_ipv6_route_change + # Same as above but with IPv6 + +-source ./lib.sh ++source lib.sh + + PAUSE_ON_FAIL=no + VERBOSE=0 +diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh +index b7a2cb9e7477..f52aa5f7da52 100755 +--- a/tools/testing/selftests/net/unicast_extensions.sh ++++ b/tools/testing/selftests/net/unicast_extensions.sh +@@ -1,4 +1,4 @@ +-#!/bin/sh ++#!/bin/bash + # SPDX-License-Identifier: GPL-2.0 + # + # By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project +@@ -28,7 +28,7 @@ + # These tests provide an easy way to flip the expected result of any + # of these behaviors for testing kernel patches that change them. + +-source ./lib.sh ++source lib.sh + + # nettest can be run from PATH or from same directory as this selftest + if ! 
which nettest >/dev/null; then +-- +2.43.0 + diff --git a/queue-6.6/selftests-net-convert-pmtu.sh-to-run-it-in-unique-na.patch b/queue-6.6/selftests-net-convert-pmtu.sh-to-run-it-in-unique-na.patch new file mode 100644 index 00000000000..db462472177 --- /dev/null +++ b/queue-6.6/selftests-net-convert-pmtu.sh-to-run-it-in-unique-na.patch @@ -0,0 +1,108 @@ +From bc23d557ea4185417aaef1dc4120e6250d0138fc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 19 Dec 2023 17:48:55 +0800 +Subject: selftests/net: convert pmtu.sh to run it in unique namespace + +From: Hangbin Liu + +[ Upstream commit 378f082eaf3760cd7430fbcb1e4f8626bb6bc0ae ] + +pmtu test use /bin/sh, so we need to source ./lib.sh instead of lib.sh +Here is the test result after conversion. + + # ./pmtu.sh + TEST: ipv4: PMTU exceptions [ OK ] + TEST: ipv4: PMTU exceptions - nexthop objects [ OK ] + TEST: ipv6: PMTU exceptions [ OK ] + TEST: ipv6: PMTU exceptions - nexthop objects [ OK ] + ... + TEST: ipv4: list and flush cached exceptions - nexthop objects [ OK ] + TEST: ipv6: list and flush cached exceptions [ OK ] + TEST: ipv6: list and flush cached exceptions - nexthop objects [ OK ] + TEST: ipv4: PMTU exception w/route replace [ OK ] + TEST: ipv4: PMTU exception w/route replace - nexthop objects [ OK ] + TEST: ipv6: PMTU exception w/route replace [ OK ] + TEST: ipv6: PMTU exception w/route replace - nexthop objects [ OK ] + +Signed-off-by: Hangbin Liu +Signed-off-by: David S. Miller +Stable-dep-of: e71e016ad0f6 ("selftests: net: fix tcp listener handling in pmtu.sh") +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/pmtu.sh | 27 +++++++++------------------ + 1 file changed, 9 insertions(+), 18 deletions(-) + +diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh +index 4a5f031be232..8518eaacf4b5 100755 +--- a/tools/testing/selftests/net/pmtu.sh ++++ b/tools/testing/selftests/net/pmtu.sh +@@ -198,8 +198,7 @@ + # - pmtu_ipv6_route_change + # Same as above but with IPv6 + +-# Kselftest framework requirement - SKIP code is 4. +-ksft_skip=4 ++source ./lib.sh + + PAUSE_ON_FAIL=no + VERBOSE=0 +@@ -268,16 +267,6 @@ tests=" + pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" + +-NS_A="ns-A" +-NS_B="ns-B" +-NS_C="ns-C" +-NS_R1="ns-R1" +-NS_R2="ns-R2" +-ns_a="ip netns exec ${NS_A}" +-ns_b="ip netns exec ${NS_B}" +-ns_c="ip netns exec ${NS_C}" +-ns_r1="ip netns exec ${NS_R1}" +-ns_r2="ip netns exec ${NS_R2}" + # Addressing and routing for tests with routers: four network segments, with + # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an + # identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2). 
+@@ -543,13 +532,17 @@ setup_ip6ip6() { + } + + setup_namespaces() { ++ setup_ns NS_A NS_B NS_C NS_R1 NS_R2 + for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do +- ip netns add ${n} || return 1 +- + # Disable DAD, so that we don't have to wait to use the + # configured IPv6 addresses + ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0 + done ++ ns_a="ip netns exec ${NS_A}" ++ ns_b="ip netns exec ${NS_B}" ++ ns_c="ip netns exec ${NS_C}" ++ ns_r1="ip netns exec ${NS_R1}" ++ ns_r2="ip netns exec ${NS_R2}" + } + + setup_veth() { +@@ -839,7 +832,7 @@ setup_bridge() { + run_cmd ${ns_a} ip link set br0 up + + run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C +- run_cmd ${ns_c} ip link set veth_A-C netns ns-A ++ run_cmd ${ns_c} ip link set veth_A-C netns ${NS_A} + + run_cmd ${ns_a} ip link set veth_A-C up + run_cmd ${ns_c} ip link set veth_C-A up +@@ -944,9 +937,7 @@ cleanup() { + done + socat_pids= + +- for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do +- ip netns del ${n} 2> /dev/null +- done ++ cleanup_all_ns + + ip link del veth_A-C 2>/dev/null + ip link del veth_A-R1 2>/dev/null +-- +2.43.0 + diff --git a/queue-6.6/selftests-net-convert-unicast_extensions.sh-to-run-i.patch b/queue-6.6/selftests-net-convert-unicast_extensions.sh-to-run-i.patch new file mode 100644 index 00000000000..3f80df89b88 --- /dev/null +++ b/queue-6.6/selftests-net-convert-unicast_extensions.sh-to-run-i.patch @@ -0,0 +1,191 @@ +From 3cbdd083ed8e6f9be09e2b6c3bb8e946d880a965 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 2 Dec 2023 10:01:10 +0800 +Subject: selftests/net: convert unicast_extensions.sh to run it in unique + namespace + +From: Hangbin Liu + +[ Upstream commit 0f4765d0b48d90ede9788c7edb2e072eee20f88e ] + +Here is the test result after conversion. + + # ./unicast_extensions.sh + /usr/bin/which: no nettest in (/root/.local/bin:/root/bin:/usr/share/Modules/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin) + ########################################################################### + Unicast address extensions tests (behavior of reserved IPv4 addresses) + ########################################################################### + TEST: assign and ping within 240/4 (1 of 2) (is allowed) [ OK ] + TEST: assign and ping within 240/4 (2 of 2) (is allowed) [ OK ] + TEST: assign and ping within 0/8 (1 of 2) (is allowed) [ OK ] + + ... + + TEST: assign and ping class D address (is forbidden) [ OK ] + TEST: routing using class D (is forbidden) [ OK ] + TEST: routing using 127/8 (is forbidden) [ OK ] + +Acked-by: David Ahern +Signed-off-by: Hangbin Liu +Signed-off-by: Paolo Abeni +Stable-dep-of: e71e016ad0f6 ("selftests: net: fix tcp listener handling in pmtu.sh") +Signed-off-by: Sasha Levin +--- + .../selftests/net/unicast_extensions.sh | 99 +++++++++---------- + 1 file changed, 46 insertions(+), 53 deletions(-) + +diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh +index 2d10ccac898a..b7a2cb9e7477 100755 +--- a/tools/testing/selftests/net/unicast_extensions.sh ++++ b/tools/testing/selftests/net/unicast_extensions.sh +@@ -28,8 +28,7 @@ + # These tests provide an easy way to flip the expected result of any + # of these behaviors for testing kernel patches that change them. + +-# Kselftest framework requirement - SKIP code is 4. +-ksft_skip=4 ++source ./lib.sh + + # nettest can be run from PATH or from same directory as this selftest + if ! 
which nettest >/dev/null; then +@@ -61,20 +60,20 @@ _do_segmenttest(){ + # foo --- bar + # Arguments: ip_a ip_b prefix_length test_description + # +- # Caller must set up foo-ns and bar-ns namespaces ++ # Caller must set up $foo_ns and $bar_ns namespaces + # containing linked veth devices foo and bar, + # respectively. + +- ip -n foo-ns address add $1/$3 dev foo || return 1 +- ip -n foo-ns link set foo up || return 1 +- ip -n bar-ns address add $2/$3 dev bar || return 1 +- ip -n bar-ns link set bar up || return 1 ++ ip -n $foo_ns address add $1/$3 dev foo || return 1 ++ ip -n $foo_ns link set foo up || return 1 ++ ip -n $bar_ns address add $2/$3 dev bar || return 1 ++ ip -n $bar_ns link set bar up || return 1 + +- ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 +- ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 ++ ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1 ++ ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1 + +- nettest -B -N bar-ns -O foo-ns -r $1 || return 1 +- nettest -B -N foo-ns -O bar-ns -r $2 || return 1 ++ nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1 ++ nettest -B -N $foo_ns -O $bar_ns -r $2 || return 1 + + return 0 + } +@@ -88,31 +87,31 @@ _do_route_test(){ + # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description + # Displays test result and returns success or failure. + +- # Caller must set up foo-ns, bar-ns, and router-ns ++ # Caller must set up $foo_ns, $bar_ns, and $router_ns + # containing linked veth devices foo-foo1, bar1-bar +- # (foo in foo-ns, foo1 and bar1 in router-ns, and +- # bar in bar-ns). +- +- ip -n foo-ns address add $1/$5 dev foo || return 1 +- ip -n foo-ns link set foo up || return 1 +- ip -n foo-ns route add default via $2 || return 1 +- ip -n bar-ns address add $4/$5 dev bar || return 1 +- ip -n bar-ns link set bar up || return 1 +- ip -n bar-ns route add default via $3 || return 1 +- ip -n router-ns address add $2/$5 dev foo1 || return 1 +- ip -n router-ns link set foo1 up || return 1 +- ip -n router-ns address add $3/$5 dev bar1 || return 1 +- ip -n router-ns link set bar1 up || return 1 +- +- echo 1 | ip netns exec router-ns tee /proc/sys/net/ipv4/ip_forward +- +- ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1 +- ip netns exec foo-ns timeout 2 ping -c 1 $4 || return 1 +- ip netns exec bar-ns timeout 2 ping -c 1 $3 || return 1 +- ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1 +- +- nettest -B -N bar-ns -O foo-ns -r $1 || return 1 +- nettest -B -N foo-ns -O bar-ns -r $4 || return 1 ++ # (foo in $foo_ns, foo1 and bar1 in $router_ns, and ++ # bar in $bar_ns). 
++
++	ip -n $foo_ns address add $1/$5 dev foo || return 1
++	ip -n $foo_ns link set foo up || return 1
++	ip -n $foo_ns route add default via $2 || return 1
++	ip -n $bar_ns address add $4/$5 dev bar || return 1
++	ip -n $bar_ns link set bar up || return 1
++	ip -n $bar_ns route add default via $3 || return 1
++	ip -n $router_ns address add $2/$5 dev foo1 || return 1
++	ip -n $router_ns link set foo1 up || return 1
++	ip -n $router_ns address add $3/$5 dev bar1 || return 1
++	ip -n $router_ns link set bar1 up || return 1
++
++	echo 1 | ip netns exec $router_ns tee /proc/sys/net/ipv4/ip_forward
++
++	ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1
++	ip netns exec $foo_ns timeout 2 ping -c 1 $4 || return 1
++	ip netns exec $bar_ns timeout 2 ping -c 1 $3 || return 1
++	ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1
++
++	nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1
++	nettest -B -N $foo_ns -O $bar_ns -r $4 || return 1
+ 
+ 	return 0
+ }
+@@ -121,17 +120,15 @@ segmenttest(){
+ 	# Sets up veth link and tries to connect over it.
+ 	# Arguments: ip_a ip_b prefix_len test_description
+ 	hide_output
+-	ip netns add foo-ns
+-	ip netns add bar-ns
+-	ip link add foo netns foo-ns type veth peer name bar netns bar-ns
++	setup_ns foo_ns bar_ns
++	ip link add foo netns $foo_ns type veth peer name bar netns $bar_ns
+ 
+ 	test_result=0
+ 	_do_segmenttest "$@" || test_result=1
+ 
+-	ip netns pids foo-ns | xargs -r kill -9
+-	ip netns pids bar-ns | xargs -r kill -9
+-	ip netns del foo-ns
+-	ip netns del bar-ns
++	ip netns pids $foo_ns | xargs -r kill -9
++	ip netns pids $bar_ns | xargs -r kill -9
++	cleanup_ns $foo_ns $bar_ns
+ 	show_output
+ 
+ 	# inverted tests will expect failure instead of success
+@@ -147,21 +144,17 @@ route_test(){
+ 	# Returns success or failure.
+ 
+ 	hide_output
+-	ip netns add foo-ns
+-	ip netns add bar-ns
+-	ip netns add router-ns
+-	ip link add foo netns foo-ns type veth peer name foo1 netns router-ns
+-	ip link add bar netns bar-ns type veth peer name bar1 netns router-ns
++	setup_ns foo_ns bar_ns router_ns
++	ip link add foo netns $foo_ns type veth peer name foo1 netns $router_ns
++	ip link add bar netns $bar_ns type veth peer name bar1 netns $router_ns
+ 
+ 	test_result=0
+ 	_do_route_test "$@" || test_result=1
+ 
+-	ip netns pids foo-ns | xargs -r kill -9
+-	ip netns pids bar-ns | xargs -r kill -9
+-	ip netns pids router-ns | xargs -r kill -9
+-	ip netns del foo-ns
+-	ip netns del bar-ns
+-	ip netns del router-ns
++	ip netns pids $foo_ns | xargs -r kill -9
++	ip netns pids $bar_ns | xargs -r kill -9
++	ip netns pids $router_ns | xargs -r kill -9
++	cleanup_ns $foo_ns $bar_ns $router_ns
+ 
+ 	show_output
+ 
+--
+2.43.0
+
diff --git a/queue-6.6/selftests-net-cut-more-slack-for-gro-fwd-tests.patch b/queue-6.6/selftests-net-cut-more-slack-for-gro-fwd-tests.patch
new file mode 100644
index 00000000000..374fa3c074a
--- /dev/null
+++ b/queue-6.6/selftests-net-cut-more-slack-for-gro-fwd-tests.patch
@@ -0,0 +1,100 @@
+From 0ed9d5ec8ee421c08f15e80694ac979e228ba42d Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 1 Feb 2024 19:42:38 +0100
+Subject: selftests: net: cut more slack for gro fwd tests.
+
+From: Paolo Abeni
+
+[ Upstream commit cb9f4a30fb85e1f4f149ada595a67899adb3db19 ]
+
+The udpgro_fwd.sh self-tests are somewhat unstable. There are
+a few timing constraints that we struggle to meet in very slow
+environments.
+
+Instead of skipping the tests entirely in such environments, increase
+the test resilience WRT very slow hosts: increase the inter-packet
+timeouts, avoid resetting the counters every second and finally
+reduce the background traffic noise.
+
+Tested with:
+
+for I in $(seq 1 100); do
+        ./tools/testing/selftests/kselftest_install/run_kselftest.sh \
+                -t net:udpgro_fwd.sh || exit -1
+done
+
+in a slow environment.
+
+Fixes: a062260a9d5f ("selftests: net: add UDP GRO forwarding self-tests")
+Signed-off-by: Paolo Abeni
+Reviewed-by: David Ahern
+Link: https://lore.kernel.org/r/f4b6b11064a0d39182a9ae6a853abae3e9b4426a.1706812005.git.pabeni@redhat.com
+Signed-off-by: Jakub Kicinski
+Signed-off-by: Sasha Levin
+---
+ tools/testing/selftests/net/udpgro_fwd.sh     | 14 ++++++++++++--
+ tools/testing/selftests/net/udpgso_bench_rx.c |  2 +-
+ 2 files changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
+index d6b9c759043c..9cd5e885e91f 100755
+--- a/tools/testing/selftests/net/udpgro_fwd.sh
++++ b/tools/testing/selftests/net/udpgro_fwd.sh
+@@ -39,6 +39,10 @@ create_ns() {
+ 	for ns in $NS_SRC $NS_DST; do
+ 		ip netns add $ns
+ 		ip -n $ns link set dev lo up
++
++		# disable route solicitations to decrease 'noise' traffic
++		ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0
++		ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0
+ 	done
+ 
+ 	ip link add name veth$SRC type veth peer name veth$DST
+@@ -80,6 +84,12 @@ create_vxlan_pair() {
+ 		create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6
+ 		ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad
+ 	done
++
++	# preload the neighbour cache, to avoid some noisy traffic
++	local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST |jq -r '.[]["address"]')
++	local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC |jq -r '.[]["address"]')
++	ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC
++	ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST
+ }
+ 
+ is_ipv6() {
+@@ -119,7 +129,7 @@ run_test() {
+ 	# not enable GRO
+ 	ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
+ 	ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
+-	ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args &
++	ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args &
+ 	local spid=$!
+ 	wait_local_port_listen "$NS_DST" 8000 udp
+ 	ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
+@@ -168,7 +178,7 @@ run_bench() {
+ 	# bind the sender and the receiver to different CPUs to try
+ 	# get reproducible results
+ 	ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
+-	ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 &
++	ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 &
+ 	local spid=$!
+ wait_local_port_listen "$NS_DST" 8000 udp + ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst +diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c +index f35a924d4a30..1cbadd267c96 100644 +--- a/tools/testing/selftests/net/udpgso_bench_rx.c ++++ b/tools/testing/selftests/net/udpgso_bench_rx.c +@@ -375,7 +375,7 @@ static void do_recv(void) + do_flush_udp(fd); + + tnow = gettimeofday_ms(); +- if (tnow > treport) { ++ if (!cfg_expected_pkt_nr && tnow > treport) { + if (packets) + fprintf(stderr, + "%s rx: %6lu MB/s %8lu calls/s\n", +-- +2.43.0 + diff --git a/queue-6.6/selftests-net-fix-tcp-listener-handling-in-pmtu.sh.patch b/queue-6.6/selftests-net-fix-tcp-listener-handling-in-pmtu.sh.patch new file mode 100644 index 00000000000..08fb4bd3dd9 --- /dev/null +++ b/queue-6.6/selftests-net-fix-tcp-listener-handling-in-pmtu.sh.patch @@ -0,0 +1,64 @@ +From 685662e3e8d8f1a92798f2b925b0e7004bebd76c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Feb 2024 19:42:40 +0100 +Subject: selftests: net: fix tcp listener handling in pmtu.sh + +From: Paolo Abeni + +[ Upstream commit e71e016ad0f6e641a7898b8cda5f62f8e2beb2f1 ] + +The pmtu.sh test uses a few TCP listener in a problematic way: +It hard-codes a constant timeout to wait for the listener starting-up +in background. That introduces unneeded latency and on very slow and +busy host it can fail. + +Additionally the test starts again the same listener in the same +namespace on the same port, just after the previous connection +completed. Fast host can attempt starting the new server before the +old one really closed the socket. + +Address the issues using the wait_local_port_listen helper and +explicitly waiting for the background listener process exit. + +Fixes: 136a1b434bbb ("selftests: net: test vxlan pmtu exceptions with tcp") +Signed-off-by: Paolo Abeni +Reviewed-by: David Ahern +Link: https://lore.kernel.org/r/f8e8f6d44427d8c45e9f6a71ee1a321047452087.1706812005.git.pabeni@redhat.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + tools/testing/selftests/net/pmtu.sh | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh +index 3f118e3f1c66..f0febc19baae 100755 +--- a/tools/testing/selftests/net/pmtu.sh ++++ b/tools/testing/selftests/net/pmtu.sh +@@ -199,6 +199,7 @@ + # Same as above but with IPv6 + + source lib.sh ++source net_helper.sh + + PAUSE_ON_FAIL=no + VERBOSE=0 +@@ -1336,13 +1337,15 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { + TCPDST="TCP:[${dst}]:50000" + fi + ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile & ++ local socat_pid=$! 
+
+-	sleep 1
++	wait_local_port_listen ${NS_B} 50000 tcp
+ 
+ 	dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+ 
+ 	size=$(du -sb $tmpoutfile)
+ 	size=${size%%/tmp/*}
++	wait ${socat_pid}
+ 
+ 	[ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1
+ 	done
+--
+2.43.0
+
diff --git a/queue-6.6/selftests-net-let-big_tcp-test-cope-with-slow-env.patch b/queue-6.6/selftests-net-let-big_tcp-test-cope-with-slow-env.patch
new file mode 100644
index 00000000000..fb83af3cd69
--- /dev/null
+++ b/queue-6.6/selftests-net-let-big_tcp-test-cope-with-slow-env.patch
@@ -0,0 +1,49 @@
+From d9c2b0e33d3d1e2e53d4d1427c706022470fe090 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Fri, 2 Feb 2024 17:06:59 +0100
+Subject: selftests: net: let big_tcp test cope with slow env
+
+From: Paolo Abeni
+
+[ Upstream commit a19747c3b9bf6476cc36d0a3a5ef0ff92999169e ]
+
+In very slow environments, most big TCP cases including
+segmentation and reassembly of big TCP packets have a good
+chance to fail: by default the TCP client uses write size
+well below 64K. If the host is slow enough, autocorking is
+unable to build real big TCP packets.
+
+Address the issue using much larger write operations.
+
+Note that it is hard to observe the issue without an extremely
+slow and/or overloaded environment; reduce the TCP transfer
+time to allow for much easier/faster reproducibility.
+
+Fixes: 6bb382bcf742 ("selftests: add a selftest for big tcp")
+Signed-off-by: Paolo Abeni
+Reviewed-by: Eric Dumazet
+Acked-by: Xin Long
+Signed-off-by: David S. Miller
+Signed-off-by: Sasha Levin
+---
+ tools/testing/selftests/net/big_tcp.sh | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh
+index cde9a91c4797..2db9d15cd45f 100755
+--- a/tools/testing/selftests/net/big_tcp.sh
++++ b/tools/testing/selftests/net/big_tcp.sh
+@@ -122,7 +122,9 @@ do_netperf() {
+ 	local netns=$1
+ 
+ 	[ "$NF" = "6" ] && serip=$SERVER_IP6
+-	ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip 2>&1 >/dev/null
++
++	# use large write to be sure to generate big tcp packets
++	ip net exec $netns netperf -$NF -t TCP_STREAM -l 1 -H $serip -- -m 262144 2>&1 >/dev/null
+ }
+ 
+ do_test() {
+--
+2.43.0
+
diff --git a/queue-6.6/series b/queue-6.6/series
index 2ccd55c87c8..46afaee0ef0 100644
--- a/queue-6.6/series
+++ b/queue-6.6/series
@@ -35,3 +35,63 @@ xfs-clean-up-dqblk-extraction.patch
 xfs-dquot-recovery-does-not-validate-the-recovered-d.patch
 xfs-clean-up-fs_xflag_realtime-handling-in-xfs_ioctl.patch
 xfs-respect-the-stable-writes-flag-on-the-rt-device.patch
+drm-msms-dp-fixed-link-clock-divider-bits-be-over-wr.patch
+drm-msm-dp-return-correct-colorimetry-for-dp_test_dy.patch
+drm-msm-dpu-check-for-valid-hw_pp-in-dpu_encoder_hel.patch
+x86-efistub-give-up-if-memory-attribute-protocol-ret.patch
+x86-efistub-avoid-placing-the-kernel-below-load_phys.patch
+net-stmmac-xgmac-fix-handling-of-dpp-safety-error-fo.patch
+wifi-mac80211-fix-rcu-use-in-tdls-fast-xmit.patch
+wifi-mac80211-fix-waiting-for-beacons-logic.patch
+wifi-iwlwifi-exit-esr-only-after-the-fw-does.patch
+wifi-brcmfmac-adjust-n_channels-usage-for-__counted_.patch
+netdevsim-avoid-potential-loop-in-nsim_dev_trap_repo.patch
+net-atlantic-fix-dma-mapping-for-ptp-hwts-ring.patch
+selftests-net-cut-more-slack-for-gro-fwd-tests.patch
+selftests-net-convert-unicast_extensions.sh-to-run-i.patch
+selftests-net-convert-pmtu.sh-to-run-it-in-unique-na.patch +selftests-net-change-shebang-to-bash-to-support-sour.patch +selftests-net-fix-tcp-listener-handling-in-pmtu.sh.patch +selftests-net-avoid-just-another-constant-wait.patch +tsnep-fix-mapping-for-zero-copy-xdp_tx-action.patch +tunnels-fix-out-of-bounds-access-when-building-ipv6-.patch +atm-idt77252-fix-a-memleak-in-open_card_ubr0.patch +octeontx2-pf-fix-a-memleak-otx2_sq_init.patch +hwmon-aspeed-pwm-tacho-mutex-for-tach-reading.patch +hwmon-coretemp-fix-out-of-bounds-memory-access.patch +hwmon-coretemp-fix-bogus-core_id-to-attr-name-mappin.patch +inet-read-sk-sk_family-once-in-inet_recv_error.patch +drm-i915-gvt-fix-uninitialized-variable-in-handle_mm.patch +rxrpc-fix-generation-of-serial-numbers-to-skip-zero.patch +rxrpc-fix-delayed-acks-to-not-set-the-reference-seri.patch +rxrpc-fix-response-to-ping-response-acks-to-a-dead-c.patch +rxrpc-fix-counting-of-new-acks-and-nacks.patch +selftests-net-let-big_tcp-test-cope-with-slow-env.patch +tipc-check-the-bearer-type-before-calling-tipc_udp_n.patch +af_unix-call-kfree_skb-for-dead-unix_-sk-oob_skb-in-.patch +ppp_async-limit-mru-to-64k.patch +selftests-cmsg_ipv6-repeat-the-exact-packet.patch +netfilter-nft_compat-narrow-down-revision-to-unsigne.patch +netfilter-nft_compat-reject-unused-compat-flag.patch +netfilter-nft_compat-restrict-match-target-protocol-.patch +drm-amd-display-fix-panel_cntl-could-be-null-in-dcn2.patch +drm-amd-display-add-null-test-for-timing-generator-i.patch +drm-amd-display-implement-bounds-check-for-stream-en.patch +netfilter-nft_ct-reject-direction-for-ct-id.patch +netfilter-nft_set_pipapo-store-index-in-scratch-maps.patch +netfilter-nft_set_pipapo-add-helper-to-release-pcpu-.patch +netfilter-nft_set_pipapo-remove-scratch_aligned-poin.patch +fs-ntfs3-fix-an-null-dereference-bug.patch +riscv-improve-tlb_flush.patch +riscv-make-__flush_tlb_range-loop-over-pte-instead-o.patch +riscv-improve-flush_tlb_kernel_range.patch +mm-introduce-flush_cache_vmap_early.patch +riscv-mm-execute-local-tlb-flush-after-populating-vm.patch +riscv-fix-set_huge_pte_at-for-napot-mapping.patch +riscv-fix-hugetlb_mask_last_page-when-napot-is-enabl.patch +scsi-core-move-scsi_host_busy-out-of-host-lock-if-it.patch +riscv-flush-the-tlb-when-a-page-directory-is-freed.patch +libceph-rename-read_sparse_msg_-to-read_partial_spar.patch +libceph-just-wait-for-more-data-to-be-available-on-t.patch +riscv-fix-arch_hugetlb_migration_supported-for-napot.patch +riscv-declare-overflow_stack-as-exported-from-traps..patch diff --git a/queue-6.6/tipc-check-the-bearer-type-before-calling-tipc_udp_n.patch b/queue-6.6/tipc-check-the-bearer-type-before-calling-tipc_udp_n.patch new file mode 100644 index 00000000000..cf551774b5f --- /dev/null +++ b/queue-6.6/tipc-check-the-bearer-type-before-calling-tipc_udp_n.patch @@ -0,0 +1,80 @@ +From baa33704263c0034a8805888162686f9f7337c09 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Feb 2024 00:23:09 +0900 +Subject: tipc: Check the bearer type before calling tipc_udp_nl_bearer_add() + +From: Shigeru Yoshida + +[ Upstream commit 3871aa01e1a779d866fa9dfdd5a836f342f4eb87 ] + +syzbot reported the following general protection fault [1]: + +general protection fault, probably for non-canonical address 0xdffffc0000000010: 0000 [#1] PREEMPT SMP KASAN +KASAN: null-ptr-deref in range [0x0000000000000080-0x0000000000000087] +... +RIP: 0010:tipc_udp_is_known_peer+0x9c/0x250 net/tipc/udp_media.c:291 +... 
+Call Trace: + + tipc_udp_nl_bearer_add+0x212/0x2f0 net/tipc/udp_media.c:646 + tipc_nl_bearer_add+0x21e/0x360 net/tipc/bearer.c:1089 + genl_family_rcv_msg_doit+0x1fc/0x2e0 net/netlink/genetlink.c:972 + genl_family_rcv_msg net/netlink/genetlink.c:1052 [inline] + genl_rcv_msg+0x561/0x800 net/netlink/genetlink.c:1067 + netlink_rcv_skb+0x16b/0x440 net/netlink/af_netlink.c:2544 + genl_rcv+0x28/0x40 net/netlink/genetlink.c:1076 + netlink_unicast_kernel net/netlink/af_netlink.c:1341 [inline] + netlink_unicast+0x53b/0x810 net/netlink/af_netlink.c:1367 + netlink_sendmsg+0x8b7/0xd70 net/netlink/af_netlink.c:1909 + sock_sendmsg_nosec net/socket.c:730 [inline] + __sock_sendmsg+0xd5/0x180 net/socket.c:745 + ____sys_sendmsg+0x6ac/0x940 net/socket.c:2584 + ___sys_sendmsg+0x135/0x1d0 net/socket.c:2638 + __sys_sendmsg+0x117/0x1e0 net/socket.c:2667 + do_syscall_x64 arch/x86/entry/common.c:52 [inline] + do_syscall_64+0x40/0x110 arch/x86/entry/common.c:83 + entry_SYSCALL_64_after_hwframe+0x63/0x6b + +The cause of this issue is that when tipc_nl_bearer_add() is called with +the TIPC_NLA_BEARER_UDP_OPTS attribute, tipc_udp_nl_bearer_add() is called +even if the bearer is not UDP. + +tipc_udp_is_known_peer() called by tipc_udp_nl_bearer_add() assumes that +the media_ptr field of the tipc_bearer has an udp_bearer type object, so +the function goes crazy for non-UDP bearers. + +This patch fixes the issue by checking the bearer type before calling +tipc_udp_nl_bearer_add() in tipc_nl_bearer_add(). + +Fixes: ef20cd4dd163 ("tipc: introduce UDP replicast") +Reported-and-tested-by: syzbot+5142b87a9abc510e14fa@syzkaller.appspotmail.com +Closes: https://syzkaller.appspot.com/bug?extid=5142b87a9abc510e14fa [1] +Signed-off-by: Shigeru Yoshida +Reviewed-by: Tung Nguyen +Link: https://lore.kernel.org/r/20240131152310.4089541-1-syoshida@redhat.com +Signed-off-by: Paolo Abeni +Signed-off-by: Sasha Levin +--- + net/tipc/bearer.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c +index 2cde375477e3..878415c43527 100644 +--- a/net/tipc/bearer.c ++++ b/net/tipc/bearer.c +@@ -1086,6 +1086,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) + + #ifdef CONFIG_TIPC_MEDIA_UDP + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { ++ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) { ++ rtnl_unlock(); ++ NL_SET_ERR_MSG(info->extack, "UDP option is unsupported"); ++ return -EINVAL; ++ } ++ + err = tipc_udp_nl_bearer_add(b, + attrs[TIPC_NLA_BEARER_UDP_OPTS]); + if (err) { +-- +2.43.0 + diff --git a/queue-6.6/tsnep-fix-mapping-for-zero-copy-xdp_tx-action.patch b/queue-6.6/tsnep-fix-mapping-for-zero-copy-xdp_tx-action.patch new file mode 100644 index 00000000000..5599c90436a --- /dev/null +++ b/queue-6.6/tsnep-fix-mapping-for-zero-copy-xdp_tx-action.patch @@ -0,0 +1,84 @@ +From 3fd3ec1ee3cf423eaabb3b0447c765481f5c4ae9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 31 Jan 2024 21:14:13 +0100 +Subject: tsnep: Fix mapping for zero copy XDP_TX action + +From: Gerhard Engleder + +[ Upstream commit d7f5fb33cf77247b7bf9a871aaeea72ca4f51ad7 ] + +For XDP_TX action xdp_buff is converted to xdp_frame. The conversion is +done by xdp_convert_buff_to_frame(). The memory type of the resulting +xdp_frame depends on the memory type of the xdp_buff. For page pool +based xdp_buff it produces xdp_frame with memory type +MEM_TYPE_PAGE_POOL. For zero copy XSK pool based xdp_buff it produces +xdp_frame with memory type MEM_TYPE_PAGE_ORDER0. 
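+
+As a rough sketch of the resulting requirement (illustration only; the
+helper names example_xmit_back()/example_xmit_frame() and the two
+EXAMPLE_TYPE_* constants are made up, not driver API), the caller has
+to pick the TX buffer type based on which RX path produced the
+xdp_buff:
+
+	static bool example_xmit_back(struct xdp_buff *xdp, bool zc)
+	{
+		struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+
+		if (unlikely(!xdpf))
+			return false;
+
+		/* zero copy buffs yield MEM_TYPE_PAGE_ORDER0 frames, so
+		 * the completion path must not hand them back to a page
+		 * pool
+		 */
+		return example_xmit_frame(xdpf, zc ? EXAMPLE_TYPE_NDO :
+						     EXAMPLE_TYPE_TX);
+	}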
+
+tsnep_xdp_xmit_back() is not prepared for that and always uses the page
+pool buffer type TSNEP_TX_TYPE_XDP_TX. This leads to invalid mappings
+and the transmission of undefined data.
+
+Improve tsnep_xdp_xmit_back() to use the generic buffer type
+TSNEP_TX_TYPE_XDP_NDO for zero copy XDP_TX.
+
+Fixes: 3fc2333933fd ("tsnep: Add XDP socket zero-copy RX support")
+Signed-off-by: Gerhard Engleder
+Signed-off-by: David S. Miller
+Signed-off-by: Sasha Levin
+---
+ drivers/net/ethernet/engleder/tsnep_main.c | 16 ++++++++++++----
+ 1 file changed, 12 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c
+index 08e113e785a7..4f36b29d66c8 100644
+--- a/drivers/net/ethernet/engleder/tsnep_main.c
++++ b/drivers/net/ethernet/engleder/tsnep_main.c
+@@ -668,17 +668,25 @@ static void tsnep_xdp_xmit_flush(struct tsnep_tx *tx)
+ 
+ static bool tsnep_xdp_xmit_back(struct tsnep_adapter *adapter,
+ 				struct xdp_buff *xdp,
+-				struct netdev_queue *tx_nq, struct tsnep_tx *tx)
++				struct netdev_queue *tx_nq, struct tsnep_tx *tx,
++				bool zc)
+ {
+ 	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+ 	bool xmit;
++	u32 type;
+ 
+ 	if (unlikely(!xdpf))
+ 		return false;
+ 
++	/* no page pool for zero copy */
++	if (zc)
++		type = TSNEP_TX_TYPE_XDP_NDO;
++	else
++		type = TSNEP_TX_TYPE_XDP_TX;
++
+ 	__netif_tx_lock(tx_nq, smp_processor_id());
+ 
+-	xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, TSNEP_TX_TYPE_XDP_TX);
++	xmit = tsnep_xdp_xmit_frame_ring(xdpf, tx, type);
+ 
+ 	/* Avoid transmit queue timeout since we share it with the slow path */
+ 	if (xmit)
+@@ -1222,7 +1230,7 @@ static bool tsnep_xdp_run_prog(struct tsnep_rx *rx, struct bpf_prog *prog,
+ 	case XDP_PASS:
+ 		return false;
+ 	case XDP_TX:
+-		if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
++		if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx, false))
+ 			goto out_failure;
+ 		*status |= TSNEP_XDP_TX;
+ 		return true;
+@@ -1272,7 +1280,7 @@ static bool tsnep_xdp_run_prog_zc(struct tsnep_rx *rx, struct bpf_prog *prog,
+ 	case XDP_PASS:
+ 		return false;
+ 	case XDP_TX:
+-		if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx))
++		if (!tsnep_xdp_xmit_back(rx->adapter, xdp, tx_nq, tx, true))
+ 			goto out_failure;
+ 		*status |= TSNEP_XDP_TX;
+ 		return true;
+--
+2.43.0
+
diff --git a/queue-6.6/tunnels-fix-out-of-bounds-access-when-building-ipv6-.patch b/queue-6.6/tunnels-fix-out-of-bounds-access-when-building-ipv6-.patch
new file mode 100644
index 00000000000..f0144e0e573
--- /dev/null
+++ b/queue-6.6/tunnels-fix-out-of-bounds-access-when-building-ipv6-.patch
@@ -0,0 +1,54 @@
+From 3d30287d7e1463cec63a49aa9fddd26bace3f120 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Thu, 1 Feb 2024 09:38:15 +0100
+Subject: tunnels: fix out of bounds access when building IPv6 PMTU error
+
+From: Antoine Tenart
+
+[ Upstream commit d75abeec401f8c86b470e7028a13fcdc87e5dd06 ]
+
+If the ICMPv6 error is built from a non-linear skb we get the following
+splat,
+
+ BUG: KASAN: slab-out-of-bounds in do_csum+0x220/0x240
+ Read of size 4 at addr ffff88811d402c80 by task netperf/820
+ CPU: 0 PID: 820 Comm: netperf Not tainted 6.8.0-rc1+ #543
+ ...
+ kasan_report+0xd8/0x110
+ do_csum+0x220/0x240
+ csum_partial+0xc/0x20
+ skb_tunnel_check_pmtu+0xeb9/0x3280
+ vxlan_xmit_one+0x14c2/0x4080
+ vxlan_xmit+0xf61/0x5c00
+ dev_hard_start_xmit+0xfb/0x510
+ __dev_queue_xmit+0x7cd/0x32a0
+ br_dev_queue_push_xmit+0x39d/0x6a0
+
+Use skb_checksum instead of csum_partial, which cannot deal with
+non-linear SKBs.
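+
+Condensed from the diff below, the fix boils down to computing the
+checksum via the skb helper, which walks the paged fragments as well,
+before feeding it to csum_ipv6_magic():
+
+	csum = skb_checksum(skb, skb_transport_offset(skb), len, 0);
+	icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr,
+					      len, IPPROTO_ICMPV6, csum);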
+ +Fixes: 4cb47a8644cc ("tunnels: PMTU discovery support for directly bridged IP packets") +Signed-off-by: Antoine Tenart +Reviewed-by: Jiri Pirko +Signed-off-by: David S. Miller +Signed-off-by: Sasha Levin +--- + net/ipv4/ip_tunnel_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c +index 586b1b3e35b8..80ccd6661aa3 100644 +--- a/net/ipv4/ip_tunnel_core.c ++++ b/net/ipv4/ip_tunnel_core.c +@@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu) + }; + skb_reset_network_header(skb); + +- csum = csum_partial(icmp6h, len, 0); ++ csum = skb_checksum(skb, skb_transport_offset(skb), len, 0); + icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len, + IPPROTO_ICMPV6, csum); + +-- +2.43.0 + diff --git a/queue-6.6/wifi-brcmfmac-adjust-n_channels-usage-for-__counted_.patch b/queue-6.6/wifi-brcmfmac-adjust-n_channels-usage-for-__counted_.patch new file mode 100644 index 00000000000..2debf2304e2 --- /dev/null +++ b/queue-6.6/wifi-brcmfmac-adjust-n_channels-usage-for-__counted_.patch @@ -0,0 +1,65 @@ +From 0d12eb9ea7f6c5d195ac33ddb385ae9eefa4b699 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Jan 2024 14:31:53 -0800 +Subject: wifi: brcmfmac: Adjust n_channels usage for __counted_by + +From: Kees Cook + +[ Upstream commit 5bdda0048c8d1bbe2019513b2d6200cc0d09c7bd ] + +After commit e3eac9f32ec0 ("wifi: cfg80211: Annotate struct +cfg80211_scan_request with __counted_by"), the compiler may enforce +dynamic array indexing of req->channels to stay below n_channels. As a +result, n_channels needs to be increased _before_ accessing the newly +added array index. Increment it first, then use "i" for the prior index. +Solves this warning in the coming GCC that has __counted_by support: + +../drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c: In function 'brcmf_internal_escan_add_info': +../drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c:3783:46: warning: operation on 'req-> +n_channels' may be undefined [-Wsequence-point] + 3783 | req->channels[req->n_channels++] = chan; + | ~~~~~~~~~~~~~~~^~ + +Fixes: e3eac9f32ec0 ("wifi: cfg80211: Annotate struct cfg80211_scan_request with __counted_by") +Cc: Arend van Spriel +Cc: Franky Lin +Cc: Hante Meuleman +Cc: Kalle Valo +Cc: Chi-hsien Lin +Cc: Ian Lin +Cc: Johannes Berg +Cc: Wright Feng +Cc: Hector Martin +Cc: linux-wireless@vger.kernel.org +Cc: brcm80211-dev-list.pdl@broadcom.com +Signed-off-by: Kees Cook +Reviewed-by: Hans de Goede +Reviewed-by: Linus Walleij +Reviewed-by: Gustavo A. R. 
Silva +Signed-off-by: Kalle Valo +Link: https://msgid.link/20240126223150.work.548-kees@kernel.org +Signed-off-by: Sasha Levin +--- + drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +index 2a90bb24ba77..6049f9a761d9 100644 +--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c ++++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +@@ -3780,8 +3780,10 @@ static int brcmf_internal_escan_add_info(struct cfg80211_scan_request *req, + if (req->channels[i] == chan) + break; + } +- if (i == req->n_channels) +- req->channels[req->n_channels++] = chan; ++ if (i == req->n_channels) { ++ req->n_channels++; ++ req->channels[i] = chan; ++ } + + for (i = 0; i < req->n_ssids; i++) { + if (req->ssids[i].ssid_len == ssid_len && +-- +2.43.0 + diff --git a/queue-6.6/wifi-iwlwifi-exit-esr-only-after-the-fw-does.patch b/queue-6.6/wifi-iwlwifi-exit-esr-only-after-the-fw-does.patch new file mode 100644 index 00000000000..be4858118ac --- /dev/null +++ b/queue-6.6/wifi-iwlwifi-exit-esr-only-after-the-fw-does.patch @@ -0,0 +1,60 @@ +From 62581de53c922277d27cf89bae9dbda2001a5df6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 1 Feb 2024 16:17:39 +0200 +Subject: wifi: iwlwifi: exit eSR only after the FW does + +From: Miri Korenblit + +[ Upstream commit 16867c38bcd3be2eb9016a3198a096f93959086e ] + +Currently the driver exits eSR by calling +iwl_mvm_esr_mode_inactive() before updating the FW +(by deactivating one of the links), and therefore before +sending the EML frame notifying that we are no longer in eSR. + +This is wrong for several reasons: +1. The driver sends SMPS activation frames when we are still in eSR + and SMPS should be disabled when in eSR +2. 
The driver restores the RLC configuration as it was before
+   entering eSR, and the RLC command shouldn't be sent in eSR
+
+Fix this by calling iwl_mvm_esr_mode_inactive() after the FW update
+
+Fixes: 12bacfc2c065 ("wifi: iwlwifi: handle eSR transitions")
+Signed-off-by: Miri Korenblit
+Reviewed-by: Ilan Peer
+Reviewed-by: Gregory Greenman
+Link: https://msgid.link/20240201155157.d8d9dc277d4e.Ib5aee0fd05e35b1da7f18753eb3c8fa0a3f872f3@changeid
+Signed-off-by: Johannes Berg
+Signed-off-by: Sasha Levin
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
+index 1e58f0234293..2d1fd7ac8577 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/mld-mac80211.c
+@@ -435,6 +435,9 @@ __iwl_mvm_mld_unassign_vif_chanctx(struct iwl_mvm *mvm,
+ 		mvmvif->ap_ibss_active = false;
+ 	}
+ 
++	iwl_mvm_link_changed(mvm, vif, link_conf,
++			     LINK_CONTEXT_MODIFY_ACTIVE, false);
++
+ 	if (iwl_mvm_is_esr_supported(mvm->fwrt.trans) && n_active > 1) {
+ 		int ret = iwl_mvm_esr_mode_inactive(mvm, vif);
+ 
+@@ -446,9 +449,6 @@ __iwl_mvm_mld_unassign_vif_chanctx(struct iwl_mvm *mvm,
+ 	if (vif->type == NL80211_IFTYPE_MONITOR)
+ 		iwl_mvm_mld_rm_snif_sta(mvm, vif);
+ 
+-	iwl_mvm_link_changed(mvm, vif, link_conf,
+-			     LINK_CONTEXT_MODIFY_ACTIVE, false);
+-
+ 	if (switching_chanctx)
+ 		return;
+ 	mvmvif->link[link_id]->phy_ctxt = NULL;
+--
+2.43.0
+
diff --git a/queue-6.6/wifi-mac80211-fix-rcu-use-in-tdls-fast-xmit.patch b/queue-6.6/wifi-mac80211-fix-rcu-use-in-tdls-fast-xmit.patch
new file mode 100644
index 00000000000..427d4c22192
--- /dev/null
+++ b/queue-6.6/wifi-mac80211-fix-rcu-use-in-tdls-fast-xmit.patch
@@ -0,0 +1,42 @@
+From 9a822a7c8f1fbdf1232147cdc8ab2b7b1458241f Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Mon, 29 Jan 2024 15:53:48 +0100
+Subject: wifi: mac80211: fix RCU use in TDLS fast-xmit
+
+From: Johannes Berg
+
+[ Upstream commit 9480adfe4e0f0319b9da04b44e4eebd5ad07e0cd ]
+
+This looks up the link under RCU protection, but isn't
+guaranteed to actually have protection. Fix that.
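+
+The fix below applies the canonical RCU read-side pattern (generic
+sketch only, with use_link() standing in for the real body):
+
+	rcu_read_lock();
+	link = rcu_dereference(sdata->link[tdls_link_id]);
+	if (link)
+		use_link(link);	/* must not be used after the unlock */
+	rcu_read_unlock();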
+ +Fixes: 8cc07265b691 ("wifi: mac80211: handle TDLS data frames with MLO") +Link: https://msgid.link/20240129155348.8a9c0b1e1d89.I553f96ce953bb41b0b877d592056164dec20d01c@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/tx.c | 7 ++++--- + 1 file changed, 4 insertions(+), 3 deletions(-) + +diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c +index d45d4be63dd8..5481acbfc1d4 100644 +--- a/net/mac80211/tx.c ++++ b/net/mac80211/tx.c +@@ -3086,10 +3086,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta) + /* DA SA BSSID */ + build.da_offs = offsetof(struct ieee80211_hdr, addr1); + build.sa_offs = offsetof(struct ieee80211_hdr, addr2); ++ rcu_read_lock(); + link = rcu_dereference(sdata->link[tdls_link_id]); +- if (WARN_ON_ONCE(!link)) +- break; +- memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN); ++ if (!WARN_ON_ONCE(!link)) ++ memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN); ++ rcu_read_unlock(); + build.hdr_len = 24; + break; + } +-- +2.43.0 + diff --git a/queue-6.6/wifi-mac80211-fix-waiting-for-beacons-logic.patch b/queue-6.6/wifi-mac80211-fix-waiting-for-beacons-logic.patch new file mode 100644 index 00000000000..f5f9a9bfb18 --- /dev/null +++ b/queue-6.6/wifi-mac80211-fix-waiting-for-beacons-logic.patch @@ -0,0 +1,37 @@ +From 58eab48fd8f983d061b96e19c0e73d1bedcfeafd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 31 Jan 2024 16:48:56 +0100 +Subject: wifi: mac80211: fix waiting for beacons logic + +From: Johannes Berg + +[ Upstream commit a0b4f2291319c5d47ecb196b90400814fdcfd126 ] + +This should be waiting if we don't have a beacon yet, +but somehow I managed to invert the logic. Fix that. + +Fixes: 74e1309acedc ("wifi: mac80211: mlme: look up beacon elems only if needed") +Link: https://msgid.link/20240131164856.922701229546.I239b379e7cee04608e73c016b737a5245e5b23dd@changeid +Signed-off-by: Johannes Berg +Signed-off-by: Sasha Levin +--- + net/mac80211/mlme.c | 3 +-- + 1 file changed, 1 insertion(+), 2 deletions(-) + +diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c +index 73f8df03d159..d9e716f38b0e 100644 +--- a/net/mac80211/mlme.c ++++ b/net/mac80211/mlme.c +@@ -7727,8 +7727,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, + + rcu_read_lock(); + beacon_ies = rcu_dereference(req->bss->beacon_ies); +- +- if (beacon_ies) { ++ if (!beacon_ies) { + /* + * Wait up to one beacon interval ... + * should this be more if we miss one? +-- +2.43.0 + diff --git a/queue-6.6/x86-efistub-avoid-placing-the-kernel-below-load_phys.patch b/queue-6.6/x86-efistub-avoid-placing-the-kernel-below-load_phys.patch new file mode 100644 index 00000000000..cb842c53932 --- /dev/null +++ b/queue-6.6/x86-efistub-avoid-placing-the-kernel-below-load_phys.patch @@ -0,0 +1,142 @@ +From 81ff2e5ec087f278e65613b427ed012523a875a4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 30 Jan 2024 19:01:35 +0100 +Subject: x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR + +From: Ard Biesheuvel + +[ Upstream commit 2f77465b05b1270c832b5e2ee27037672ad2a10a ] + +The EFI stub's kernel placement logic randomizes the physical placement +of the kernel by taking all available memory into account, and picking a +region at random, based on a random seed. + +When KASLR is disabled, this seed is set to 0x0, and this results in the +lowest available region of memory to be selected for loading the kernel, +even if this is below LOAD_PHYSICAL_ADDR. 
Some of this memory is +typically reserved for the GFP_DMA region, to accommodate masters that +can only access the first 16 MiB of system memory. + +Even if such devices are rare these days, we may still end up with a +warning in the kernel log, as reported by Tom: + + swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0 + +Fix this by tweaking the random allocation logic to accept a low bound +on the placement, and set it to LOAD_PHYSICAL_ADDR. + +Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") +Reported-by: Tom Englund +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218404 +Signed-off-by: Ard Biesheuvel +Signed-off-by: Sasha Levin +--- + drivers/firmware/efi/libstub/efistub.h | 3 ++- + drivers/firmware/efi/libstub/kaslr.c | 2 +- + drivers/firmware/efi/libstub/randomalloc.c | 12 +++++++----- + drivers/firmware/efi/libstub/x86-stub.c | 1 + + drivers/firmware/efi/libstub/zboot.c | 2 +- + 5 files changed, 12 insertions(+), 8 deletions(-) + +diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h +index 212687c30d79..c04b82ea40f2 100644 +--- a/drivers/firmware/efi/libstub/efistub.h ++++ b/drivers/firmware/efi/libstub/efistub.h +@@ -956,7 +956,8 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out); + + efi_status_t efi_random_alloc(unsigned long size, unsigned long align, + unsigned long *addr, unsigned long random_seed, +- int memory_type, unsigned long alloc_limit); ++ int memory_type, unsigned long alloc_min, ++ unsigned long alloc_max); + + efi_status_t efi_random_get_seed(void); + +diff --git a/drivers/firmware/efi/libstub/kaslr.c b/drivers/firmware/efi/libstub/kaslr.c +index 62d63f7a2645..1a9808012abd 100644 +--- a/drivers/firmware/efi/libstub/kaslr.c ++++ b/drivers/firmware/efi/libstub/kaslr.c +@@ -119,7 +119,7 @@ efi_status_t efi_kaslr_relocate_kernel(unsigned long *image_addr, + */ + status = efi_random_alloc(*reserve_size, min_kimg_align, + reserve_addr, phys_seed, +- EFI_LOADER_CODE, EFI_ALLOC_LIMIT); ++ EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT); + if (status != EFI_SUCCESS) + efi_warn("efi_random_alloc() failed: 0x%lx\n", status); + } else { +diff --git a/drivers/firmware/efi/libstub/randomalloc.c b/drivers/firmware/efi/libstub/randomalloc.c +index 674a064b8f7a..4e96a855fdf4 100644 +--- a/drivers/firmware/efi/libstub/randomalloc.c ++++ b/drivers/firmware/efi/libstub/randomalloc.c +@@ -17,7 +17,7 @@ + static unsigned long get_entry_num_slots(efi_memory_desc_t *md, + unsigned long size, + unsigned long align_shift, +- u64 alloc_limit) ++ u64 alloc_min, u64 alloc_max) + { + unsigned long align = 1UL << align_shift; + u64 first_slot, last_slot, region_end; +@@ -30,11 +30,11 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md, + return 0; + + region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1, +- alloc_limit); ++ alloc_max); + if (region_end < size) + return 0; + +- first_slot = round_up(md->phys_addr, align); ++ first_slot = round_up(max(md->phys_addr, alloc_min), align); + last_slot = round_down(region_end - size + 1, align); + + if (first_slot > last_slot) +@@ -56,7 +56,8 @@ efi_status_t efi_random_alloc(unsigned long size, + unsigned long *addr, + unsigned long random_seed, + int memory_type, +- unsigned long alloc_limit) ++ unsigned long alloc_min, ++ unsigned long alloc_max) + { + unsigned long total_slots = 0, target_slot; + unsigned long total_mirrored_slots = 0; +@@ -78,7 +79,8 @@ efi_status_t 
efi_random_alloc(unsigned long size, + efi_memory_desc_t *md = (void *)map->map + map_offset; + unsigned long slots; + +- slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit); ++ slots = get_entry_num_slots(md, size, ilog2(align), alloc_min, ++ alloc_max); + MD_NUM_SLOTS(md) = slots; + total_slots += slots; + if (md->attribute & EFI_MEMORY_MORE_RELIABLE) +diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c +index 5d0934ae7dc8..4a11470bed5e 100644 +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -831,6 +831,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) + + status = efi_random_alloc(alloc_size, CONFIG_PHYSICAL_ALIGN, &addr, + seed[0], EFI_LOADER_CODE, ++ LOAD_PHYSICAL_ADDR, + EFI_X86_KERNEL_ALLOC_LIMIT); + if (status != EFI_SUCCESS) + return status; +diff --git a/drivers/firmware/efi/libstub/zboot.c b/drivers/firmware/efi/libstub/zboot.c +index bdb17eac0cb4..1ceace956758 100644 +--- a/drivers/firmware/efi/libstub/zboot.c ++++ b/drivers/firmware/efi/libstub/zboot.c +@@ -119,7 +119,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab) + } + + status = efi_random_alloc(alloc_size, min_kimg_align, &image_base, +- seed, EFI_LOADER_CODE, EFI_ALLOC_LIMIT); ++ seed, EFI_LOADER_CODE, 0, EFI_ALLOC_LIMIT); + if (status != EFI_SUCCESS) { + efi_err("Failed to allocate memory\n"); + goto free_cmdline; +-- +2.43.0 + diff --git a/queue-6.6/x86-efistub-give-up-if-memory-attribute-protocol-ret.patch b/queue-6.6/x86-efistub-give-up-if-memory-attribute-protocol-ret.patch new file mode 100644 index 00000000000..0936fef4ce0 --- /dev/null +++ b/queue-6.6/x86-efistub-give-up-if-memory-attribute-protocol-ret.patch @@ -0,0 +1,120 @@ +From 53bc1f02a1f2c0185992cc599a436bc8475f4176 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 26 Jan 2024 12:14:30 +0100 +Subject: x86/efistub: Give up if memory attribute protocol returns an error + +From: Ard Biesheuvel + +[ Upstream commit a7a6a01f88e87dec4bf2365571dd2dc7403d52d0 ] + +The recently introduced EFI memory attributes protocol should be used +if it exists to ensure that the memory allocation created for the kernel +permits execution. This is needed for compatibility with tightened +requirements related to Windows logo certification for x86 PCs. + +Currently, we simply strip the execute protect (XP) attribute from the +entire range, but this might be rejected under some firmware security +policies, and so in a subsequent patch, this will be changed to only +strip XP from the executable region that runs early, and make it +read-only (RO) as well. + +In order to catch any issues early, ensure that the memory attribute +protocol works as intended, and give up if it produces spurious errors. + +Note that the DXE services based fallback was always based on best +effort, so don't propagate any errors returned by that API. 
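+
+Condensed from the diff below: the status of the protocol call is now
+checked and propagated instead of being discarded:
+
+	status = efi_call_proto(memattr, clear_memory_attributes,
+				rounded_start,
+				rounded_end - rounded_start,
+				EFI_MEMORY_XP);
+	if (status != EFI_SUCCESS)
+		efi_warn("Failed to clear EFI_MEMORY_XP attribute\n");
+	return status;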
+ +Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot") +Signed-off-by: Ard Biesheuvel +Signed-off-by: Sasha Levin +--- + drivers/firmware/efi/libstub/x86-stub.c | 24 ++++++++++++++---------- + drivers/firmware/efi/libstub/x86-stub.h | 4 ++-- + 2 files changed, 16 insertions(+), 12 deletions(-) + +diff --git a/drivers/firmware/efi/libstub/x86-stub.c b/drivers/firmware/efi/libstub/x86-stub.c +index 70b325a2f1f3..5d0934ae7dc8 100644 +--- a/drivers/firmware/efi/libstub/x86-stub.c ++++ b/drivers/firmware/efi/libstub/x86-stub.c +@@ -223,8 +223,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params) + } + } + +-void efi_adjust_memory_range_protection(unsigned long start, +- unsigned long size) ++efi_status_t efi_adjust_memory_range_protection(unsigned long start, ++ unsigned long size) + { + efi_status_t status; + efi_gcd_memory_space_desc_t desc; +@@ -236,13 +236,17 @@ void efi_adjust_memory_range_protection(unsigned long start, + rounded_end = roundup(start + size, EFI_PAGE_SIZE); + + if (memattr != NULL) { +- efi_call_proto(memattr, clear_memory_attributes, rounded_start, +- rounded_end - rounded_start, EFI_MEMORY_XP); +- return; ++ status = efi_call_proto(memattr, clear_memory_attributes, ++ rounded_start, ++ rounded_end - rounded_start, ++ EFI_MEMORY_XP); ++ if (status != EFI_SUCCESS) ++ efi_warn("Failed to clear EFI_MEMORY_XP attribute\n"); ++ return status; + } + + if (efi_dxe_table == NULL) +- return; ++ return EFI_SUCCESS; + + /* + * Don't modify memory region attributes, they are +@@ -255,7 +259,7 @@ void efi_adjust_memory_range_protection(unsigned long start, + status = efi_dxe_call(get_memory_space_descriptor, start, &desc); + + if (status != EFI_SUCCESS) +- return; ++ break; + + next = desc.base_address + desc.length; + +@@ -280,8 +284,10 @@ void efi_adjust_memory_range_protection(unsigned long start, + unprotect_start, + unprotect_start + unprotect_size, + status); ++ break; + } + } ++ return EFI_SUCCESS; + } + + static void setup_unaccepted_memory(void) +@@ -837,9 +843,7 @@ static efi_status_t efi_decompress_kernel(unsigned long *kernel_entry) + + *kernel_entry = addr + entry; + +- efi_adjust_memory_range_protection(addr, kernel_total_size); +- +- return EFI_SUCCESS; ++ return efi_adjust_memory_range_protection(addr, kernel_total_size); + } + + static void __noreturn enter_kernel(unsigned long kernel_addr, +diff --git a/drivers/firmware/efi/libstub/x86-stub.h b/drivers/firmware/efi/libstub/x86-stub.h +index 2748bca192df..4433d0f97441 100644 +--- a/drivers/firmware/efi/libstub/x86-stub.h ++++ b/drivers/firmware/efi/libstub/x86-stub.h +@@ -7,8 +7,8 @@ extern struct boot_params *boot_params_pointer asm("boot_params"); + extern void trampoline_32bit_src(void *, bool); + extern const u16 trampoline_ljmp_imm_offset; + +-void efi_adjust_memory_range_protection(unsigned long start, +- unsigned long size); ++efi_status_t efi_adjust_memory_range_protection(unsigned long start, ++ unsigned long size); + + #ifdef CONFIG_X86_64 + efi_status_t efi_setup_5level_paging(void); +-- +2.43.0 +