From: Sasha Levin Date: Tue, 18 Apr 2023 01:24:48 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v4.14.313~18^2~5 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=c2f8a972cb36608fef5ef875a266a9fa76503292;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_.patch b/queue-6.1/cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_.patch new file mode 100644 index 00000000000..fc3c1833c2c --- /dev/null +++ b/queue-6.1/cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_.patch @@ -0,0 +1,176 @@ +From ce2f88c9541a819b574d9ed9b84127db31227fb2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Apr 2023 09:35:59 -0400 +Subject: cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Waiman Long + +[ Upstream commit eee87853794187f6adbe19533ed79c8b44b36a91 ] + +In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept +new tasks. It is too late to check that in cpuset_fork(). So we need +to add the cpuset_can_fork() and cpuset_cancel_fork() methods to +pre-check it before we can allow attachment to a different cpuset. + +We also need to set the attach_in_progress flag to alert other code +that a new task is going to be added to the cpuset. 
+ +Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups") +Suggested-by: Michal Koutný +Signed-off-by: Waiman Long +Cc: stable@vger.kernel.org # v5.7+ +Signed-off-by: Tejun Heo +Signed-off-by: Sasha Levin +--- + kernel/cgroup/cpuset.c | 97 +++++++++++++++++++++++++++++++++++++----- + 1 file changed, 86 insertions(+), 11 deletions(-) + +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index 85f071fb1a414..e276db7228451 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -2453,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp) + + static struct cpuset *cpuset_attach_old_cs; + ++/* ++ * Check to see if a cpuset can accept a new task ++ * For v1, cpus_allowed and mems_allowed can't be empty. ++ * For v2, effective_cpus can't be empty. ++ * Note that in v1, effective_cpus = cpus_allowed. ++ */ ++static int cpuset_can_attach_check(struct cpuset *cs) ++{ ++ if (cpumask_empty(cs->effective_cpus) || ++ (!is_in_v2_mode() && nodes_empty(cs->mems_allowed))) ++ return -ENOSPC; ++ return 0; ++} ++ + /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */ + static int cpuset_can_attach(struct cgroup_taskset *tset) + { +@@ -2467,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) + + percpu_down_write(&cpuset_rwsem); + +- /* allow moving tasks into an empty cpuset if on default hierarchy */ +- ret = -ENOSPC; +- if (!is_in_v2_mode() && +- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed))) +- goto out_unlock; +- +- /* +- * Task cannot be moved to a cpuset with empty effective cpus. +- */ +- if (cpumask_empty(cs->effective_cpus)) ++ /* Check to see if task is allowed in the cpuset */ ++ ret = cpuset_can_attach_check(cs); ++ if (ret) + goto out_unlock; + + cgroup_taskset_for_each(task, css, tset) { +@@ -2493,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset) + * changes which zero cpus/mems_allowed. 
+ */ + cs->attach_in_progress++; +- ret = 0; + out_unlock: + percpu_up_write(&cpuset_rwsem); + return ret; +@@ -3238,6 +3244,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) + percpu_up_write(&cpuset_rwsem); + } + ++/* ++ * In case the child is cloned into a cpuset different from its parent, ++ * additional checks are done to see if the move is allowed. ++ */ ++static int cpuset_can_fork(struct task_struct *task, struct css_set *cset) ++{ ++ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); ++ bool same_cs; ++ int ret; ++ ++ rcu_read_lock(); ++ same_cs = (cs == task_cs(current)); ++ rcu_read_unlock(); ++ ++ if (same_cs) ++ return 0; ++ ++ lockdep_assert_held(&cgroup_mutex); ++ percpu_down_write(&cpuset_rwsem); ++ ++ /* Check to see if task is allowed in the cpuset */ ++ ret = cpuset_can_attach_check(cs); ++ if (ret) ++ goto out_unlock; ++ ++ ret = task_can_attach(task, cs->effective_cpus); ++ if (ret) ++ goto out_unlock; ++ ++ ret = security_task_setscheduler(task); ++ if (ret) ++ goto out_unlock; ++ ++ /* ++ * Mark attach is in progress. This makes validate_change() fail ++ * changes which zero cpus/mems_allowed. 
++ */ ++ cs->attach_in_progress++; ++out_unlock: ++ percpu_up_write(&cpuset_rwsem); ++ return ret; ++} ++ ++static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset) ++{ ++ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]); ++ bool same_cs; ++ ++ rcu_read_lock(); ++ same_cs = (cs == task_cs(current)); ++ rcu_read_unlock(); ++ ++ if (same_cs) ++ return; ++ ++ percpu_down_write(&cpuset_rwsem); ++ cs->attach_in_progress--; ++ if (!cs->attach_in_progress) ++ wake_up(&cpuset_attach_wq); ++ percpu_up_write(&cpuset_rwsem); ++} ++ + /* + * Make sure the new task conform to the current state of its parent, + * which could have been changed by cpuset just after it inherits the +@@ -3266,6 +3334,11 @@ static void cpuset_fork(struct task_struct *task) + percpu_down_write(&cpuset_rwsem); + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); + cpuset_attach_task(cs, task); ++ ++ cs->attach_in_progress--; ++ if (!cs->attach_in_progress) ++ wake_up(&cpuset_attach_wq); ++ + percpu_up_write(&cpuset_rwsem); + } + +@@ -3279,6 +3352,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = { + .attach = cpuset_attach, + .post_attach = cpuset_post_attach, + .bind = cpuset_bind, ++ .can_fork = cpuset_can_fork, ++ .cancel_fork = cpuset_cancel_fork, + .fork = cpuset_fork, + .legacy_cftypes = legacy_files, + .dfl_cftypes = dfl_files, +-- +2.39.2 + diff --git a/queue-6.1/cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgr.patch b/queue-6.1/cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgr.patch new file mode 100644 index 00000000000..97de9e57768 --- /dev/null +++ b/queue-6.1/cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgr.patch @@ -0,0 +1,133 @@ +From 0cd0e87ceae9b62f8bb13629d42a468ab41a846d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Apr 2023 09:35:58 -0400 +Subject: cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly + +From: Waiman Long + +[ Upstream commit 42a11bf5c5436e91b040aeb04063be1710bb9f9c ] + +By default, the clone(2) 
 syscall spawns a child process into the same +cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag +introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes +into cgroups"), the child will be spawned into a different cgroup which +is somewhat similar to writing the child's tid into "cgroup.threads". + +The current cpuset_fork() method does not properly handle the +CLONE_INTO_CGROUP case where the cpuset of the child may be different +from that of its parent. Update the cpuset_fork() method to treat the +CLONE_INTO_CGROUP case similar to cpuset_attach(). + +Since the newly cloned task has not been running yet, its actual +memory usage isn't known. So it is not necessary to make changes to mm +in cpuset_fork(). + +Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups") +Reported-by: Giuseppe Scrivano +Signed-off-by: Waiman Long +Cc: stable@vger.kernel.org # v5.7+ +Signed-off-by: Tejun Heo +Signed-off-by: Sasha Levin +--- + kernel/cgroup/cpuset.c | 62 ++++++++++++++++++++++++++++-------------- + 1 file changed, 42 insertions(+), 20 deletions(-) + +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index 1c549452c4abb..85f071fb1a414 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -2515,16 +2515,33 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset) + } + + /* +- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach() ++ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task() + * but we can't allocate it dynamically there. Define it global and + * allocate from cpuset_init(). 
+ */ + static cpumask_var_t cpus_attach; ++static nodemask_t cpuset_attach_nodemask_to; ++ ++static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task) ++{ ++ percpu_rwsem_assert_held(&cpuset_rwsem); ++ ++ if (cs != &top_cpuset) ++ guarantee_online_cpus(task, cpus_attach); ++ else ++ cpumask_copy(cpus_attach, task_cpu_possible_mask(task)); ++ /* ++ * can_attach beforehand should guarantee that this doesn't ++ * fail. TODO: have a better way to handle failure here ++ */ ++ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); ++ ++ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); ++ cpuset_update_task_spread_flags(cs, task); ++} + + static void cpuset_attach(struct cgroup_taskset *tset) + { +- /* static buf protected by cpuset_rwsem */ +- static nodemask_t cpuset_attach_nodemask_to; + struct task_struct *task; + struct task_struct *leader; + struct cgroup_subsys_state *css; +@@ -2539,20 +2556,8 @@ static void cpuset_attach(struct cgroup_taskset *tset) + + guarantee_online_mems(cs, &cpuset_attach_nodemask_to); + +- cgroup_taskset_for_each(task, css, tset) { +- if (cs != &top_cpuset) +- guarantee_online_cpus(task, cpus_attach); +- else +- cpumask_copy(cpus_attach, task_cpu_possible_mask(task)); +- /* +- * can_attach beforehand should guarantee that this doesn't +- * fail. TODO: have a better way to handle failure here +- */ +- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); +- +- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); +- cpuset_update_task_spread_flags(cs, task); +- } ++ cgroup_taskset_for_each(task, css, tset) ++ cpuset_attach_task(cs, task); + + /* + * Change mm for all threadgroup leaders. 
This is expensive and may +@@ -3240,11 +3245,28 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) + */ + static void cpuset_fork(struct task_struct *task) + { +- if (task_css_is_root(task, cpuset_cgrp_id)) ++ struct cpuset *cs; ++ bool same_cs; ++ ++ rcu_read_lock(); ++ cs = task_cs(task); ++ same_cs = (cs == task_cs(current)); ++ rcu_read_unlock(); ++ ++ if (same_cs) { ++ if (cs == &top_cpuset) ++ return; ++ ++ set_cpus_allowed_ptr(task, current->cpus_ptr); ++ task->mems_allowed = current->mems_allowed; + return; ++ } + +- set_cpus_allowed_ptr(task, current->cpus_ptr); +- task->mems_allowed = current->mems_allowed; ++ /* CLONE_INTO_CGROUP */ ++ percpu_down_write(&cpuset_rwsem); ++ guarantee_online_mems(cs, &cpuset_attach_nodemask_to); ++ cpuset_attach_task(cs, task); ++ percpu_up_write(&cpuset_rwsem); + } + + struct cgroup_subsys cpuset_cgrp_subsys = { +-- +2.39.2 + diff --git a/queue-6.1/cgroup-cpuset-skip-spread-flags-update-on-v2.patch b/queue-6.1/cgroup-cpuset-skip-spread-flags-update-on-v2.patch new file mode 100644 index 00000000000..a7420638198 --- /dev/null +++ b/queue-6.1/cgroup-cpuset-skip-spread-flags-update-on-v2.patch @@ -0,0 +1,65 @@ +From 871105bedf332730e67131085ed96223fbd02242 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 12 Nov 2022 17:19:38 -0500 +Subject: cgroup/cpuset: Skip spread flags update on v2 + +From: Waiman Long + +[ Upstream commit 18f9a4d47527772515ad6cbdac796422566e6440 ] + +Cpuset v2 has no spread flags to set. So we can skip spread +flags update if cpuset v2 is being used. Also change the name to +cpuset_update_task_spread_flags() to indicate that there are multiple +spread flags. 
+ +Signed-off-by: Waiman Long +Signed-off-by: Tejun Heo +Stable-dep-of: 42a11bf5c543 ("cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly") +Signed-off-by: Sasha Levin +--- + kernel/cgroup/cpuset.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c +index 4c0c317083d05..1c549452c4abb 100644 +--- a/kernel/cgroup/cpuset.c ++++ b/kernel/cgroup/cpuset.c +@@ -550,11 +550,15 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask) + /* + * update task's spread flag if cpuset's page/slab spread flag is set + * +- * Call with callback_lock or cpuset_rwsem held. ++ * Call with callback_lock or cpuset_rwsem held. The check can be skipped ++ * if on default hierarchy. + */ +-static void cpuset_update_task_spread_flag(struct cpuset *cs, ++static void cpuset_update_task_spread_flags(struct cpuset *cs, + struct task_struct *tsk) + { ++ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys)) ++ return; ++ + if (is_spread_page(cs)) + task_set_spread_page(tsk); + else +@@ -2165,7 +2169,7 @@ static void update_tasks_flags(struct cpuset *cs) + + css_task_iter_start(&cs->css, 0, &it); + while ((task = css_task_iter_next(&it))) +- cpuset_update_task_spread_flag(cs, task); ++ cpuset_update_task_spread_flags(cs, task); + css_task_iter_end(&it); + } + +@@ -2547,7 +2551,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) + WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach)); + + cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); +- cpuset_update_task_spread_flag(cs, task); ++ cpuset_update_task_spread_flags(cs, task); + } + + /* +-- +2.39.2 + diff --git a/queue-6.1/cifs-fix-negotiate-context-parsing.patch b/queue-6.1/cifs-fix-negotiate-context-parsing.patch new file mode 100644 index 00000000000..ac29fbda2c3 --- /dev/null +++ b/queue-6.1/cifs-fix-negotiate-context-parsing.patch @@ -0,0 +1,124 @@ +From 8b36a61fe69357a0f42cfa8e2f6a60fda9a74ce2 Mon Sep 17 00:00:00 
2001 +From: Sasha Levin +Date: Fri, 7 Apr 2023 00:34:11 +0200 +Subject: cifs: fix negotiate context parsing + +From: David Disseldorp + +[ Upstream commit 5105a7ffce19160e7062aee67fb6b3b8a1b56d78 ] + +smb311_decode_neg_context() doesn't properly check against SMB packet +boundaries prior to accessing individual negotiate context entries. This +is due to the length check omitting the eight byte smb2_neg_context +header, as well as incorrect decrementing of len_of_ctxts. + +Fixes: 5100d8a3fe03 ("SMB311: Improve checking of negotiate security contexts") +Reported-by: Volker Lendecke +Reviewed-by: Paulo Alcantara (SUSE) +Signed-off-by: David Disseldorp +Signed-off-by: Steve French +Signed-off-by: Sasha Levin +--- + fs/cifs/smb2pdu.c | 41 +++++++++++++++++++++++++++++++---------- + 1 file changed, 31 insertions(+), 10 deletions(-) + +diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c +index b37379b62cc77..ab59faf8a06a7 100644 +--- a/fs/cifs/smb2pdu.c ++++ b/fs/cifs/smb2pdu.c +@@ -588,11 +588,15 @@ assemble_neg_contexts(struct smb2_negotiate_req *req, + + } + ++/* If invalid preauth context warn but use what we requested, SHA-512 */ + static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt) + { + unsigned int len = le16_to_cpu(ctxt->DataLength); + +- /* If invalid preauth context warn but use what we requested, SHA-512 */ ++ /* ++ * Caller checked that DataLength remains within SMB boundary. We still ++ * need to confirm that one HashAlgorithms member is accounted for. ++ */ + if (len < MIN_PREAUTH_CTXT_DATA_LEN) { + pr_warn_once("server sent bad preauth context\n"); + return; +@@ -611,7 +615,11 @@ static void decode_compress_ctx(struct TCP_Server_Info *server, + { + unsigned int len = le16_to_cpu(ctxt->DataLength); + +- /* sizeof compress context is a one element compression capbility struct */ ++ /* ++ * Caller checked that DataLength remains within SMB boundary. We still ++ * need to confirm that one CompressionAlgorithms member is accounted ++ * for. 
++ */ + if (len < 10) { + pr_warn_once("server sent bad compression cntxt\n"); + return; +@@ -633,6 +641,11 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server, + unsigned int len = le16_to_cpu(ctxt->DataLength); + + cifs_dbg(FYI, "decode SMB3.11 encryption neg context of len %d\n", len); ++ /* ++ * Caller checked that DataLength remains within SMB boundary. We still ++ * need to confirm that one Cipher flexible array member is accounted ++ * for. ++ */ + if (len < MIN_ENCRYPT_CTXT_DATA_LEN) { + pr_warn_once("server sent bad crypto ctxt len\n"); + return -EINVAL; +@@ -679,6 +692,11 @@ static void decode_signing_ctx(struct TCP_Server_Info *server, + { + unsigned int len = le16_to_cpu(pctxt->DataLength); + ++ /* ++ * Caller checked that DataLength remains within SMB boundary. We still ++ * need to confirm that one SigningAlgorithms flexible array member is ++ * accounted for. ++ */ + if ((len < 4) || (len > 16)) { + pr_warn_once("server sent bad signing negcontext\n"); + return; +@@ -720,14 +738,19 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp, + for (i = 0; i < ctxt_cnt; i++) { + int clen; + /* check that offset is not beyond end of SMB */ +- if (len_of_ctxts == 0) +- break; +- + if (len_of_ctxts < sizeof(struct smb2_neg_context)) + break; + + pctx = (struct smb2_neg_context *)(offset + (char *)rsp); +- clen = le16_to_cpu(pctx->DataLength); ++ clen = sizeof(struct smb2_neg_context) ++ + le16_to_cpu(pctx->DataLength); ++ /* ++ * 2.2.4 SMB2 NEGOTIATE Response ++ * Subsequent negotiate contexts MUST appear at the first 8-byte ++ * aligned offset following the previous negotiate context. 
 ++ */ ++ if (i + 1 != ctxt_cnt) ++ clen = ALIGN(clen, 8); + if (clen > len_of_ctxts) + break; + +@@ -748,12 +771,10 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp, + else + cifs_server_dbg(VFS, "unknown negcontext of type %d ignored\n", + le16_to_cpu(pctx->ContextType)); +- + if (rc) + break; +- /* offsets must be 8 byte aligned */ +- clen = ALIGN(clen, 8); +- offset += clen + sizeof(struct smb2_neg_context); ++ ++ offset += clen; + len_of_ctxts -= clen; + } + return rc; +-- +2.39.2 + diff --git a/queue-6.1/documentation-riscv-document-the-sv57-vm-layout.patch b/queue-6.1/documentation-riscv-document-the-sv57-vm-layout.patch new file mode 100644 index 00000000000..32b8693773d --- /dev/null +++ b/queue-6.1/documentation-riscv-document-the-sv57-vm-layout.patch @@ -0,0 +1,73 @@ +From a6c803a703cd8d472810dae0ba1f2b2868a94354 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 18 Nov 2022 18:15:56 +0100 +Subject: Documentation: riscv: Document the sv57 VM layout +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Björn Töpel + +[ Upstream commit dd3553793a759e4f7f21c1aaffd5cb2de7a0068d ] + +RISC-V has been supporting the "sv57" address translation mode for a +while, but it has not been added to the VM layout documentation. Let +us fix that. 
+ +Signed-off-by: Björn Töpel +Reviewed-by: Alexandre Ghiti +Link: https://lore.kernel.org/r/20221118171556.1612190-1-bjorn@kernel.org +Signed-off-by: Jonathan Corbet +Stable-dep-of: ef69d2559fe9 ("riscv: Move early dtb mapping into the fixmap region") +Signed-off-by: Sasha Levin +--- + Documentation/riscv/vm-layout.rst | 36 +++++++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst +index 5b36e45fef60b..3be44e74ec5d6 100644 +--- a/Documentation/riscv/vm-layout.rst ++++ b/Documentation/riscv/vm-layout.rst +@@ -97,3 +97,39 @@ RISC-V Linux Kernel SV48 + ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF + ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel + __________________|____________|__________________|_________|____________________________________________________________ ++ ++ ++RISC-V Linux Kernel SV57 ++------------------------ ++ ++:: ++ ++ ======================================================================================================================== ++ Start addr | Offset | End addr | Size | VM area description ++ ======================================================================================================================== ++ | | | | ++ 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm ++ __________________|____________|__________________|_________|___________________________________________________________ ++ | | | | ++ 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, almost 64 bits wide hole of non-canonical ++ | | | | virtual memory addresses up to the -64 PB ++ | | | | starting offset of kernel mappings. 
++ __________________|____________|__________________|_________|___________________________________________________________ ++ | ++ | Kernel-space virtual memory, shared between all processes: ++ ____________________________________________________________|___________________________________________________________ ++ | | | | ++ ff1bfffffee00000 | -57 PB | ff1bfffffeffffff | 2 MB | fixmap ++ ff1bffffff000000 | -57 PB | ff1bffffffffffff | 16 MB | PCI io ++ ff1c000000000000 | -57 PB | ff1fffffffffffff | 1 PB | vmemmap ++ ff20000000000000 | -56 PB | ff5fffffffffffff | 16 PB | vmalloc/ioremap space ++ ff60000000000000 | -40 PB | ffdeffffffffffff | 32 PB | direct mapping of all physical memory ++ ffdf000000000000 | -8 PB | fffffffeffffffff | 8 PB | kasan ++ __________________|____________|__________________|_________|____________________________________________________________ ++ | ++ | Identical layout to the 39-bit one from here on: ++ ____________________________________________________________|____________________________________________________________ ++ | | | | ++ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF ++ ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel ++ __________________|____________|__________________|_________|____________________________________________________________ +-- +2.39.2 + diff --git a/queue-6.1/i2c-mchp-pci1xxxx-update-timing-registers.patch b/queue-6.1/i2c-mchp-pci1xxxx-update-timing-registers.patch new file mode 100644 index 00000000000..e667676107c --- /dev/null +++ b/queue-6.1/i2c-mchp-pci1xxxx-update-timing-registers.patch @@ -0,0 +1,173 @@ +From 8a1aed7bcc603e9ebae8d163ef60c6ae09785dbe Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 20 Mar 2023 19:52:37 +0530 +Subject: i2c: mchp-pci1xxxx: Update Timing registers + +From: Tharun Kumar P + +[ Upstream commit aa874cdfec07d4dd9c6f0c356d65c609ba31a26f ] + +Update I2C timing registers based on latest hardware design. 
+This fix does not break functionality of chips with older design and +existing users will not be affected. + +Fixes: 361693697249 ("i2c: microchip: pci1xxxx: Add driver for I2C host controller in multifunction endpoint of pci1xxxx switch") +Signed-off-by: Tharun Kumar P +Reviewed-by: Andy Shevchenko +Signed-off-by: Wolfram Sang +Signed-off-by: Sasha Levin +--- + drivers/i2c/busses/i2c-mchp-pci1xxxx.c | 60 +++++++++++++------------- + 1 file changed, 30 insertions(+), 30 deletions(-) + +diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c +index 09af759211478..b21ffd6df9276 100644 +--- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c ++++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c +@@ -48,9 +48,9 @@ + * SR_HOLD_TIME_XK_TICKS field will indicate the number of ticks of the + * baud clock required to program 'Hold Time' at X KHz. + */ +-#define SR_HOLD_TIME_100K_TICKS 133 +-#define SR_HOLD_TIME_400K_TICKS 20 +-#define SR_HOLD_TIME_1000K_TICKS 11 ++#define SR_HOLD_TIME_100K_TICKS 150 ++#define SR_HOLD_TIME_400K_TICKS 20 ++#define SR_HOLD_TIME_1000K_TICKS 12 + + #define SMB_CORE_COMPLETION_REG_OFF3 (SMBUS_MAST_CORE_ADDR_BASE + 0x23) + +@@ -65,17 +65,17 @@ + * the baud clock required to program 'fair idle delay' at X KHz. Fair idle + * delay establishes the MCTP T(IDLE_DELAY) period. + */ +-#define FAIR_BUS_IDLE_MIN_100K_TICKS 969 +-#define FAIR_BUS_IDLE_MIN_400K_TICKS 157 +-#define FAIR_BUS_IDLE_MIN_1000K_TICKS 157 ++#define FAIR_BUS_IDLE_MIN_100K_TICKS 992 ++#define FAIR_BUS_IDLE_MIN_400K_TICKS 500 ++#define FAIR_BUS_IDLE_MIN_1000K_TICKS 500 + + /* + * FAIR_IDLE_DELAY_XK_TICKS field will indicate the number of ticks of the + * baud clock required to satisfy the fairness protocol at X KHz. 
+ */ +-#define FAIR_IDLE_DELAY_100K_TICKS 1000 +-#define FAIR_IDLE_DELAY_400K_TICKS 500 +-#define FAIR_IDLE_DELAY_1000K_TICKS 500 ++#define FAIR_IDLE_DELAY_100K_TICKS 963 ++#define FAIR_IDLE_DELAY_400K_TICKS 156 ++#define FAIR_IDLE_DELAY_1000K_TICKS 156 + + #define SMB_IDLE_SCALING_100K \ + ((FAIR_IDLE_DELAY_100K_TICKS << 16) | FAIR_BUS_IDLE_MIN_100K_TICKS) +@@ -105,7 +105,7 @@ + */ + #define BUS_CLK_100K_LOW_PERIOD_TICKS 156 + #define BUS_CLK_400K_LOW_PERIOD_TICKS 41 +-#define BUS_CLK_1000K_LOW_PERIOD_TICKS 15 ++#define BUS_CLK_1000K_LOW_PERIOD_TICKS 15 + + /* + * BUS_CLK_XK_HIGH_PERIOD_TICKS field defines the number of I2C Baud Clock +@@ -131,7 +131,7 @@ + */ + #define CLK_SYNC_100K 4 + #define CLK_SYNC_400K 4 +-#define CLK_SYNC_1000K 4 ++#define CLK_SYNC_1000K 4 + + #define SMB_CORE_DATA_TIMING_REG_OFF (SMBUS_MAST_CORE_ADDR_BASE + 0x40) + +@@ -142,25 +142,25 @@ + * determines the SCLK hold time following SDAT driven low during the first + * START bit in a transfer. + */ +-#define FIRST_START_HOLD_100K_TICKS 22 +-#define FIRST_START_HOLD_400K_TICKS 16 +-#define FIRST_START_HOLD_1000K_TICKS 6 ++#define FIRST_START_HOLD_100K_TICKS 23 ++#define FIRST_START_HOLD_400K_TICKS 8 ++#define FIRST_START_HOLD_1000K_TICKS 12 + + /* + * STOP_SETUP_XK_TICKS will indicate the number of ticks of the baud clock + * required to program 'STOP_SETUP' timer at X KHz. This timer determines the + * SDAT setup time from the rising edge of SCLK for a STOP condition. + */ +-#define STOP_SETUP_100K_TICKS 157 ++#define STOP_SETUP_100K_TICKS 150 + #define STOP_SETUP_400K_TICKS 20 +-#define STOP_SETUP_1000K_TICKS 12 ++#define STOP_SETUP_1000K_TICKS 12 + + /* + * RESTART_SETUP_XK_TICKS will indicate the number of ticks of the baud clock + * required to program 'RESTART_SETUP' timer at X KHz. This timer determines the + * SDAT setup time from the rising edge of SCLK for a repeated START condition. 
+ */ +-#define RESTART_SETUP_100K_TICKS 157 ++#define RESTART_SETUP_100K_TICKS 156 + #define RESTART_SETUP_400K_TICKS 20 + #define RESTART_SETUP_1000K_TICKS 12 + +@@ -169,7 +169,7 @@ + * required to program 'DATA_HOLD' timer at X KHz. This timer determines the + * SDAT hold time following SCLK driven low. + */ +-#define DATA_HOLD_100K_TICKS 2 ++#define DATA_HOLD_100K_TICKS 12 + #define DATA_HOLD_400K_TICKS 2 + #define DATA_HOLD_1000K_TICKS 2 + +@@ -190,35 +190,35 @@ + * Bus Idle Minimum time = BUS_IDLE_MIN[7:0] x Baud_Clock_Period x + * (BUS_IDLE_MIN_XK_TICKS[7] ? 4,1) + */ +-#define BUS_IDLE_MIN_100K_TICKS 167UL +-#define BUS_IDLE_MIN_400K_TICKS 139UL +-#define BUS_IDLE_MIN_1000K_TICKS 133UL ++#define BUS_IDLE_MIN_100K_TICKS 36UL ++#define BUS_IDLE_MIN_400K_TICKS 10UL ++#define BUS_IDLE_MIN_1000K_TICKS 4UL + + /* + * CTRL_CUM_TIME_OUT_XK_TICKS defines SMBus Controller Cumulative Time-Out. + * SMBus Controller Cumulative Time-Out duration = + * CTRL_CUM_TIME_OUT_XK_TICKS[7:0] x Baud_Clock_Period x 2048 + */ +-#define CTRL_CUM_TIME_OUT_100K_TICKS 159 +-#define CTRL_CUM_TIME_OUT_400K_TICKS 159 +-#define CTRL_CUM_TIME_OUT_1000K_TICKS 159 ++#define CTRL_CUM_TIME_OUT_100K_TICKS 76 ++#define CTRL_CUM_TIME_OUT_400K_TICKS 76 ++#define CTRL_CUM_TIME_OUT_1000K_TICKS 76 + + /* + * TARGET_CUM_TIME_OUT_XK_TICKS defines SMBus Target Cumulative Time-Out duration. + * SMBus Target Cumulative Time-Out duration = TARGET_CUM_TIME_OUT_XK_TICKS[7:0] x + * Baud_Clock_Period x 4096 + */ +-#define TARGET_CUM_TIME_OUT_100K_TICKS 199 +-#define TARGET_CUM_TIME_OUT_400K_TICKS 199 +-#define TARGET_CUM_TIME_OUT_1000K_TICKS 199 ++#define TARGET_CUM_TIME_OUT_100K_TICKS 95 ++#define TARGET_CUM_TIME_OUT_400K_TICKS 95 ++#define TARGET_CUM_TIME_OUT_1000K_TICKS 95 + + /* + * CLOCK_HIGH_TIME_OUT_XK defines Clock High time out period. 
+ * Clock High time out period = CLOCK_HIGH_TIME_OUT_XK[7:0] x Baud_Clock_Period x 8 + */ +-#define CLOCK_HIGH_TIME_OUT_100K_TICKS 204 +-#define CLOCK_HIGH_TIME_OUT_400K_TICKS 204 +-#define CLOCK_HIGH_TIME_OUT_1000K_TICKS 204 ++#define CLOCK_HIGH_TIME_OUT_100K_TICKS 97 ++#define CLOCK_HIGH_TIME_OUT_400K_TICKS 97 ++#define CLOCK_HIGH_TIME_OUT_1000K_TICKS 97 + + #define TO_SCALING_100K \ + ((BUS_IDLE_MIN_100K_TICKS << 24) | (CTRL_CUM_TIME_OUT_100K_TICKS << 16) | \ +-- +2.39.2 + diff --git a/queue-6.1/i2c-ocores-generate-stop-condition-after-timeout-in-.patch b/queue-6.1/i2c-ocores-generate-stop-condition-after-timeout-in-.patch new file mode 100644 index 00000000000..042ab12ebf8 --- /dev/null +++ b/queue-6.1/i2c-ocores-generate-stop-condition-after-timeout-in-.patch @@ -0,0 +1,106 @@ +From d321e57c13fb04e2c80f9f655d5db4e491e7f810 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Apr 2023 11:37:37 +0200 +Subject: i2c: ocores: generate stop condition after timeout in polling mode + +From: Gregor Herburger + +[ Upstream commit f8160d3b35fc94491bb0cb974dbda310ef96c0e2 ] + +In polling mode, no stop condition is generated after a timeout. This +causes SCL to remain low and thereby block the bus. If this happens +during a transfer it can cause slaves to misinterpret the subsequent +transfer and return wrong values. + +To solve this, pass the ETIMEDOUT error up from ocores_process_polling() +instead of setting STATE_ERROR directly. The caller is adjusted to call +ocores_process_timeout() on error both in polling and in IRQ mode, which +will set STATE_ERROR and generate a stop condition. 
+ +Fixes: 69c8c0c0efa8 ("i2c: ocores: add polling interface") +Signed-off-by: Gregor Herburger +Signed-off-by: Matthias Schiffer +Acked-by: Peter Korsgaard +Reviewed-by: Andrew Lunn +Reviewed-by: Federico Vaga +Signed-off-by: Wolfram Sang +Signed-off-by: Sasha Levin +--- + drivers/i2c/busses/i2c-ocores.c | 35 ++++++++++++++++++--------------- + 1 file changed, 19 insertions(+), 16 deletions(-) + +diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c +index a0af027db04c1..2e575856c5cd5 100644 +--- a/drivers/i2c/busses/i2c-ocores.c ++++ b/drivers/i2c/busses/i2c-ocores.c +@@ -342,18 +342,18 @@ static int ocores_poll_wait(struct ocores_i2c *i2c) + * ocores_isr(), we just add our polling code around it. + * + * It can run in atomic context ++ * ++ * Return: 0 on success, -ETIMEDOUT on timeout + */ +-static void ocores_process_polling(struct ocores_i2c *i2c) ++static int ocores_process_polling(struct ocores_i2c *i2c) + { +- while (1) { +- irqreturn_t ret; +- int err; ++ irqreturn_t ret; ++ int err = 0; + ++ while (1) { + err = ocores_poll_wait(i2c); +- if (err) { +- i2c->state = STATE_ERROR; ++ if (err) + break; /* timeout */ +- } + + ret = ocores_isr(-1, i2c); + if (ret == IRQ_NONE) +@@ -364,13 +364,15 @@ static void ocores_process_polling(struct ocores_i2c *i2c) + break; + } + } ++ ++ return err; + } + + static int ocores_xfer_core(struct ocores_i2c *i2c, + struct i2c_msg *msgs, int num, + bool polling) + { +- int ret; ++ int ret = 0; + u8 ctrl; + + ctrl = oc_getreg(i2c, OCI2C_CONTROL); +@@ -388,15 +390,16 @@ static int ocores_xfer_core(struct ocores_i2c *i2c, + oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_START); + + if (polling) { +- ocores_process_polling(i2c); ++ ret = ocores_process_polling(i2c); + } else { +- ret = wait_event_timeout(i2c->wait, +- (i2c->state == STATE_ERROR) || +- (i2c->state == STATE_DONE), HZ); +- if (ret == 0) { +- ocores_process_timeout(i2c); +- return -ETIMEDOUT; +- } ++ if (wait_event_timeout(i2c->wait, ++ (i2c->state == 
STATE_ERROR) || ++ (i2c->state == STATE_DONE), HZ) == 0) ++ ret = -ETIMEDOUT; ++ } ++ if (ret) { ++ ocores_process_timeout(i2c); ++ return ret; + } + + return (i2c->state == STATE_DONE) ? num : -EIO; +-- +2.39.2 + diff --git a/queue-6.1/nvme-pci-add-nvme_quirk_bogus_nid-for-t-force-z330-s.patch b/queue-6.1/nvme-pci-add-nvme_quirk_bogus_nid-for-t-force-z330-s.patch new file mode 100644 index 00000000000..7341fc3ab91 --- /dev/null +++ b/queue-6.1/nvme-pci-add-nvme_quirk_bogus_nid-for-t-force-z330-s.patch @@ -0,0 +1,36 @@ +From b45965f6bc6d2af77cec3b5305a175f04308db88 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Apr 2023 17:55:48 -0700 +Subject: nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD + +From: Duy Truong + +[ Upstream commit 74391b3e69855e7dd65a9cef36baf5fc1345affd ] + +Added a quirk to fix the TeamGroup T-Force Cardea Zero Z330 SSDs reporting +duplicate NGUIDs. + +Signed-off-by: Duy Truong +Cc: stable@vger.kernel.org +Signed-off-by: Christoph Hellwig +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/pci.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 1bef32cd10252..581bf94416e6d 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -3552,6 +3552,8 @@ static const struct pci_device_id nvme_id_table[] = { + { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */ + .driver_data = NVME_QUIRK_BOGUS_NID | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, ++ { PCI_DEVICE(0x10ec, 0x5763), /* TEAMGROUP T-FORCE CARDEA ZERO Z330 SSD */ ++ .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), + .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, + { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), +-- +2.39.2 + diff --git a/queue-6.1/nvme-pci-mark-lexar-nm760-as-ignore_dev_subnqn.patch b/queue-6.1/nvme-pci-mark-lexar-nm760-as-ignore_dev_subnqn.patch new file mode 100644 index 00000000000..3821a9e096a --- /dev/null +++ 
b/queue-6.1/nvme-pci-mark-lexar-nm760-as-ignore_dev_subnqn.patch @@ -0,0 +1,64 @@ +From 459f5ac581ed582b4c414173bb62e96213b216d8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 Mar 2023 11:29:49 +0200 +Subject: nvme-pci: mark Lexar NM760 as IGNORE_DEV_SUBNQN + +From: Juraj Pecigos + +[ Upstream commit 1231363aec86704a6b0467a12e3ca7bdf890e01d ] + +A system with more than one of these SSDs will only have one usable. +The kernel fails to detect more than one nvme device due to duplicate +cntlids. + +before: +[ 9.395229] nvme 0000:01:00.0: platform quirk: setting simple suspend +[ 9.395262] nvme nvme0: pci function 0000:01:00.0 +[ 9.395282] nvme 0000:03:00.0: platform quirk: setting simple suspend +[ 9.395305] nvme nvme1: pci function 0000:03:00.0 +[ 9.409873] nvme nvme0: Duplicate cntlid 1 with nvme1, subsys nqn.2022-07.com.siliconmotion:nvm-subsystem-sn- , rejecting +[ 9.409982] nvme nvme0: Removing after probe failure status: -22 +[ 9.427487] nvme nvme1: allocated 64 MiB host memory buffer. +[ 9.445088] nvme nvme1: 16/0/0 default/read/poll queues +[ 9.449898] nvme nvme1: Ignoring bogus Namespace Identifiers + +after: +[ 1.161890] nvme 0000:01:00.0: platform quirk: setting simple suspend +[ 1.162660] nvme nvme0: pci function 0000:01:00.0 +[ 1.162684] nvme 0000:03:00.0: platform quirk: setting simple suspend +[ 1.162707] nvme nvme1: pci function 0000:03:00.0 +[ 1.191354] nvme nvme0: allocated 64 MiB host memory buffer. +[ 1.193378] nvme nvme1: allocated 64 MiB host memory buffer. +[ 1.211044] nvme nvme1: 16/0/0 default/read/poll queues +[ 1.211080] nvme nvme0: 16/0/0 default/read/poll queues +[ 1.216145] nvme nvme0: Ignoring bogus Namespace Identifiers +[ 1.216261] nvme nvme1: Ignoring bogus Namespace Identifiers + +Adding the NVME_QUIRK_IGNORE_DEV_SUBNQN quirk resolves the issue.
+ +Signed-off-by: Juraj Pecigos +Reviewed-by: Chaitanya Kulkarni +Signed-off-by: Christoph Hellwig +Stable-dep-of: 74391b3e6985 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD") +Signed-off-by: Sasha Levin +--- + drivers/nvme/host/pci.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c +index 60452f6a9f711..1bef32cd10252 100644 +--- a/drivers/nvme/host/pci.c ++++ b/drivers/nvme/host/pci.c +@@ -3550,7 +3550,8 @@ static const struct pci_device_id nvme_id_table[] = { + { PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */ + .driver_data = NVME_QUIRK_BOGUS_NID, }, + { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */ +- .driver_data = NVME_QUIRK_BOGUS_NID, }, ++ .driver_data = NVME_QUIRK_BOGUS_NID | ++ NVME_QUIRK_IGNORE_DEV_SUBNQN, }, + { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061), + .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, }, + { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065), +-- +2.39.2 + diff --git a/queue-6.1/powerpc-papr_scm-update-the-numa-distance-table-for-.patch b/queue-6.1/powerpc-papr_scm-update-the-numa-distance-table-for-.patch new file mode 100644 index 00000000000..3a1ae6110d5 --- /dev/null +++ b/queue-6.1/powerpc-papr_scm-update-the-numa-distance-table-for-.patch @@ -0,0 +1,84 @@ +From 3c008671fed52c8e248a3e549f3b90f463661181 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 4 Apr 2023 09:44:33 +0530 +Subject: powerpc/papr_scm: Update the NUMA distance table for the target node + +From: Aneesh Kumar K.V + +[ Upstream commit b277fc793daf258877b4c0744b52f69d6e6ba22e ] + +Platform device helper routines won't update the NUMA distance table +while creating a platform device, even if the device is present on a +NUMA node that doesn't have memory or CPU. This is especially true for +pmem devices. If the target node of the pmem device is not online, we +find the nearest online node to the device and associate the pmem device +with that online node. 
To find the nearest online node, we should have +the numa distance table updated correctly. Update the distance +information during the device probe. + +For a papr scm device on NUMA node 3 distance_lookup_table value for +distance_ref_points_depth = 2 before and after fix is below: + +Before fix: + node 3 distance depth 0 - 0 + node 3 distance depth 1 - 0 + node 4 distance depth 0 - 4 + node 4 distance depth 1 - 2 + node 5 distance depth 0 - 5 + node 5 distance depth 1 - 1 + +After fix + node 3 distance depth 0 - 3 + node 3 distance depth 1 - 1 + node 4 distance depth 0 - 4 + node 4 distance depth 1 - 2 + node 5 distance depth 0 - 5 + node 5 distance depth 1 - 1 + +Without the fix, the nearest numa node to the pmem device (NUMA node 3) +will be picked as 4. After the fix, we get the correct numa node which +is 5. + +Fixes: da1115fdbd6e ("powerpc/nvdimm: Pick nearby online node if the device node is not online") +Signed-off-by: Aneesh Kumar K.V +Signed-off-by: Michael Ellerman +Link: https://msgid.link/20230404041433.1781804-1-aneesh.kumar@linux.ibm.com +Signed-off-by: Sasha Levin +--- + arch/powerpc/mm/numa.c | 1 + + arch/powerpc/platforms/pseries/papr_scm.c | 7 +++++++ + 2 files changed, 8 insertions(+) + +diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c +index b44ce71917d75..16cfe56be05bb 100644 +--- a/arch/powerpc/mm/numa.c ++++ b/arch/powerpc/mm/numa.c +@@ -366,6 +366,7 @@ void update_numa_distance(struct device_node *node) + WARN(numa_distance_table[nid][nid] == -1, + "NUMA distance details for node %d not provided\n", nid); + } ++EXPORT_SYMBOL_GPL(update_numa_distance); + + /* + * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... 
domainidN} +diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c +index 2f8385523a132..1a53e048ceb76 100644 +--- a/arch/powerpc/platforms/pseries/papr_scm.c ++++ b/arch/powerpc/platforms/pseries/papr_scm.c +@@ -1428,6 +1428,13 @@ static int papr_scm_probe(struct platform_device *pdev) + return -ENODEV; + } + ++ /* ++ * open firmware platform device create won't update the NUMA ++ * distance table. For PAPR SCM devices we use numa_map_to_online_node() ++ * to find the nearest online NUMA node and that requires correct ++ * distance table information. ++ */ ++ update_numa_distance(dn); + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) +-- +2.39.2 + diff --git a/queue-6.1/purgatory-fix-disabling-debug-info.patch b/queue-6.1/purgatory-fix-disabling-debug-info.patch new file mode 100644 index 00000000000..a03194f2ead --- /dev/null +++ b/queue-6.1/purgatory-fix-disabling-debug-info.patch @@ -0,0 +1,61 @@ +From c8f04f3ea2ffd7d22b3539d9487fc8505d341988 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 26 Mar 2023 18:21:21 +0000 +Subject: purgatory: fix disabling debug info + +From: Alyssa Ross + +[ Upstream commit d83806c4c0cccc0d6d3c3581a11983a9c186a138 ] + +Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS. +Instead, it includes -g, the appropriate -gdwarf-* flag, and also the +-Wa versions of both of those if building with Clang and GNU as. As a +result, debug info was being generated for the purgatory objects, even +though the intention was that it not be. 
+ +Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files") +Signed-off-by: Alyssa Ross +Cc: stable@vger.kernel.org +Acked-by: Nick Desaulniers +Signed-off-by: Masahiro Yamada +Signed-off-by: Sasha Levin +--- + arch/riscv/purgatory/Makefile | 7 +------ + arch/x86/purgatory/Makefile | 3 +-- + 2 files changed, 2 insertions(+), 8 deletions(-) + +diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile +index d16bf715a586b..5730797a6b402 100644 +--- a/arch/riscv/purgatory/Makefile ++++ b/arch/riscv/purgatory/Makefile +@@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS) + CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE) + CFLAGS_ctype.o += $(PURGATORY_CFLAGS) + +-AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 +-AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 +-AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 +-AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 +-AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 +-AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 ++asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x)) + + $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) +diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile +index 17f09dc263811..82fec66d46d29 100644 +--- a/arch/x86/purgatory/Makefile ++++ b/arch/x86/purgatory/Makefile +@@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS) + CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE) + CFLAGS_string.o += $(PURGATORY_CFLAGS) + +-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2 +-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2 ++asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x)) + + $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) +-- +2.39.2 + diff --git a/queue-6.1/risc-v-add-infrastructure-to-allow-different-str-imp.patch b/queue-6.1/risc-v-add-infrastructure-to-allow-different-str-imp.patch new file mode 100644 index 00000000000..85bd0afb6bb --- /dev/null +++ b/queue-6.1/risc-v-add-infrastructure-to-allow-different-str-imp.patch 
@@ -0,0 +1,262 @@ +From 3018f540f2064a9c0d2e59c56e84b398b3a6b0cb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 13 Jan 2023 22:23:00 +0100 +Subject: RISC-V: add infrastructure to allow different str* implementations + +From: Heiko Stuebner + +[ Upstream commit 56e0790c7f9e59ba6a0f4b59981d1d6fbf43efb0 ] + +Depending on supported extensions on specific RISC-V cores, +optimized str* functions might make sense. + +This adds basic infrastructure to allow patching the function calls +via alternatives later on. + +The Linux kernel provides standard implementations for string functions +but when architectures want to extend them, they need to provide their +own. + +The added generic string functions are done in assembler (taken from +disassembling the main-kernel functions for now) to allow us to control +the used registers and extend them with optimized variants. + +This doesn't override the compiler's use of builtin replacements. So still +first of all the compiler will select if a builtin will be better suitable +i.e. for known strings. For all regular cases we will want to later +select possible optimized variants and in the worst case fall back to the +generic implementation added with this change.
+ +Reviewed-by: Andrew Jones +Signed-off-by: Heiko Stuebner +Reviewed-by: Conor Dooley +Link: https://lore.kernel.org/r/20230113212301.3534711-2-heiko@sntech.de +Signed-off-by: Palmer Dabbelt +Stable-dep-of: d83806c4c0cc ("purgatory: fix disabling debug info") +Signed-off-by: Sasha Levin +--- + arch/riscv/include/asm/string.h | 10 ++++++++ + arch/riscv/kernel/riscv_ksyms.c | 3 +++ + arch/riscv/lib/Makefile | 3 +++ + arch/riscv/lib/strcmp.S | 36 +++++++++++++++++++++++++++++ + arch/riscv/lib/strlen.S | 28 ++++++++++++++++++++++ + arch/riscv/lib/strncmp.S | 41 +++++++++++++++++++++++++++++++++ + arch/riscv/purgatory/Makefile | 13 +++++++++++ + 7 files changed, 134 insertions(+) + create mode 100644 arch/riscv/lib/strcmp.S + create mode 100644 arch/riscv/lib/strlen.S + create mode 100644 arch/riscv/lib/strncmp.S + +diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h +index 9090493665555..a96b1fea24fe4 100644 +--- a/arch/riscv/include/asm/string.h ++++ b/arch/riscv/include/asm/string.h +@@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t); + #define __HAVE_ARCH_MEMMOVE + extern asmlinkage void *memmove(void *, const void *, size_t); + extern asmlinkage void *__memmove(void *, const void *, size_t); ++ ++#define __HAVE_ARCH_STRCMP ++extern asmlinkage int strcmp(const char *cs, const char *ct); ++ ++#define __HAVE_ARCH_STRLEN ++extern asmlinkage __kernel_size_t strlen(const char *); ++ ++#define __HAVE_ARCH_STRNCMP ++extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count); ++ + /* For those files which don't want to check by kasan. 
*/ + #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__) + #define memcpy(dst, src, len) __memcpy(dst, src, len) +diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c +index 5ab1c7e1a6ed5..a72879b4249a5 100644 +--- a/arch/riscv/kernel/riscv_ksyms.c ++++ b/arch/riscv/kernel/riscv_ksyms.c +@@ -12,6 +12,9 @@ + EXPORT_SYMBOL(memset); + EXPORT_SYMBOL(memcpy); + EXPORT_SYMBOL(memmove); ++EXPORT_SYMBOL(strcmp); ++EXPORT_SYMBOL(strlen); ++EXPORT_SYMBOL(strncmp); + EXPORT_SYMBOL(__memset); + EXPORT_SYMBOL(__memcpy); + EXPORT_SYMBOL(__memmove); +diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile +index 25d5c9664e57e..6c74b0bedd60d 100644 +--- a/arch/riscv/lib/Makefile ++++ b/arch/riscv/lib/Makefile +@@ -3,6 +3,9 @@ lib-y += delay.o + lib-y += memcpy.o + lib-y += memset.o + lib-y += memmove.o ++lib-y += strcmp.o ++lib-y += strlen.o ++lib-y += strncmp.o + lib-$(CONFIG_MMU) += uaccess.o + lib-$(CONFIG_64BIT) += tishift.o + +diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S +new file mode 100644 +index 0000000000000..8babd712b9587 +--- /dev/null ++++ b/arch/riscv/lib/strcmp.S +@@ -0,0 +1,36 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++ ++#include ++#include ++#include ++ ++/* int strcmp(const char *cs, const char *ct) */ ++SYM_FUNC_START(strcmp) ++ /* ++ * Returns ++ * a0 - comparison result, value like strcmp ++ * ++ * Parameters ++ * a0 - string1 ++ * a1 - string2 ++ * ++ * Clobbers ++ * t0, t1 ++ */ ++1: ++ lbu t0, 0(a0) ++ lbu t1, 0(a1) ++ addi a0, a0, 1 ++ addi a1, a1, 1 ++ bne t0, t1, 2f ++ bnez t0, 1b ++ li a0, 0 ++ ret ++2: ++ /* ++ * strcmp only needs to return (< 0, 0, > 0) values ++ * not necessarily -1, 0, +1 ++ */ ++ sub a0, t0, t1 ++ ret ++SYM_FUNC_END(strcmp) +diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S +new file mode 100644 +index 0000000000000..0a3b11853efdb +--- /dev/null ++++ b/arch/riscv/lib/strlen.S +@@ -0,0 +1,28 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++ ++#include 
++#include ++#include ++ ++/* int strlen(const char *s) */ ++SYM_FUNC_START(strlen) ++ /* ++ * Returns ++ * a0 - string length ++ * ++ * Parameters ++ * a0 - String to measure ++ * ++ * Clobbers: ++ * t0, t1 ++ */ ++ mv t1, a0 ++1: ++ lbu t0, 0(t1) ++ beqz t0, 2f ++ addi t1, t1, 1 ++ j 1b ++2: ++ sub a0, t1, a0 ++ ret ++SYM_FUNC_END(strlen) +diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S +new file mode 100644 +index 0000000000000..1f644d0a93f68 +--- /dev/null ++++ b/arch/riscv/lib/strncmp.S +@@ -0,0 +1,41 @@ ++/* SPDX-License-Identifier: GPL-2.0-only */ ++ ++#include ++#include ++#include ++ ++/* int strncmp(const char *cs, const char *ct, size_t count) */ ++SYM_FUNC_START(strncmp) ++ /* ++ * Returns ++ * a0 - comparison result, value like strncmp ++ * ++ * Parameters ++ * a0 - string1 ++ * a1 - string2 ++ * a2 - number of characters to compare ++ * ++ * Clobbers ++ * t0, t1, t2 ++ */ ++ li t2, 0 ++1: ++ beq a2, t2, 2f ++ lbu t0, 0(a0) ++ lbu t1, 0(a1) ++ addi a0, a0, 1 ++ addi a1, a1, 1 ++ bne t0, t1, 3f ++ addi t2, t2, 1 ++ bnez t0, 1b ++2: ++ li a0, 0 ++ ret ++3: ++ /* ++ * strncmp only needs to return (< 0, 0, > 0) values ++ * not necessarily -1, 0, +1 ++ */ ++ sub a0, t0, t1 ++ ret ++SYM_FUNC_END(strncmp) +diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile +index dd58e1d993972..d16bf715a586b 100644 +--- a/arch/riscv/purgatory/Makefile ++++ b/arch/riscv/purgatory/Makefile +@@ -2,6 +2,7 @@ + OBJECT_FILES_NON_STANDARD := y + + purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o ++purgatory-y += strcmp.o strlen.o strncmp.o + + targets += $(purgatory-y) + PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) +@@ -18,6 +19,15 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE + $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE + $(call if_changed_rule,as_o_S) + ++$(obj)/strcmp.o: $(srctree)/arch/riscv/lib/strcmp.S FORCE ++ $(call if_changed_rule,as_o_S) ++ ++$(obj)/strlen.o: 
$(srctree)/arch/riscv/lib/strlen.S FORCE ++ $(call if_changed_rule,as_o_S) ++ ++$(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE ++ $(call if_changed_rule,as_o_S) ++ + $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE + $(call if_changed_rule,cc_o_c) + +@@ -77,6 +87,9 @@ CFLAGS_ctype.o += $(PURGATORY_CFLAGS) + AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2 + AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2 + AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2 ++AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2 ++AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2 ++AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2 + + $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE + $(call if_changed,ld) +-- +2.39.2 + diff --git a/queue-6.1/riscv-move-early-dtb-mapping-into-the-fixmap-region.patch b/queue-6.1/riscv-move-early-dtb-mapping-into-the-fixmap-region.patch new file mode 100644 index 00000000000..3f712468e89 --- /dev/null +++ b/queue-6.1/riscv-move-early-dtb-mapping-into-the-fixmap-region.patch @@ -0,0 +1,264 @@ +From cbf41c65e31e4cd561076444cbdd7c2daaabda30 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Mar 2023 10:19:30 +0200 +Subject: riscv: Move early dtb mapping into the fixmap region + +From: Alexandre Ghiti + +[ Upstream commit ef69d2559fe91f23d27a3d6fd640b5641787d22e ] + +riscv establishes 2 virtual mappings: + +- early_pg_dir maps the kernel which allows to discover the system + memory +- swapper_pg_dir installs the final mapping (linear mapping included) + +We used to map the dtb in early_pg_dir using DTB_EARLY_BASE_VA, and this +mapping was not carried over in swapper_pg_dir. It happens that +early_init_fdt_scan_reserved_mem() must be called before swapper_pg_dir is +setup otherwise we could allocate reserved memory defined in the dtb. +And this function initializes reserved_mem variable with addresses that +lie in the early_pg_dir dtb mapping: when those addresses are reused +with swapper_pg_dir, this mapping does not exist and then we trap. 
+ +The previous "fix" was incorrect as early_init_fdt_scan_reserved_mem() +must be called before swapper_pg_dir is set up otherwise we could +allocate in reserved memory defined in the dtb. + +So move the dtb mapping in the fixmap region which is established in +early_pg_dir and handed over to swapper_pg_dir. + +Fixes: 922b0375fc93 ("riscv: Fix memblock reservation for device tree blob") +Fixes: 8f3a2b4a96dc ("RISC-V: Move DT mapping outof fixmap") +Fixes: 50e63dd8ed92 ("riscv: fix reserved memory setup") +Reported-by: Conor Dooley +Link: https://lore.kernel.org/all/f8e67f82-103d-156c-deb0-d6d6e2756f5e@microchip.com/ +Signed-off-by: Alexandre Ghiti +Reviewed-by: Conor Dooley +Tested-by: Conor Dooley +Link: https://lore.kernel.org/r/20230329081932.79831-2-alexghiti@rivosinc.com +Cc: stable@vger.kernel.org +Signed-off-by: Palmer Dabbelt +Signed-off-by: Sasha Levin +--- + Documentation/riscv/vm-layout.rst | 6 +-- + arch/riscv/include/asm/fixmap.h | 8 ++++ + arch/riscv/include/asm/pgtable.h | 8 +++- + arch/riscv/kernel/setup.c | 1 - + arch/riscv/mm/init.c | 61 +++++++++++++++++-------------- + 5 files changed, 51 insertions(+), 33 deletions(-) + +diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst +index 3be44e74ec5d6..5462c84f4723f 100644 +--- a/Documentation/riscv/vm-layout.rst ++++ b/Documentation/riscv/vm-layout.rst +@@ -47,7 +47,7 @@ RISC-V Linux Kernel SV39 + | Kernel-space virtual memory, shared between all processes: + ____________________________________________________________|___________________________________________________________ + | | | | +- ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap ++ ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap + ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io + ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap + ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space +@@ -83,7 +83,7 @@ RISC-V Linux Kernel SV48 + | 
Kernel-space virtual memory, shared between all processes: + ____________________________________________________________|___________________________________________________________ + | | | | +- ffff8d7ffee00000 | -114.5 TB | ffff8d7ffeffffff | 2 MB | fixmap ++ ffff8d7ffea00000 | -114.5 TB | ffff8d7ffeffffff | 6 MB | fixmap + ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io + ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap + ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space +@@ -119,7 +119,7 @@ RISC-V Linux Kernel SV57 + | Kernel-space virtual memory, shared between all processes: + ____________________________________________________________|___________________________________________________________ + | | | | +- ff1bfffffee00000 | -57 PB | ff1bfffffeffffff | 2 MB | fixmap ++ ff1bfffffea00000 | -57 PB | ff1bfffffeffffff | 6 MB | fixmap + ff1bffffff000000 | -57 PB | ff1bffffffffffff | 16 MB | PCI io + ff1c000000000000 | -57 PB | ff1fffffffffffff | 1 PB | vmemmap + ff20000000000000 | -56 PB | ff5fffffffffffff | 16 PB | vmalloc/ioremap space +diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h +index 5c3e7b97fcc6f..0a55099bb7349 100644 +--- a/arch/riscv/include/asm/fixmap.h ++++ b/arch/riscv/include/asm/fixmap.h +@@ -22,6 +22,14 @@ + */ + enum fixed_addresses { + FIX_HOLE, ++ /* ++ * The fdt fixmap mapping must be PMD aligned and will be mapped ++ * using PMD entries in fixmap_pmd in 64-bit and a PGD entry in 32-bit. 
++ */ ++ FIX_FDT_END, ++ FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, ++ ++ /* Below fixmaps will be mapped using fixmap_pte */ + FIX_PTE, + FIX_PMD, + FIX_PUD, +diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h +index 92ec2d9d7273f..2aeaf8e3a4ab0 100644 +--- a/arch/riscv/include/asm/pgtable.h ++++ b/arch/riscv/include/asm/pgtable.h +@@ -87,9 +87,13 @@ + + #define FIXADDR_TOP PCI_IO_START + #ifdef CONFIG_64BIT +-#define FIXADDR_SIZE PMD_SIZE ++#define MAX_FDT_SIZE PMD_SIZE ++#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M) ++#define FIXADDR_SIZE (PMD_SIZE + FIX_FDT_SIZE) + #else +-#define FIXADDR_SIZE PGDIR_SIZE ++#define MAX_FDT_SIZE PGDIR_SIZE ++#define FIX_FDT_SIZE MAX_FDT_SIZE ++#define FIXADDR_SIZE (PGDIR_SIZE + FIX_FDT_SIZE) + #endif + #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c +index d4a12233e728d..2acf51c235673 100644 +--- a/arch/riscv/kernel/setup.c ++++ b/arch/riscv/kernel/setup.c +@@ -280,7 +280,6 @@ void __init setup_arch(char **cmdline_p) + #else + unflatten_device_tree(); + #endif +- early_init_fdt_scan_reserved_mem(); + misc_mem_init(); + + init_resources(); +diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c +index 50a1b6edd4918..5570c52deb0b5 100644 +--- a/arch/riscv/mm/init.c ++++ b/arch/riscv/mm/init.c +@@ -57,7 +57,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] + EXPORT_SYMBOL(empty_zero_page); + + extern char _start[]; +-#define DTB_EARLY_BASE_VA PGDIR_SIZE + void *_dtb_early_va __initdata; + uintptr_t _dtb_early_pa __initdata; + +@@ -236,6 +235,14 @@ static void __init setup_bootmem(void) + set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); + + reserve_initrd_mem(); ++ ++ /* ++ * No allocation should be done before reserving the memory as defined ++ * in the device tree, otherwise the allocation could end up in a ++ * reserved region. 
++ */ ++ early_init_fdt_scan_reserved_mem(); ++ + /* + * If DTB is built in, no need to reserve its memblock. + * Otherwise, do reserve it but avoid using +@@ -279,9 +286,6 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss; + static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; + + pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +-static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); +-static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE); +-static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); + + #ifdef CONFIG_XIP_KERNEL + #define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops)) +@@ -626,9 +630,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp, + #define trampoline_pgd_next (pgtable_l5_enabled ? \ + (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \ + (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd)) +-#define early_dtb_pgd_next (pgtable_l5_enabled ? \ +- (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \ +- (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd)) + #else + #define pgd_next_t pte_t + #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va) +@@ -636,7 +637,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp, + #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \ + create_pte_mapping(__nextp, __va, __pa, __sz, __prot) + #define fixmap_pgd_next ((uintptr_t)fixmap_pte) +-#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd) + #define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) + #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) + #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0) +@@ -859,32 +859,28 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early) + * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR + * entry. 
+ */ +-static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa) ++static void __init create_fdt_early_page_table(pgd_t *pgdir, ++ uintptr_t fix_fdt_va, ++ uintptr_t dtb_pa) + { +-#ifndef CONFIG_BUILTIN_DTB + uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1); + +- create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA, +- IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa, +- PGDIR_SIZE, +- IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL); +- +- if (pgtable_l5_enabled) +- create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA, +- (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE); +- +- if (pgtable_l4_enabled) +- create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA, +- (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE); ++#ifndef CONFIG_BUILTIN_DTB ++ /* Make sure the fdt fixmap address is always aligned on PMD size */ ++ BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE)); + +- if (IS_ENABLED(CONFIG_64BIT)) { +- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA, ++ /* In 32-bit only, the fdt lies in its own PGD */ ++ if (!IS_ENABLED(CONFIG_64BIT)) { ++ create_pgd_mapping(early_pg_dir, fix_fdt_va, ++ pa, MAX_FDT_SIZE, PAGE_KERNEL); ++ } else { ++ create_pmd_mapping(fixmap_pmd, fix_fdt_va, + pa, PMD_SIZE, PAGE_KERNEL); +- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE, ++ create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE, + pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); + } + +- dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); ++ dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1)); + #else + /* + * For 64-bit kernel, __va can't be used since it would return a linear +@@ -1054,7 +1050,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) + create_kernel_page_table(early_pg_dir, true); + + /* Setup early mapping for FDT early scan */ +- create_fdt_early_page_table(early_pg_dir, dtb_pa); ++ create_fdt_early_page_table(early_pg_dir, ++ __fix_to_virt(FIX_FDT), dtb_pa); + + /* + * Bootime fixmap only can handle PMD_SIZE mapping. 
Thus, boot-ioremap +@@ -1096,6 +1093,16 @@ static void __init setup_vm_final(void) + u64 i; + + /* Setup swapper PGD for fixmap */ ++#if !defined(CONFIG_64BIT) ++ /* ++ * In 32-bit, the device tree lies in a pgd entry, so it must be copied ++ * directly in swapper_pg_dir in addition to the pgd entry that points ++ * to fixmap_pte. ++ */ ++ unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT)); ++ ++ set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]); ++#endif + create_pgd_mapping(swapper_pg_dir, FIXADDR_START, + __pa_symbol(fixmap_pgd_next), + PGDIR_SIZE, PAGE_TABLE); +-- +2.39.2 + diff --git a/queue-6.1/sched-fair-fix-imbalance-overflow.patch b/queue-6.1/sched-fair-fix-imbalance-overflow.patch new file mode 100644 index 00000000000..20a3a461b7b --- /dev/null +++ b/queue-6.1/sched-fair-fix-imbalance-overflow.patch @@ -0,0 +1,48 @@ +From aade648641f22408ed5e5297350048704fcf3608 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 11 Apr 2023 11:06:11 +0200 +Subject: sched/fair: Fix imbalance overflow + +From: Vincent Guittot + +[ Upstream commit 91dcf1e8068e9a8823e419a7a34ff4341275fb70 ] + +When local group is fully busy but its average load is above system load, +computing the imbalance will overflow and local group is not the best +target for pulling this load. 
+ +Fixes: 0b0695f2b34a ("sched/fair: Rework load_balance()") +Reported-by: Tingjia Cao +Signed-off-by: Vincent Guittot +Signed-off-by: Peter Zijlstra (Intel) +Tested-by: Tingjia Cao +Link: https://lore.kernel.org/lkml/CABcWv9_DAhVBOq2=W=2ypKE9dKM5s2DvoV8-U0+GDwwuKZ89jQ@mail.gmail.com/T/ +Signed-off-by: Sasha Levin +--- + kernel/sched/fair.c | 10 ++++++++++ + 1 file changed, 10 insertions(+) + +diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c +index 88821ab009b30..ec2d913280e6a 100644 +--- a/kernel/sched/fair.c ++++ b/kernel/sched/fair.c +@@ -10041,6 +10041,16 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s + + sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) / + sds->total_capacity; ++ ++ /* ++ * If the local group is more loaded than the average system ++ * load, don't try to pull any tasks. ++ */ ++ if (local->avg_load >= sds->avg_load) { ++ env->imbalance = 0; ++ return; ++ } ++ + } + + /* +-- +2.39.2 + diff --git a/queue-6.1/series b/queue-6.1/series index 07c43b52a62..2c77aa07296 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -115,3 +115,20 @@ drm-amd-pm-correct-smu13.0.7-pstate-profiling-clock-settings.patch drm-amd-pm-correct-smu13.0.7-max-shader-clock-reporting.patch mptcp-use-mptcp_schedule_work-instead-of-open-coding-it.patch mptcp-stricter-state-check-in-mptcp_worker.patch +ubi-fix-deadlock-caused-by-recursively-holding-work_.patch +i2c-mchp-pci1xxxx-update-timing-registers.patch +ubi-fix-failure-attaching-when-vid_hdr-offset-equals.patch +powerpc-papr_scm-update-the-numa-distance-table-for-.patch +sched-fair-fix-imbalance-overflow.patch +x86-rtc-remove-__init-for-runtime-functions.patch +i2c-ocores-generate-stop-condition-after-timeout-in-.patch +cifs-fix-negotiate-context-parsing.patch +risc-v-add-infrastructure-to-allow-different-str-imp.patch +purgatory-fix-disabling-debug-info.patch +documentation-riscv-document-the-sv57-vm-layout.patch 
+riscv-move-early-dtb-mapping-into-the-fixmap-region.patch +nvme-pci-mark-lexar-nm760-as-ignore_dev_subnqn.patch +nvme-pci-add-nvme_quirk_bogus_nid-for-t-force-z330-s.patch +cgroup-cpuset-skip-spread-flags-update-on-v2.patch +cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgr.patch +cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_.patch diff --git a/queue-6.1/ubi-fix-deadlock-caused-by-recursively-holding-work_.patch b/queue-6.1/ubi-fix-deadlock-caused-by-recursively-holding-work_.patch new file mode 100644 index 00000000000..fc8cab50332 --- /dev/null +++ b/queue-6.1/ubi-fix-deadlock-caused-by-recursively-holding-work_.patch @@ -0,0 +1,66 @@ +From 902ffed3205d4bc12d520309f4a9f162b6036f0a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 4 Mar 2023 09:41:41 +0800 +Subject: ubi: Fix deadlock caused by recursively holding work_sem + +From: ZhaoLong Wang + +[ Upstream commit f773f0a331d6c41733b17bebbc1b6cae12e016f5 ] + +During the processing of the bgt, if the sync_erase() return -EBUSY +or some other error code in __erase_worker(),schedule_erase() called +again lead to the down_read(ubi->work_sem) hold twice and may get +block by down_write(ubi->work_sem) in ubi_update_fastmap(), +which cause deadlock. + + ubi bgt other task + do_work + down_read(&ubi->work_sem) ubi_update_fastmap + erase_worker # Blocked by down_read + __erase_worker down_write(&ubi->work_sem) + schedule_erase + schedule_ubi_work + down_read(&ubi->work_sem) + +Fix this by changing input parameter @nested of the schedule_erase() to +'true' to avoid recursively acquiring the down_read(&ubi->work_sem). + +Also, fix the incorrect comment about @nested parameter of the +schedule_erase() because when down_write(ubi->work_sem) is held, the +@nested is also need be true. 
+ +Link: https://bugzilla.kernel.org/show_bug.cgi?id=217093 +Fixes: 2e8f08deabbc ("ubi: Fix races around ubi_refill_pools()") +Signed-off-by: ZhaoLong Wang +Reviewed-by: Zhihao Cheng +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + drivers/mtd/ubi/wl.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c +index 9e14319225c97..6049ab9e46479 100644 +--- a/drivers/mtd/ubi/wl.c ++++ b/drivers/mtd/ubi/wl.c +@@ -575,7 +575,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk, + * @vol_id: the volume ID that last used this PEB + * @lnum: the last used logical eraseblock number for the PEB + * @torture: if the physical eraseblock has to be tortured +- * @nested: denotes whether the work_sem is already held in read mode ++ * @nested: denotes whether the work_sem is already held + * + * This function returns zero in case of success and a %-ENOMEM in case of + * failure. +@@ -1131,7 +1131,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk) + int err1; + + /* Re-schedule the LEB for erasure */ +- err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false); ++ err1 = schedule_erase(ubi, e, vol_id, lnum, 0, true); + if (err1) { + spin_lock(&ubi->wl_lock); + wl_entry_destroy(ubi, e); +-- +2.39.2 + diff --git a/queue-6.1/ubi-fix-failure-attaching-when-vid_hdr-offset-equals.patch b/queue-6.1/ubi-fix-failure-attaching-when-vid_hdr-offset-equals.patch new file mode 100644 index 00000000000..a42cc2c147a --- /dev/null +++ b/queue-6.1/ubi-fix-failure-attaching-when-vid_hdr-offset-equals.patch @@ -0,0 +1,79 @@ +From 9ceb73731c25f98bebb89008a028db23a7d5cade Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 6 Mar 2023 09:33:08 +0800 +Subject: ubi: Fix failure attaching when vid_hdr offset equals to (sub)page + size + +From: Zhihao Cheng + +[ Upstream commit 1e020e1b96afdecd20680b5b5be2a6ffc3d27628 ] + +Following process will make ubi attaching failed 
since commit +1b42b1a36fc946 ("ubi: ensure that VID header offset ... size"): + +ID="0xec,0xa1,0x00,0x15" # 128M 128KB 2KB +modprobe nandsim id_bytes=$ID +flash_eraseall /dev/mtd0 +modprobe ubi mtd="0,2048" # set vid_hdr offset as 2048 (one page) +(dmesg): + ubi0 error: ubi_attach_mtd_dev [ubi]: VID header offset 2048 too large. + UBI error: cannot attach mtd0 + UBI error: cannot initialize UBI, error -22 + +Rework original solution, the key point is making sure +'vid_hdr_shift + UBI_VID_HDR_SIZE < ubi->vid_hdr_alsize', +so we should check vid_hdr_shift rather than vid_hdr_offset. +Then, ubi still supports (sub)page aligned VID header offset. + +Fixes: 1b42b1a36fc946 ("ubi: ensure that VID header offset ... size") +Signed-off-by: Zhihao Cheng +Tested-by: Nicolas Schichan +Tested-by: Miquel Raynal # v5.10, v4.19 +Signed-off-by: Richard Weinberger +Signed-off-by: Sasha Levin +--- + drivers/mtd/ubi/build.c | 21 +++++++++++++++------ + 1 file changed, 15 insertions(+), 6 deletions(-) + +diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c +index 7f65af1697519..1662c12e24ada 100644 +--- a/drivers/mtd/ubi/build.c ++++ b/drivers/mtd/ubi/build.c +@@ -664,12 +664,6 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024) + ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size); + ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size); + +- if (ubi->vid_hdr_offset && ((ubi->vid_hdr_offset + UBI_VID_HDR_SIZE) > +- ubi->vid_hdr_alsize)) { +- ubi_err(ubi, "VID header offset %d too large.", ubi->vid_hdr_offset); +- return -EINVAL; +- } +- + dbg_gen("min_io_size %d", ubi->min_io_size); + dbg_gen("max_write_size %d", ubi->max_write_size); + dbg_gen("hdrs_min_io_size %d", ubi->hdrs_min_io_size); +@@ -687,6 +681,21 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024) + ubi->vid_hdr_aloffset; + } + ++ /* ++ * Memory allocation for VID header is ubi->vid_hdr_alsize ++ * which is described in comments in io.c.
++ * Make sure VID header shift + UBI_VID_HDR_SIZE not exceeds ++ * ubi->vid_hdr_alsize, so that all vid header operations ++ * won't access memory out of bounds. ++ */ ++ if ((ubi->vid_hdr_shift + UBI_VID_HDR_SIZE) > ubi->vid_hdr_alsize) { ++ ubi_err(ubi, "Invalid VID header offset %d, VID header shift(%d)" ++ " + VID header size(%zu) > VID header aligned size(%d).", ++ ubi->vid_hdr_offset, ubi->vid_hdr_shift, ++ UBI_VID_HDR_SIZE, ubi->vid_hdr_alsize); ++ return -EINVAL; ++ } ++ + /* Similar for the data offset */ + ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE; + ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size); +-- +2.39.2 + diff --git a/queue-6.1/x86-rtc-remove-__init-for-runtime-functions.patch b/queue-6.1/x86-rtc-remove-__init-for-runtime-functions.patch new file mode 100644 index 00000000000..91de554afca --- /dev/null +++ b/queue-6.1/x86-rtc-remove-__init-for-runtime-functions.patch @@ -0,0 +1,53 @@ +From a9718d50cb08e9319acde2c505a20bd4acaf0f96 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 6 Apr 2023 08:26:52 +0200 +Subject: x86/rtc: Remove __init for runtime functions + +From: Matija Glavinic Pecotic + +[ Upstream commit 775d3c514c5b2763a50ab7839026d7561795924d ] + +set_rtc_noop(), get_rtc_noop() are used after booting, therefore their __init +annotation is wrong. + +A crash was observed on an x86 platform where CMOS RTC is unused and +disabled via device tree. set_rtc_noop() was invoked from ntp: +sync_hw_clock(), although CONFIG_RTC_SYSTOHC=n, however sync_cmos_clock() +doesn't honour that. + + Workqueue: events_power_efficient sync_hw_clock + RIP: 0010:set_rtc_noop + Call Trace: + update_persistent_clock64 + sync_hw_clock + +Fix this by dropping the __init annotation from set/get_rtc_noop().
+ +Fixes: c311ed6183f4 ("x86/init: Allow DT configured systems to disable RTC at boot time") +Signed-off-by: Matija Glavinic Pecotic +Signed-off-by: Thomas Gleixner +Reviewed-by: Andy Shevchenko +Link: https://lore.kernel.org/r/59f7ceb1-446b-1d3d-0bc8-1f0ee94b1e18@nokia.com +Signed-off-by: Sasha Levin +--- + arch/x86/kernel/x86_init.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c +index ef80d361b4632..10622cf2b30f4 100644 +--- a/arch/x86/kernel/x86_init.c ++++ b/arch/x86/kernel/x86_init.c +@@ -33,8 +33,8 @@ static int __init iommu_init_noop(void) { return 0; } + static void iommu_shutdown_noop(void) { } + bool __init bool_x86_init_noop(void) { return false; } + void x86_op_int_noop(int cpu) { } +-static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } +-static __init void get_rtc_noop(struct timespec64 *now) { } ++static int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; } ++static void get_rtc_noop(struct timespec64 *now) { } + + static __initconst const struct of_device_id of_cmos_match[] = { + { .compatible = "motorola,mc146818" }, +-- +2.39.2 +