--- /dev/null
+From ce2f88c9541a819b574d9ed9b84127db31227fb2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Apr 2023 09:35:59 -0400
+Subject: cgroup/cpuset: Add cpuset_can_fork() and cpuset_cancel_fork() methods
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit eee87853794187f6adbe19533ed79c8b44b36a91 ]
+
+In the case of CLONE_INTO_CGROUP, not all cpusets are ready to accept
+new tasks. It is too late to check that in cpuset_fork(). So we need
+to add the cpuset_can_fork() and cpuset_cancel_fork() methods to
+pre-check it before we can allow attachment to a different cpuset.
+
+We also need to set the attach_in_progress flag to alert other code
+that a new task is going to be added to the cpuset.
+
+Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
+Suggested-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 97 +++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 86 insertions(+), 11 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 85f071fb1a414..e276db7228451 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2453,6 +2453,20 @@ static int fmeter_getrate(struct fmeter *fmp)
+
+ static struct cpuset *cpuset_attach_old_cs;
+
++/*
++ * Check to see if a cpuset can accept a new task
++ * For v1, cpus_allowed and mems_allowed can't be empty.
++ * For v2, effective_cpus can't be empty.
++ * Note that in v1, effective_cpus = cpus_allowed.
++ */
++static int cpuset_can_attach_check(struct cpuset *cs)
++{
++ if (cpumask_empty(cs->effective_cpus) ||
++ (!is_in_v2_mode() && nodes_empty(cs->mems_allowed)))
++ return -ENOSPC;
++ return 0;
++}
++
+ /* Called by cgroups to determine if a cpuset is usable; cpuset_rwsem held */
+ static int cpuset_can_attach(struct cgroup_taskset *tset)
+ {
+@@ -2467,16 +2481,9 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
+
+ percpu_down_write(&cpuset_rwsem);
+
+- /* allow moving tasks into an empty cpuset if on default hierarchy */
+- ret = -ENOSPC;
+- if (!is_in_v2_mode() &&
+- (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
+- goto out_unlock;
+-
+- /*
+- * Task cannot be moved to a cpuset with empty effective cpus.
+- */
+- if (cpumask_empty(cs->effective_cpus))
++ /* Check to see if task is allowed in the cpuset */
++ ret = cpuset_can_attach_check(cs);
++ if (ret)
+ goto out_unlock;
+
+ cgroup_taskset_for_each(task, css, tset) {
+@@ -2493,7 +2500,6 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
+ * changes which zero cpus/mems_allowed.
+ */
+ cs->attach_in_progress++;
+- ret = 0;
+ out_unlock:
+ percpu_up_write(&cpuset_rwsem);
+ return ret;
+@@ -3238,6 +3244,68 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
+ percpu_up_write(&cpuset_rwsem);
+ }
+
++/*
++ * In case the child is cloned into a cpuset different from its parent,
++ * additional checks are done to see if the move is allowed.
++ */
++static int cpuset_can_fork(struct task_struct *task, struct css_set *cset)
++{
++ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
++ bool same_cs;
++ int ret;
++
++ rcu_read_lock();
++ same_cs = (cs == task_cs(current));
++ rcu_read_unlock();
++
++ if (same_cs)
++ return 0;
++
++ lockdep_assert_held(&cgroup_mutex);
++ percpu_down_write(&cpuset_rwsem);
++
++ /* Check to see if task is allowed in the cpuset */
++ ret = cpuset_can_attach_check(cs);
++ if (ret)
++ goto out_unlock;
++
++ ret = task_can_attach(task, cs->effective_cpus);
++ if (ret)
++ goto out_unlock;
++
++ ret = security_task_setscheduler(task);
++ if (ret)
++ goto out_unlock;
++
++ /*
++ * Mark attach is in progress. This makes validate_change() fail
++ * changes which zero cpus/mems_allowed.
++ */
++ cs->attach_in_progress++;
++out_unlock:
++ percpu_up_write(&cpuset_rwsem);
++ return ret;
++}
++
++static void cpuset_cancel_fork(struct task_struct *task, struct css_set *cset)
++{
++ struct cpuset *cs = css_cs(cset->subsys[cpuset_cgrp_id]);
++ bool same_cs;
++
++ rcu_read_lock();
++ same_cs = (cs == task_cs(current));
++ rcu_read_unlock();
++
++ if (same_cs)
++ return;
++
++ percpu_down_write(&cpuset_rwsem);
++ cs->attach_in_progress--;
++ if (!cs->attach_in_progress)
++ wake_up(&cpuset_attach_wq);
++ percpu_up_write(&cpuset_rwsem);
++}
++
+ /*
+ * Make sure the new task conform to the current state of its parent,
+ * which could have been changed by cpuset just after it inherits the
+@@ -3266,6 +3334,11 @@ static void cpuset_fork(struct task_struct *task)
+ percpu_down_write(&cpuset_rwsem);
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+ cpuset_attach_task(cs, task);
++
++ cs->attach_in_progress--;
++ if (!cs->attach_in_progress)
++ wake_up(&cpuset_attach_wq);
++
+ percpu_up_write(&cpuset_rwsem);
+ }
+
+@@ -3279,6 +3352,8 @@ struct cgroup_subsys cpuset_cgrp_subsys = {
+ .attach = cpuset_attach,
+ .post_attach = cpuset_post_attach,
+ .bind = cpuset_bind,
++ .can_fork = cpuset_can_fork,
++ .cancel_fork = cpuset_cancel_fork,
+ .fork = cpuset_fork,
+ .legacy_cftypes = legacy_files,
+ .dfl_cftypes = dfl_files,
+--
+2.39.2
+
--- /dev/null
+From 0cd0e87ceae9b62f8bb13629d42a468ab41a846d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Apr 2023 09:35:58 -0400
+Subject: cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit 42a11bf5c5436e91b040aeb04063be1710bb9f9c ]
+
+By default, the clone(2) syscall spawn a child process into the same
+cgroup as its parent. With the use of the CLONE_INTO_CGROUP flag
+introduced by commit ef2c41cf38a7 ("clone3: allow spawning processes
+into cgroups"), the child will be spawned into a different cgroup which
+is somewhat similar to writing the child's tid into "cgroup.threads".
+
+The current cpuset_fork() method does not properly handle the
+CLONE_INTO_CGROUP case where the cpuset of the child may be different
+from that of its parent. Update the cpuset_fork() method to treat the
+CLONE_INTO_CGROUP case similar to cpuset_attach().
+
+Since the newly cloned task has not been running yet, its actual
+memory usage isn't known. So it is not necessary to make change to mm
+in cpuset_fork().
+
+Fixes: ef2c41cf38a7 ("clone3: allow spawning processes into cgroups")
+Reported-by: Giuseppe Scrivano <gscrivan@redhat.com>
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: stable@vger.kernel.org # v5.7+
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 62 ++++++++++++++++++++++++++++--------------
+ 1 file changed, 42 insertions(+), 20 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 1c549452c4abb..85f071fb1a414 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -2515,16 +2515,33 @@ static void cpuset_cancel_attach(struct cgroup_taskset *tset)
+ }
+
+ /*
+- * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach()
++ * Protected by cpuset_rwsem. cpus_attach is used only by cpuset_attach_task()
+ * but we can't allocate it dynamically there. Define it global and
+ * allocate from cpuset_init().
+ */
+ static cpumask_var_t cpus_attach;
++static nodemask_t cpuset_attach_nodemask_to;
++
++static void cpuset_attach_task(struct cpuset *cs, struct task_struct *task)
++{
++ percpu_rwsem_assert_held(&cpuset_rwsem);
++
++ if (cs != &top_cpuset)
++ guarantee_online_cpus(task, cpus_attach);
++ else
++ cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
++ /*
++ * can_attach beforehand should guarantee that this doesn't
++ * fail. TODO: have a better way to handle failure here
++ */
++ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
++
++ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
++ cpuset_update_task_spread_flags(cs, task);
++}
+
+ static void cpuset_attach(struct cgroup_taskset *tset)
+ {
+- /* static buf protected by cpuset_rwsem */
+- static nodemask_t cpuset_attach_nodemask_to;
+ struct task_struct *task;
+ struct task_struct *leader;
+ struct cgroup_subsys_state *css;
+@@ -2539,20 +2556,8 @@ static void cpuset_attach(struct cgroup_taskset *tset)
+
+ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
+
+- cgroup_taskset_for_each(task, css, tset) {
+- if (cs != &top_cpuset)
+- guarantee_online_cpus(task, cpus_attach);
+- else
+- cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
+- /*
+- * can_attach beforehand should guarantee that this doesn't
+- * fail. TODO: have a better way to handle failure here
+- */
+- WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+-
+- cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+- cpuset_update_task_spread_flags(cs, task);
+- }
++ cgroup_taskset_for_each(task, css, tset)
++ cpuset_attach_task(cs, task);
+
+ /*
+ * Change mm for all threadgroup leaders. This is expensive and may
+@@ -3240,11 +3245,28 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
+ */
+ static void cpuset_fork(struct task_struct *task)
+ {
+- if (task_css_is_root(task, cpuset_cgrp_id))
++ struct cpuset *cs;
++ bool same_cs;
++
++ rcu_read_lock();
++ cs = task_cs(task);
++ same_cs = (cs == task_cs(current));
++ rcu_read_unlock();
++
++ if (same_cs) {
++ if (cs == &top_cpuset)
++ return;
++
++ set_cpus_allowed_ptr(task, current->cpus_ptr);
++ task->mems_allowed = current->mems_allowed;
+ return;
++ }
+
+- set_cpus_allowed_ptr(task, current->cpus_ptr);
+- task->mems_allowed = current->mems_allowed;
++ /* CLONE_INTO_CGROUP */
++ percpu_down_write(&cpuset_rwsem);
++ guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
++ cpuset_attach_task(cs, task);
++ percpu_up_write(&cpuset_rwsem);
+ }
+
+ struct cgroup_subsys cpuset_cgrp_subsys = {
+--
+2.39.2
+
--- /dev/null
+From 871105bedf332730e67131085ed96223fbd02242 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Nov 2022 17:19:38 -0500
+Subject: cgroup/cpuset: Skip spread flags update on v2
+
+From: Waiman Long <longman@redhat.com>
+
+[ Upstream commit 18f9a4d47527772515ad6cbdac796422566e6440 ]
+
+Cpuset v2 has no spread flags to set. So we can skip spread
+flags update if cpuset v2 is being used. Also change the name to
+cpuset_update_task_spread_flags() to indicate that there are multiple
+spread flags.
+
+Signed-off-by: Waiman Long <longman@redhat.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Stable-dep-of: 42a11bf5c543 ("cgroup/cpuset: Make cpuset_fork() handle CLONE_INTO_CGROUP properly")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cpuset.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
+index 4c0c317083d05..1c549452c4abb 100644
+--- a/kernel/cgroup/cpuset.c
++++ b/kernel/cgroup/cpuset.c
+@@ -550,11 +550,15 @@ static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
+ /*
+ * update task's spread flag if cpuset's page/slab spread flag is set
+ *
+- * Call with callback_lock or cpuset_rwsem held.
++ * Call with callback_lock or cpuset_rwsem held. The check can be skipped
++ * if on default hierarchy.
+ */
+-static void cpuset_update_task_spread_flag(struct cpuset *cs,
++static void cpuset_update_task_spread_flags(struct cpuset *cs,
+ struct task_struct *tsk)
+ {
++ if (cgroup_subsys_on_dfl(cpuset_cgrp_subsys))
++ return;
++
+ if (is_spread_page(cs))
+ task_set_spread_page(tsk);
+ else
+@@ -2165,7 +2169,7 @@ static void update_tasks_flags(struct cpuset *cs)
+
+ css_task_iter_start(&cs->css, 0, &it);
+ while ((task = css_task_iter_next(&it)))
+- cpuset_update_task_spread_flag(cs, task);
++ cpuset_update_task_spread_flags(cs, task);
+ css_task_iter_end(&it);
+ }
+
+@@ -2547,7 +2551,7 @@ static void cpuset_attach(struct cgroup_taskset *tset)
+ WARN_ON_ONCE(set_cpus_allowed_ptr(task, cpus_attach));
+
+ cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
+- cpuset_update_task_spread_flag(cs, task);
++ cpuset_update_task_spread_flags(cs, task);
+ }
+
+ /*
+--
+2.39.2
+
--- /dev/null
+From 8b36a61fe69357a0f42cfa8e2f6a60fda9a74ce2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Apr 2023 00:34:11 +0200
+Subject: cifs: fix negotiate context parsing
+
+From: David Disseldorp <ddiss@suse.de>
+
+[ Upstream commit 5105a7ffce19160e7062aee67fb6b3b8a1b56d78 ]
+
+smb311_decode_neg_context() doesn't properly check against SMB packet
+boundaries prior to accessing individual negotiate context entries. This
+is due to the length check omitting the eight byte smb2_neg_context
+header, as well as incorrect decrementing of len_of_ctxts.
+
+Fixes: 5100d8a3fe03 ("SMB311: Improve checking of negotiate security contexts")
+Reported-by: Volker Lendecke <vl@samba.org>
+Reviewed-by: Paulo Alcantara (SUSE) <pc@manguebit.com>
+Signed-off-by: David Disseldorp <ddiss@suse.de>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/cifs/smb2pdu.c | 41 +++++++++++++++++++++++++++++++----------
+ 1 file changed, 31 insertions(+), 10 deletions(-)
+
+diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
+index b37379b62cc77..ab59faf8a06a7 100644
+--- a/fs/cifs/smb2pdu.c
++++ b/fs/cifs/smb2pdu.c
+@@ -588,11 +588,15 @@ assemble_neg_contexts(struct smb2_negotiate_req *req,
+
+ }
+
++/* If invalid preauth context warn but use what we requested, SHA-512 */
+ static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt)
+ {
+ unsigned int len = le16_to_cpu(ctxt->DataLength);
+
+- /* If invalid preauth context warn but use what we requested, SHA-512 */
++ /*
++ * Caller checked that DataLength remains within SMB boundary. We still
++ * need to confirm that one HashAlgorithms member is accounted for.
++ */
+ if (len < MIN_PREAUTH_CTXT_DATA_LEN) {
+ pr_warn_once("server sent bad preauth context\n");
+ return;
+@@ -611,7 +615,11 @@ static void decode_compress_ctx(struct TCP_Server_Info *server,
+ {
+ unsigned int len = le16_to_cpu(ctxt->DataLength);
+
+- /* sizeof compress context is a one element compression capbility struct */
++ /*
++ * Caller checked that DataLength remains within SMB boundary. We still
++ * need to confirm that one CompressionAlgorithms member is accounted
++ * for.
++ */
+ if (len < 10) {
+ pr_warn_once("server sent bad compression cntxt\n");
+ return;
+@@ -633,6 +641,11 @@ static int decode_encrypt_ctx(struct TCP_Server_Info *server,
+ unsigned int len = le16_to_cpu(ctxt->DataLength);
+
+ cifs_dbg(FYI, "decode SMB3.11 encryption neg context of len %d\n", len);
++ /*
++ * Caller checked that DataLength remains within SMB boundary. We still
++ * need to confirm that one Cipher flexible array member is accounted
++ * for.
++ */
+ if (len < MIN_ENCRYPT_CTXT_DATA_LEN) {
+ pr_warn_once("server sent bad crypto ctxt len\n");
+ return -EINVAL;
+@@ -679,6 +692,11 @@ static void decode_signing_ctx(struct TCP_Server_Info *server,
+ {
+ unsigned int len = le16_to_cpu(pctxt->DataLength);
+
++ /*
++ * Caller checked that DataLength remains within SMB boundary. We still
++ * need to confirm that one SigningAlgorithms flexible array member is
++ * accounted for.
++ */
+ if ((len < 4) || (len > 16)) {
+ pr_warn_once("server sent bad signing negcontext\n");
+ return;
+@@ -720,14 +738,19 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
+ for (i = 0; i < ctxt_cnt; i++) {
+ int clen;
+ /* check that offset is not beyond end of SMB */
+- if (len_of_ctxts == 0)
+- break;
+-
+ if (len_of_ctxts < sizeof(struct smb2_neg_context))
+ break;
+
+ pctx = (struct smb2_neg_context *)(offset + (char *)rsp);
+- clen = le16_to_cpu(pctx->DataLength);
++ clen = sizeof(struct smb2_neg_context)
++ + le16_to_cpu(pctx->DataLength);
++ /*
++ * 2.2.4 SMB2 NEGOTIATE Response
++ * Subsequent negotiate contexts MUST appear at the first 8-byte
++ * aligned offset following the previous negotiate context.
++ */
++ if (i + 1 != ctxt_cnt)
++ clen = ALIGN(clen, 8);
+ if (clen > len_of_ctxts)
+ break;
+
+@@ -748,12 +771,10 @@ static int smb311_decode_neg_context(struct smb2_negotiate_rsp *rsp,
+ else
+ cifs_server_dbg(VFS, "unknown negcontext of type %d ignored\n",
+ le16_to_cpu(pctx->ContextType));
+-
+ if (rc)
+ break;
+- /* offsets must be 8 byte aligned */
+- clen = ALIGN(clen, 8);
+- offset += clen + sizeof(struct smb2_neg_context);
++
++ offset += clen;
+ len_of_ctxts -= clen;
+ }
+ return rc;
+--
+2.39.2
+
--- /dev/null
+From a6c803a703cd8d472810dae0ba1f2b2868a94354 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Nov 2022 18:15:56 +0100
+Subject: Documentation: riscv: Document the sv57 VM layout
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Björn Töpel <bjorn@rivosinc.com>
+
+[ Upstream commit dd3553793a759e4f7f21c1aaffd5cb2de7a0068d ]
+
+RISC-V has been supporting the "sv57" address translation mode for a
+while, but is has not been added to the VM layout documentation. Let
+us fix that.
+
+Signed-off-by: Björn Töpel <bjorn@rivosinc.com>
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Link: https://lore.kernel.org/r/20221118171556.1612190-1-bjorn@kernel.org
+Signed-off-by: Jonathan Corbet <corbet@lwn.net>
+Stable-dep-of: ef69d2559fe9 ("riscv: Move early dtb mapping into the fixmap region")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/riscv/vm-layout.rst | 36 +++++++++++++++++++++++++++++++
+ 1 file changed, 36 insertions(+)
+
+diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst
+index 5b36e45fef60b..3be44e74ec5d6 100644
+--- a/Documentation/riscv/vm-layout.rst
++++ b/Documentation/riscv/vm-layout.rst
+@@ -97,3 +97,39 @@ RISC-V Linux Kernel SV48
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
+ ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
+ __________________|____________|__________________|_________|____________________________________________________________
++
++
++RISC-V Linux Kernel SV57
++------------------------
++
++::
++
++ ========================================================================================================================
++ Start addr | Offset | End addr | Size | VM area description
++ ========================================================================================================================
++ | | | |
++ 0000000000000000 | 0 | 00ffffffffffffff | 64 PB | user-space virtual memory, different per mm
++ __________________|____________|__________________|_________|___________________________________________________________
++ | | | |
++ 0100000000000000 | +64 PB | feffffffffffffff | ~16K PB | ... huge, almost 64 bits wide hole of non-canonical
++ | | | | virtual memory addresses up to the -64 PB
++ | | | | starting offset of kernel mappings.
++ __________________|____________|__________________|_________|___________________________________________________________
++ |
++ | Kernel-space virtual memory, shared between all processes:
++ ____________________________________________________________|___________________________________________________________
++ | | | |
++ ff1bfffffee00000 | -57 PB | ff1bfffffeffffff | 2 MB | fixmap
++ ff1bffffff000000 | -57 PB | ff1bffffffffffff | 16 MB | PCI io
++ ff1c000000000000 | -57 PB | ff1fffffffffffff | 1 PB | vmemmap
++ ff20000000000000 | -56 PB | ff5fffffffffffff | 16 PB | vmalloc/ioremap space
++ ff60000000000000 | -40 PB | ffdeffffffffffff | 32 PB | direct mapping of all physical memory
++ ffdf000000000000 | -8 PB | fffffffeffffffff | 8 PB | kasan
++ __________________|____________|__________________|_________|____________________________________________________________
++ |
++ | Identical layout to the 39-bit one from here on:
++ ____________________________________________________________|____________________________________________________________
++ | | | |
++ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules, BPF
++ ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel
++ __________________|____________|__________________|_________|____________________________________________________________
+--
+2.39.2
+
--- /dev/null
+From 8a1aed7bcc603e9ebae8d163ef60c6ae09785dbe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Mar 2023 19:52:37 +0530
+Subject: i2c: mchp-pci1xxxx: Update Timing registers
+
+From: Tharun Kumar P <tharunkumar.pasumarthi@microchip.com>
+
+[ Upstream commit aa874cdfec07d4dd9c6f0c356d65c609ba31a26f ]
+
+Update I2C timing registers based on latest hardware design.
+This fix does not break functionality of chips with older design and
+existing users will not be affected.
+
+Fixes: 361693697249 ("i2c: microchip: pci1xxxx: Add driver for I2C host controller in multifunction endpoint of pci1xxxx switch")
+Signed-off-by: Tharun Kumar P <tharunkumar.pasumarthi@microchip.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i2c/busses/i2c-mchp-pci1xxxx.c | 60 +++++++++++++-------------
+ 1 file changed, 30 insertions(+), 30 deletions(-)
+
+diff --git a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
+index 09af759211478..b21ffd6df9276 100644
+--- a/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
++++ b/drivers/i2c/busses/i2c-mchp-pci1xxxx.c
+@@ -48,9 +48,9 @@
+ * SR_HOLD_TIME_XK_TICKS field will indicate the number of ticks of the
+ * baud clock required to program 'Hold Time' at X KHz.
+ */
+-#define SR_HOLD_TIME_100K_TICKS 133
+-#define SR_HOLD_TIME_400K_TICKS 20
+-#define SR_HOLD_TIME_1000K_TICKS 11
++#define SR_HOLD_TIME_100K_TICKS 150
++#define SR_HOLD_TIME_400K_TICKS 20
++#define SR_HOLD_TIME_1000K_TICKS 12
+
+ #define SMB_CORE_COMPLETION_REG_OFF3 (SMBUS_MAST_CORE_ADDR_BASE + 0x23)
+
+@@ -65,17 +65,17 @@
+ * the baud clock required to program 'fair idle delay' at X KHz. Fair idle
+ * delay establishes the MCTP T(IDLE_DELAY) period.
+ */
+-#define FAIR_BUS_IDLE_MIN_100K_TICKS 969
+-#define FAIR_BUS_IDLE_MIN_400K_TICKS 157
+-#define FAIR_BUS_IDLE_MIN_1000K_TICKS 157
++#define FAIR_BUS_IDLE_MIN_100K_TICKS 992
++#define FAIR_BUS_IDLE_MIN_400K_TICKS 500
++#define FAIR_BUS_IDLE_MIN_1000K_TICKS 500
+
+ /*
+ * FAIR_IDLE_DELAY_XK_TICKS field will indicate the number of ticks of the
+ * baud clock required to satisfy the fairness protocol at X KHz.
+ */
+-#define FAIR_IDLE_DELAY_100K_TICKS 1000
+-#define FAIR_IDLE_DELAY_400K_TICKS 500
+-#define FAIR_IDLE_DELAY_1000K_TICKS 500
++#define FAIR_IDLE_DELAY_100K_TICKS 963
++#define FAIR_IDLE_DELAY_400K_TICKS 156
++#define FAIR_IDLE_DELAY_1000K_TICKS 156
+
+ #define SMB_IDLE_SCALING_100K \
+ ((FAIR_IDLE_DELAY_100K_TICKS << 16) | FAIR_BUS_IDLE_MIN_100K_TICKS)
+@@ -105,7 +105,7 @@
+ */
+ #define BUS_CLK_100K_LOW_PERIOD_TICKS 156
+ #define BUS_CLK_400K_LOW_PERIOD_TICKS 41
+-#define BUS_CLK_1000K_LOW_PERIOD_TICKS 15
++#define BUS_CLK_1000K_LOW_PERIOD_TICKS 15
+
+ /*
+ * BUS_CLK_XK_HIGH_PERIOD_TICKS field defines the number of I2C Baud Clock
+@@ -131,7 +131,7 @@
+ */
+ #define CLK_SYNC_100K 4
+ #define CLK_SYNC_400K 4
+-#define CLK_SYNC_1000K 4
++#define CLK_SYNC_1000K 4
+
+ #define SMB_CORE_DATA_TIMING_REG_OFF (SMBUS_MAST_CORE_ADDR_BASE + 0x40)
+
+@@ -142,25 +142,25 @@
+ * determines the SCLK hold time following SDAT driven low during the first
+ * START bit in a transfer.
+ */
+-#define FIRST_START_HOLD_100K_TICKS 22
+-#define FIRST_START_HOLD_400K_TICKS 16
+-#define FIRST_START_HOLD_1000K_TICKS 6
++#define FIRST_START_HOLD_100K_TICKS 23
++#define FIRST_START_HOLD_400K_TICKS 8
++#define FIRST_START_HOLD_1000K_TICKS 12
+
+ /*
+ * STOP_SETUP_XK_TICKS will indicate the number of ticks of the baud clock
+ * required to program 'STOP_SETUP' timer at X KHz. This timer determines the
+ * SDAT setup time from the rising edge of SCLK for a STOP condition.
+ */
+-#define STOP_SETUP_100K_TICKS 157
++#define STOP_SETUP_100K_TICKS 150
+ #define STOP_SETUP_400K_TICKS 20
+-#define STOP_SETUP_1000K_TICKS 12
++#define STOP_SETUP_1000K_TICKS 12
+
+ /*
+ * RESTART_SETUP_XK_TICKS will indicate the number of ticks of the baud clock
+ * required to program 'RESTART_SETUP' timer at X KHz. This timer determines the
+ * SDAT setup time from the rising edge of SCLK for a repeated START condition.
+ */
+-#define RESTART_SETUP_100K_TICKS 157
++#define RESTART_SETUP_100K_TICKS 156
+ #define RESTART_SETUP_400K_TICKS 20
+ #define RESTART_SETUP_1000K_TICKS 12
+
+@@ -169,7 +169,7 @@
+ * required to program 'DATA_HOLD' timer at X KHz. This timer determines the
+ * SDAT hold time following SCLK driven low.
+ */
+-#define DATA_HOLD_100K_TICKS 2
++#define DATA_HOLD_100K_TICKS 12
+ #define DATA_HOLD_400K_TICKS 2
+ #define DATA_HOLD_1000K_TICKS 2
+
+@@ -190,35 +190,35 @@
+ * Bus Idle Minimum time = BUS_IDLE_MIN[7:0] x Baud_Clock_Period x
+ * (BUS_IDLE_MIN_XK_TICKS[7] ? 4,1)
+ */
+-#define BUS_IDLE_MIN_100K_TICKS 167UL
+-#define BUS_IDLE_MIN_400K_TICKS 139UL
+-#define BUS_IDLE_MIN_1000K_TICKS 133UL
++#define BUS_IDLE_MIN_100K_TICKS 36UL
++#define BUS_IDLE_MIN_400K_TICKS 10UL
++#define BUS_IDLE_MIN_1000K_TICKS 4UL
+
+ /*
+ * CTRL_CUM_TIME_OUT_XK_TICKS defines SMBus Controller Cumulative Time-Out.
+ * SMBus Controller Cumulative Time-Out duration =
+ * CTRL_CUM_TIME_OUT_XK_TICKS[7:0] x Baud_Clock_Period x 2048
+ */
+-#define CTRL_CUM_TIME_OUT_100K_TICKS 159
+-#define CTRL_CUM_TIME_OUT_400K_TICKS 159
+-#define CTRL_CUM_TIME_OUT_1000K_TICKS 159
++#define CTRL_CUM_TIME_OUT_100K_TICKS 76
++#define CTRL_CUM_TIME_OUT_400K_TICKS 76
++#define CTRL_CUM_TIME_OUT_1000K_TICKS 76
+
+ /*
+ * TARGET_CUM_TIME_OUT_XK_TICKS defines SMBus Target Cumulative Time-Out duration.
+ * SMBus Target Cumulative Time-Out duration = TARGET_CUM_TIME_OUT_XK_TICKS[7:0] x
+ * Baud_Clock_Period x 4096
+ */
+-#define TARGET_CUM_TIME_OUT_100K_TICKS 199
+-#define TARGET_CUM_TIME_OUT_400K_TICKS 199
+-#define TARGET_CUM_TIME_OUT_1000K_TICKS 199
++#define TARGET_CUM_TIME_OUT_100K_TICKS 95
++#define TARGET_CUM_TIME_OUT_400K_TICKS 95
++#define TARGET_CUM_TIME_OUT_1000K_TICKS 95
+
+ /*
+ * CLOCK_HIGH_TIME_OUT_XK defines Clock High time out period.
+ * Clock High time out period = CLOCK_HIGH_TIME_OUT_XK[7:0] x Baud_Clock_Period x 8
+ */
+-#define CLOCK_HIGH_TIME_OUT_100K_TICKS 204
+-#define CLOCK_HIGH_TIME_OUT_400K_TICKS 204
+-#define CLOCK_HIGH_TIME_OUT_1000K_TICKS 204
++#define CLOCK_HIGH_TIME_OUT_100K_TICKS 97
++#define CLOCK_HIGH_TIME_OUT_400K_TICKS 97
++#define CLOCK_HIGH_TIME_OUT_1000K_TICKS 97
+
+ #define TO_SCALING_100K \
+ ((BUS_IDLE_MIN_100K_TICKS << 24) | (CTRL_CUM_TIME_OUT_100K_TICKS << 16) | \
+--
+2.39.2
+
--- /dev/null
+From d321e57c13fb04e2c80f9f655d5db4e491e7f810 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Apr 2023 11:37:37 +0200
+Subject: i2c: ocores: generate stop condition after timeout in polling mode
+
+From: Gregor Herburger <gregor.herburger@tq-group.com>
+
+[ Upstream commit f8160d3b35fc94491bb0cb974dbda310ef96c0e2 ]
+
+In polling mode, no stop condition is generated after a timeout. This
+causes SCL to remain low and thereby block the bus. If this happens
+during a transfer it can cause slaves to misinterpret the subsequent
+transfer and return wrong values.
+
+To solve this, pass the ETIMEDOUT error up from ocores_process_polling()
+instead of setting STATE_ERROR directly. The caller is adjusted to call
+ocores_process_timeout() on error both in polling and in IRQ mode, which
+will set STATE_ERROR and generate a stop condition.
+
+Fixes: 69c8c0c0efa8 ("i2c: ocores: add polling interface")
+Signed-off-by: Gregor Herburger <gregor.herburger@tq-group.com>
+Signed-off-by: Matthias Schiffer <matthias.schiffer@ew.tq-group.com>
+Acked-by: Peter Korsgaard <peter@korsgaard.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Reviewed-by: Federico Vaga <federico.vaga@cern.ch>
+Signed-off-by: Wolfram Sang <wsa@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/i2c/busses/i2c-ocores.c | 35 ++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 16 deletions(-)
+
+diff --git a/drivers/i2c/busses/i2c-ocores.c b/drivers/i2c/busses/i2c-ocores.c
+index a0af027db04c1..2e575856c5cd5 100644
+--- a/drivers/i2c/busses/i2c-ocores.c
++++ b/drivers/i2c/busses/i2c-ocores.c
+@@ -342,18 +342,18 @@ static int ocores_poll_wait(struct ocores_i2c *i2c)
+ * ocores_isr(), we just add our polling code around it.
+ *
+ * It can run in atomic context
++ *
++ * Return: 0 on success, -ETIMEDOUT on timeout
+ */
+-static void ocores_process_polling(struct ocores_i2c *i2c)
++static int ocores_process_polling(struct ocores_i2c *i2c)
+ {
+- while (1) {
+- irqreturn_t ret;
+- int err;
++ irqreturn_t ret;
++ int err = 0;
+
++ while (1) {
+ err = ocores_poll_wait(i2c);
+- if (err) {
+- i2c->state = STATE_ERROR;
++ if (err)
+ break; /* timeout */
+- }
+
+ ret = ocores_isr(-1, i2c);
+ if (ret == IRQ_NONE)
+@@ -364,13 +364,15 @@ static void ocores_process_polling(struct ocores_i2c *i2c)
+ break;
+ }
+ }
++
++ return err;
+ }
+
+ static int ocores_xfer_core(struct ocores_i2c *i2c,
+ struct i2c_msg *msgs, int num,
+ bool polling)
+ {
+- int ret;
++ int ret = 0;
+ u8 ctrl;
+
+ ctrl = oc_getreg(i2c, OCI2C_CONTROL);
+@@ -388,15 +390,16 @@ static int ocores_xfer_core(struct ocores_i2c *i2c,
+ oc_setreg(i2c, OCI2C_CMD, OCI2C_CMD_START);
+
+ if (polling) {
+- ocores_process_polling(i2c);
++ ret = ocores_process_polling(i2c);
+ } else {
+- ret = wait_event_timeout(i2c->wait,
+- (i2c->state == STATE_ERROR) ||
+- (i2c->state == STATE_DONE), HZ);
+- if (ret == 0) {
+- ocores_process_timeout(i2c);
+- return -ETIMEDOUT;
+- }
++ if (wait_event_timeout(i2c->wait,
++ (i2c->state == STATE_ERROR) ||
++ (i2c->state == STATE_DONE), HZ) == 0)
++ ret = -ETIMEDOUT;
++ }
++ if (ret) {
++ ocores_process_timeout(i2c);
++ return ret;
+ }
+
+ return (i2c->state == STATE_DONE) ? num : -EIO;
+--
+2.39.2
+
--- /dev/null
+From b45965f6bc6d2af77cec3b5305a175f04308db88 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Apr 2023 17:55:48 -0700
+Subject: nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD
+
+From: Duy Truong <dory@dory.moe>
+
+[ Upstream commit 74391b3e69855e7dd65a9cef36baf5fc1345affd ]
+
+Added a quirk to fix the TeamGroup T-Force Cardea Zero Z330 SSDs reporting
+duplicate NGUIDs.
+
+Signed-off-by: Duy Truong <dory@dory.moe>
+Cc: stable@vger.kernel.org
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/pci.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 1bef32cd10252..581bf94416e6d 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -3552,6 +3552,8 @@ static const struct pci_device_id nvme_id_table[] = {
+ { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
+ .driver_data = NVME_QUIRK_BOGUS_NID |
+ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
++ { PCI_DEVICE(0x10ec, 0x5763), /* TEAMGROUP T-FORCE CARDEA ZERO Z330 SSD */
++ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
+ .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
+--
+2.39.2
+
--- /dev/null
+From 459f5ac581ed582b4c414173bb62e96213b216d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 26 Mar 2023 11:29:49 +0200
+Subject: nvme-pci: mark Lexar NM760 as IGNORE_DEV_SUBNQN
+
+From: Juraj Pecigos <kernel@juraj.dev>
+
+[ Upstream commit 1231363aec86704a6b0467a12e3ca7bdf890e01d ]
+
+A system with more than one of these SSDs will only have one usable.
+The kernel fails to detect more than one nvme device due to duplicate
+cntlids.
+
+before:
+[ 9.395229] nvme 0000:01:00.0: platform quirk: setting simple suspend
+[ 9.395262] nvme nvme0: pci function 0000:01:00.0
+[ 9.395282] nvme 0000:03:00.0: platform quirk: setting simple suspend
+[ 9.395305] nvme nvme1: pci function 0000:03:00.0
+[ 9.409873] nvme nvme0: Duplicate cntlid 1 with nvme1, subsys nqn.2022-07.com.siliconmotion:nvm-subsystem-sn- , rejecting
+[ 9.409982] nvme nvme0: Removing after probe failure status: -22
+[ 9.427487] nvme nvme1: allocated 64 MiB host memory buffer.
+[ 9.445088] nvme nvme1: 16/0/0 default/read/poll queues
+[ 9.449898] nvme nvme1: Ignoring bogus Namespace Identifiers
+
+after:
+[ 1.161890] nvme 0000:01:00.0: platform quirk: setting simple suspend
+[ 1.162660] nvme nvme0: pci function 0000:01:00.0
+[ 1.162684] nvme 0000:03:00.0: platform quirk: setting simple suspend
+[ 1.162707] nvme nvme1: pci function 0000:03:00.0
+[ 1.191354] nvme nvme0: allocated 64 MiB host memory buffer.
+[ 1.193378] nvme nvme1: allocated 64 MiB host memory buffer.
+[ 1.211044] nvme nvme1: 16/0/0 default/read/poll queues
+[ 1.211080] nvme nvme0: 16/0/0 default/read/poll queues
+[ 1.216145] nvme nvme0: Ignoring bogus Namespace Identifiers
+[ 1.216261] nvme nvme1: Ignoring bogus Namespace Identifiers
+
+Adding the NVME_QUIRK_IGNORE_DEV_SUBNQN quirk to resolves the issue.
+
+Signed-off-by: Juraj Pecigos <kernel@juraj.dev>
+Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Stable-dep-of: 74391b3e6985 ("nvme-pci: add NVME_QUIRK_BOGUS_NID for T-FORCE Z330 SSD")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/pci.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
+index 60452f6a9f711..1bef32cd10252 100644
+--- a/drivers/nvme/host/pci.c
++++ b/drivers/nvme/host/pci.c
+@@ -3550,7 +3550,8 @@ static const struct pci_device_id nvme_id_table[] = {
+ { PCI_DEVICE(0x1d97, 0x1d97), /* Lexar NM620 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1d97, 0x2269), /* Lexar NM760 */
+- .driver_data = NVME_QUIRK_BOGUS_NID, },
++ .driver_data = NVME_QUIRK_BOGUS_NID |
++ NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
+ .driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
+--
+2.39.2
+
--- /dev/null
+From 3c008671fed52c8e248a3e549f3b90f463661181 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Apr 2023 09:44:33 +0530
+Subject: powerpc/papr_scm: Update the NUMA distance table for the target node
+
+From: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+
+[ Upstream commit b277fc793daf258877b4c0744b52f69d6e6ba22e ]
+
+Platform device helper routines won't update the NUMA distance table
+while creating a platform device, even if the device is present on a
+NUMA node that doesn't have memory or CPU. This is especially true for
+pmem devices. If the target node of the pmem device is not online, we
+find the nearest online node to the device and associate the pmem device
+with that online node. To find the nearest online node, we should have
+the numa distance table updated correctly. Update the distance
+information during the device probe.
+
+For a papr scm device on NUMA node 3 distance_lookup_table value for
+distance_ref_points_depth = 2 before and after fix is below:
+
+Before fix:
+ node 3 distance depth 0 - 0
+ node 3 distance depth 1 - 0
+ node 4 distance depth 0 - 4
+ node 4 distance depth 1 - 2
+ node 5 distance depth 0 - 5
+ node 5 distance depth 1 - 1
+
+After fix
+ node 3 distance depth 0 - 3
+ node 3 distance depth 1 - 1
+ node 4 distance depth 0 - 4
+ node 4 distance depth 1 - 2
+ node 5 distance depth 0 - 5
+ node 5 distance depth 1 - 1
+
+Without the fix, the nearest numa node to the pmem device (NUMA node 3)
+will be picked as 4. After the fix, we get the correct numa node which
+is 5.
+
+Fixes: da1115fdbd6e ("powerpc/nvdimm: Pick nearby online node if the device node is not online")
+Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20230404041433.1781804-1-aneesh.kumar@linux.ibm.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/numa.c | 1 +
+ arch/powerpc/platforms/pseries/papr_scm.c | 7 +++++++
+ 2 files changed, 8 insertions(+)
+
+diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
+index b44ce71917d75..16cfe56be05bb 100644
+--- a/arch/powerpc/mm/numa.c
++++ b/arch/powerpc/mm/numa.c
+@@ -366,6 +366,7 @@ void update_numa_distance(struct device_node *node)
+ WARN(numa_distance_table[nid][nid] == -1,
+ "NUMA distance details for node %d not provided\n", nid);
+ }
++EXPORT_SYMBOL_GPL(update_numa_distance);
+
+ /*
+ * ibm,numa-lookup-index-table= {N, domainid1, domainid2, ..... domainidN}
+diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
+index 2f8385523a132..1a53e048ceb76 100644
+--- a/arch/powerpc/platforms/pseries/papr_scm.c
++++ b/arch/powerpc/platforms/pseries/papr_scm.c
+@@ -1428,6 +1428,13 @@ static int papr_scm_probe(struct platform_device *pdev)
+ return -ENODEV;
+ }
+
++ /*
++ * open firmware platform device create won't update the NUMA
++ * distance table. For PAPR SCM devices we use numa_map_to_online_node()
++ * to find the nearest online NUMA node and that requires correct
++ * distance table information.
++ */
++ update_numa_distance(dn);
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+--
+2.39.2
+
--- /dev/null
+From c8f04f3ea2ffd7d22b3539d9487fc8505d341988 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 26 Mar 2023 18:21:21 +0000
+Subject: purgatory: fix disabling debug info
+
+From: Alyssa Ross <hi@alyssa.is>
+
+[ Upstream commit d83806c4c0cccc0d6d3c3581a11983a9c186a138 ]
+
+Since 32ef9e5054ec, -Wa,-gdwarf-2 is no longer used in KBUILD_AFLAGS.
+Instead, it includes -g, the appropriate -gdwarf-* flag, and also the
+-Wa versions of both of those if building with Clang and GNU as. As a
+result, debug info was being generated for the purgatory objects, even
+though the intention was that it not be.
+
+Fixes: 32ef9e5054ec ("Makefile.debug: re-enable debug info for .S files")
+Signed-off-by: Alyssa Ross <hi@alyssa.is>
+Cc: stable@vger.kernel.org
+Acked-by: Nick Desaulniers <ndesaulniers@google.com>
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/purgatory/Makefile | 7 +------
+ arch/x86/purgatory/Makefile | 3 +--
+ 2 files changed, 2 insertions(+), 8 deletions(-)
+
+diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
+index d16bf715a586b..5730797a6b402 100644
+--- a/arch/riscv/purgatory/Makefile
++++ b/arch/riscv/purgatory/Makefile
+@@ -84,12 +84,7 @@ CFLAGS_string.o += $(PURGATORY_CFLAGS)
+ CFLAGS_REMOVE_ctype.o += $(PURGATORY_CFLAGS_REMOVE)
+ CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
+
+-AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
+-AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
+-AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
+-AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2
+-AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2
+-AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2
++asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
+
+ $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
+index 17f09dc263811..82fec66d46d29 100644
+--- a/arch/x86/purgatory/Makefile
++++ b/arch/x86/purgatory/Makefile
+@@ -69,8 +69,7 @@ CFLAGS_sha256.o += $(PURGATORY_CFLAGS)
+ CFLAGS_REMOVE_string.o += $(PURGATORY_CFLAGS_REMOVE)
+ CFLAGS_string.o += $(PURGATORY_CFLAGS)
+
+-AFLAGS_REMOVE_setup-x86_$(BITS).o += -Wa,-gdwarf-2
+-AFLAGS_REMOVE_entry64.o += -Wa,-gdwarf-2
++asflags-remove-y += $(foreach x, -g -gdwarf-4 -gdwarf-5, $(x) -Wa,$(x))
+
+ $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+--
+2.39.2
+
--- /dev/null
+From 3018f540f2064a9c0d2e59c56e84b398b3a6b0cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Jan 2023 22:23:00 +0100
+Subject: RISC-V: add infrastructure to allow different str* implementations
+
+From: Heiko Stuebner <heiko.stuebner@vrull.eu>
+
+[ Upstream commit 56e0790c7f9e59ba6a0f4b59981d1d6fbf43efb0 ]
+
+Depending on supported extensions on specific RISC-V cores,
+optimized str* functions might make sense.
+
+This adds basic infrastructure to allow patching the function calls
+via alternatives later on.
+
+The Linux kernel provides standard implementations for string functions
+but when architectures want to extend them, they need to provide their
+own.
+
+The added generic string functions are done in assembler (taken from
+disassembling the main-kernel functions for now) to allow us to control
+the used registers and extend them with optimized variants.
+
+This doesn't override the compiler's use of builtin replacements. So still
+first of all the compiler will select if a builtin will be better suitable
+i.e. for known strings. For all regular cases we will want to later
+select possible optimized variants and in the worst case fall back to the
+generic implemention added with this change.
+
+Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
+Signed-off-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20230113212301.3534711-2-heiko@sntech.de
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Stable-dep-of: d83806c4c0cc ("purgatory: fix disabling debug info")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/include/asm/string.h | 10 ++++++++
+ arch/riscv/kernel/riscv_ksyms.c | 3 +++
+ arch/riscv/lib/Makefile | 3 +++
+ arch/riscv/lib/strcmp.S | 36 +++++++++++++++++++++++++++++
+ arch/riscv/lib/strlen.S | 28 ++++++++++++++++++++++
+ arch/riscv/lib/strncmp.S | 41 +++++++++++++++++++++++++++++++++
+ arch/riscv/purgatory/Makefile | 13 +++++++++++
+ 7 files changed, 134 insertions(+)
+ create mode 100644 arch/riscv/lib/strcmp.S
+ create mode 100644 arch/riscv/lib/strlen.S
+ create mode 100644 arch/riscv/lib/strncmp.S
+
+diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h
+index 9090493665555..a96b1fea24fe4 100644
+--- a/arch/riscv/include/asm/string.h
++++ b/arch/riscv/include/asm/string.h
+@@ -18,6 +18,16 @@ extern asmlinkage void *__memcpy(void *, const void *, size_t);
+ #define __HAVE_ARCH_MEMMOVE
+ extern asmlinkage void *memmove(void *, const void *, size_t);
+ extern asmlinkage void *__memmove(void *, const void *, size_t);
++
++#define __HAVE_ARCH_STRCMP
++extern asmlinkage int strcmp(const char *cs, const char *ct);
++
++#define __HAVE_ARCH_STRLEN
++extern asmlinkage __kernel_size_t strlen(const char *);
++
++#define __HAVE_ARCH_STRNCMP
++extern asmlinkage int strncmp(const char *cs, const char *ct, size_t count);
++
+ /* For those files which don't want to check by kasan. */
+ #if defined(CONFIG_KASAN) && !defined(__SANITIZE_ADDRESS__)
+ #define memcpy(dst, src, len) __memcpy(dst, src, len)
+diff --git a/arch/riscv/kernel/riscv_ksyms.c b/arch/riscv/kernel/riscv_ksyms.c
+index 5ab1c7e1a6ed5..a72879b4249a5 100644
+--- a/arch/riscv/kernel/riscv_ksyms.c
++++ b/arch/riscv/kernel/riscv_ksyms.c
+@@ -12,6 +12,9 @@
+ EXPORT_SYMBOL(memset);
+ EXPORT_SYMBOL(memcpy);
+ EXPORT_SYMBOL(memmove);
++EXPORT_SYMBOL(strcmp);
++EXPORT_SYMBOL(strlen);
++EXPORT_SYMBOL(strncmp);
+ EXPORT_SYMBOL(__memset);
+ EXPORT_SYMBOL(__memcpy);
+ EXPORT_SYMBOL(__memmove);
+diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
+index 25d5c9664e57e..6c74b0bedd60d 100644
+--- a/arch/riscv/lib/Makefile
++++ b/arch/riscv/lib/Makefile
+@@ -3,6 +3,9 @@ lib-y += delay.o
+ lib-y += memcpy.o
+ lib-y += memset.o
+ lib-y += memmove.o
++lib-y += strcmp.o
++lib-y += strlen.o
++lib-y += strncmp.o
+ lib-$(CONFIG_MMU) += uaccess.o
+ lib-$(CONFIG_64BIT) += tishift.o
+
+diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
+new file mode 100644
+index 0000000000000..8babd712b9587
+--- /dev/null
++++ b/arch/riscv/lib/strcmp.S
+@@ -0,0 +1,36 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++
++#include <linux/linkage.h>
++#include <asm/asm.h>
++#include <asm-generic/export.h>
++
++/* int strcmp(const char *cs, const char *ct) */
++SYM_FUNC_START(strcmp)
++ /*
++ * Returns
++ * a0 - comparison result, value like strcmp
++ *
++ * Parameters
++ * a0 - string1
++ * a1 - string2
++ *
++ * Clobbers
++ * t0, t1
++ */
++1:
++ lbu t0, 0(a0)
++ lbu t1, 0(a1)
++ addi a0, a0, 1
++ addi a1, a1, 1
++ bne t0, t1, 2f
++ bnez t0, 1b
++ li a0, 0
++ ret
++2:
++ /*
++ * strcmp only needs to return (< 0, 0, > 0) values
++ * not necessarily -1, 0, +1
++ */
++ sub a0, t0, t1
++ ret
++SYM_FUNC_END(strcmp)
+diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
+new file mode 100644
+index 0000000000000..0a3b11853efdb
+--- /dev/null
++++ b/arch/riscv/lib/strlen.S
+@@ -0,0 +1,28 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++
++#include <linux/linkage.h>
++#include <asm/asm.h>
++#include <asm-generic/export.h>
++
++/* int strlen(const char *s) */
++SYM_FUNC_START(strlen)
++ /*
++ * Returns
++ * a0 - string length
++ *
++ * Parameters
++ * a0 - String to measure
++ *
++ * Clobbers:
++ * t0, t1
++ */
++ mv t1, a0
++1:
++ lbu t0, 0(t1)
++ beqz t0, 2f
++ addi t1, t1, 1
++ j 1b
++2:
++ sub a0, t1, a0
++ ret
++SYM_FUNC_END(strlen)
+diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
+new file mode 100644
+index 0000000000000..1f644d0a93f68
+--- /dev/null
++++ b/arch/riscv/lib/strncmp.S
+@@ -0,0 +1,41 @@
++/* SPDX-License-Identifier: GPL-2.0-only */
++
++#include <linux/linkage.h>
++#include <asm/asm.h>
++#include <asm-generic/export.h>
++
++/* int strncmp(const char *cs, const char *ct, size_t count) */
++SYM_FUNC_START(strncmp)
++ /*
++ * Returns
++ * a0 - comparison result, value like strncmp
++ *
++ * Parameters
++ * a0 - string1
++ * a1 - string2
++ * a2 - number of characters to compare
++ *
++ * Clobbers
++ * t0, t1, t2
++ */
++ li t2, 0
++1:
++ beq a2, t2, 2f
++ lbu t0, 0(a0)
++ lbu t1, 0(a1)
++ addi a0, a0, 1
++ addi a1, a1, 1
++ bne t0, t1, 3f
++ addi t2, t2, 1
++ bnez t0, 1b
++2:
++ li a0, 0
++ ret
++3:
++ /*
++ * strncmp only needs to return (< 0, 0, > 0) values
++ * not necessarily -1, 0, +1
++ */
++ sub a0, t0, t1
++ ret
++SYM_FUNC_END(strncmp)
+diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile
+index dd58e1d993972..d16bf715a586b 100644
+--- a/arch/riscv/purgatory/Makefile
++++ b/arch/riscv/purgatory/Makefile
+@@ -2,6 +2,7 @@
+ OBJECT_FILES_NON_STANDARD := y
+
+ purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o
++purgatory-y += strcmp.o strlen.o strncmp.o
+
+ targets += $(purgatory-y)
+ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
+@@ -18,6 +19,15 @@ $(obj)/memcpy.o: $(srctree)/arch/riscv/lib/memcpy.S FORCE
+ $(obj)/memset.o: $(srctree)/arch/riscv/lib/memset.S FORCE
+ $(call if_changed_rule,as_o_S)
+
++$(obj)/strcmp.o: $(srctree)/arch/riscv/lib/strcmp.S FORCE
++ $(call if_changed_rule,as_o_S)
++
++$(obj)/strlen.o: $(srctree)/arch/riscv/lib/strlen.S FORCE
++ $(call if_changed_rule,as_o_S)
++
++$(obj)/strncmp.o: $(srctree)/arch/riscv/lib/strncmp.S FORCE
++ $(call if_changed_rule,as_o_S)
++
+ $(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+@@ -77,6 +87,9 @@ CFLAGS_ctype.o += $(PURGATORY_CFLAGS)
+ AFLAGS_REMOVE_entry.o += -Wa,-gdwarf-2
+ AFLAGS_REMOVE_memcpy.o += -Wa,-gdwarf-2
+ AFLAGS_REMOVE_memset.o += -Wa,-gdwarf-2
++AFLAGS_REMOVE_strcmp.o += -Wa,-gdwarf-2
++AFLAGS_REMOVE_strlen.o += -Wa,-gdwarf-2
++AFLAGS_REMOVE_strncmp.o += -Wa,-gdwarf-2
+
+ $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE
+ $(call if_changed,ld)
+--
+2.39.2
+
--- /dev/null
+From cbf41c65e31e4cd561076444cbdd7c2daaabda30 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Mar 2023 10:19:30 +0200
+Subject: riscv: Move early dtb mapping into the fixmap region
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+[ Upstream commit ef69d2559fe91f23d27a3d6fd640b5641787d22e ]
+
+riscv establishes 2 virtual mappings:
+
+- early_pg_dir maps the kernel which allows to discover the system
+ memory
+- swapper_pg_dir installs the final mapping (linear mapping included)
+
+We used to map the dtb in early_pg_dir using DTB_EARLY_BASE_VA, and this
+mapping was not carried over in swapper_pg_dir. It happens that
+early_init_fdt_scan_reserved_mem() must be called before swapper_pg_dir is
+setup otherwise we could allocate reserved memory defined in the dtb.
+And this function initializes reserved_mem variable with addresses that
+lie in the early_pg_dir dtb mapping: when those addresses are reused
+with swapper_pg_dir, this mapping does not exist and then we trap.
+
+The previous "fix" was incorrect as early_init_fdt_scan_reserved_mem()
+must be called before swapper_pg_dir is set up otherwise we could
+allocate in reserved memory defined in the dtb.
+
+So move the dtb mapping in the fixmap region which is established in
+early_pg_dir and handed over to swapper_pg_dir.
+
+Fixes: 922b0375fc93 ("riscv: Fix memblock reservation for device tree blob")
+Fixes: 8f3a2b4a96dc ("RISC-V: Move DT mapping outof fixmap")
+Fixes: 50e63dd8ed92 ("riscv: fix reserved memory setup")
+Reported-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/all/f8e67f82-103d-156c-deb0-d6d6e2756f5e@microchip.com/
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
+Tested-by: Conor Dooley <conor.dooley@microchip.com>
+Link: https://lore.kernel.org/r/20230329081932.79831-2-alexghiti@rivosinc.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/riscv/vm-layout.rst | 6 +--
+ arch/riscv/include/asm/fixmap.h | 8 ++++
+ arch/riscv/include/asm/pgtable.h | 8 +++-
+ arch/riscv/kernel/setup.c | 1 -
+ arch/riscv/mm/init.c | 61 +++++++++++++++++--------------
+ 5 files changed, 51 insertions(+), 33 deletions(-)
+
+diff --git a/Documentation/riscv/vm-layout.rst b/Documentation/riscv/vm-layout.rst
+index 3be44e74ec5d6..5462c84f4723f 100644
+--- a/Documentation/riscv/vm-layout.rst
++++ b/Documentation/riscv/vm-layout.rst
+@@ -47,7 +47,7 @@ RISC-V Linux Kernel SV39
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+- ffffffc6fee00000 | -228 GB | ffffffc6feffffff | 2 MB | fixmap
++ ffffffc6fea00000 | -228 GB | ffffffc6feffffff | 6 MB | fixmap
+ ffffffc6ff000000 | -228 GB | ffffffc6ffffffff | 16 MB | PCI io
+ ffffffc700000000 | -228 GB | ffffffc7ffffffff | 4 GB | vmemmap
+ ffffffc800000000 | -224 GB | ffffffd7ffffffff | 64 GB | vmalloc/ioremap space
+@@ -83,7 +83,7 @@ RISC-V Linux Kernel SV48
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+- ffff8d7ffee00000 | -114.5 TB | ffff8d7ffeffffff | 2 MB | fixmap
++ ffff8d7ffea00000 | -114.5 TB | ffff8d7ffeffffff | 6 MB | fixmap
+ ffff8d7fff000000 | -114.5 TB | ffff8d7fffffffff | 16 MB | PCI io
+ ffff8d8000000000 | -114.5 TB | ffff8f7fffffffff | 2 TB | vmemmap
+ ffff8f8000000000 | -112.5 TB | ffffaf7fffffffff | 32 TB | vmalloc/ioremap space
+@@ -119,7 +119,7 @@ RISC-V Linux Kernel SV57
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+- ff1bfffffee00000 | -57 PB | ff1bfffffeffffff | 2 MB | fixmap
++ ff1bfffffea00000 | -57 PB | ff1bfffffeffffff | 6 MB | fixmap
+ ff1bffffff000000 | -57 PB | ff1bffffffffffff | 16 MB | PCI io
+ ff1c000000000000 | -57 PB | ff1fffffffffffff | 1 PB | vmemmap
+ ff20000000000000 | -56 PB | ff5fffffffffffff | 16 PB | vmalloc/ioremap space
+diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
+index 5c3e7b97fcc6f..0a55099bb7349 100644
+--- a/arch/riscv/include/asm/fixmap.h
++++ b/arch/riscv/include/asm/fixmap.h
+@@ -22,6 +22,14 @@
+ */
+ enum fixed_addresses {
+ FIX_HOLE,
++ /*
++ * The fdt fixmap mapping must be PMD aligned and will be mapped
++ * using PMD entries in fixmap_pmd in 64-bit and a PGD entry in 32-bit.
++ */
++ FIX_FDT_END,
++ FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1,
++
++ /* Below fixmaps will be mapped using fixmap_pte */
+ FIX_PTE,
+ FIX_PMD,
+ FIX_PUD,
+diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
+index 92ec2d9d7273f..2aeaf8e3a4ab0 100644
+--- a/arch/riscv/include/asm/pgtable.h
++++ b/arch/riscv/include/asm/pgtable.h
+@@ -87,9 +87,13 @@
+
+ #define FIXADDR_TOP PCI_IO_START
+ #ifdef CONFIG_64BIT
+-#define FIXADDR_SIZE PMD_SIZE
++#define MAX_FDT_SIZE PMD_SIZE
++#define FIX_FDT_SIZE (MAX_FDT_SIZE + SZ_2M)
++#define FIXADDR_SIZE (PMD_SIZE + FIX_FDT_SIZE)
+ #else
+-#define FIXADDR_SIZE PGDIR_SIZE
++#define MAX_FDT_SIZE PGDIR_SIZE
++#define FIX_FDT_SIZE MAX_FDT_SIZE
++#define FIXADDR_SIZE (PGDIR_SIZE + FIX_FDT_SIZE)
+ #endif
+ #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
+
+diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
+index d4a12233e728d..2acf51c235673 100644
+--- a/arch/riscv/kernel/setup.c
++++ b/arch/riscv/kernel/setup.c
+@@ -280,7 +280,6 @@ void __init setup_arch(char **cmdline_p)
+ #else
+ unflatten_device_tree();
+ #endif
+- early_init_fdt_scan_reserved_mem();
+ misc_mem_init();
+
+ init_resources();
+diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
+index 50a1b6edd4918..5570c52deb0b5 100644
+--- a/arch/riscv/mm/init.c
++++ b/arch/riscv/mm/init.c
+@@ -57,7 +57,6 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
+ EXPORT_SYMBOL(empty_zero_page);
+
+ extern char _start[];
+-#define DTB_EARLY_BASE_VA PGDIR_SIZE
+ void *_dtb_early_va __initdata;
+ uintptr_t _dtb_early_pa __initdata;
+
+@@ -236,6 +235,14 @@ static void __init setup_bootmem(void)
+ set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
+
+ reserve_initrd_mem();
++
++ /*
++ * No allocation should be done before reserving the memory as defined
++ * in the device tree, otherwise the allocation could end up in a
++ * reserved region.
++ */
++ early_init_fdt_scan_reserved_mem();
++
+ /*
+ * If DTB is built in, no need to reserve its memblock.
+ * Otherwise, do reserve it but avoid using
+@@ -279,9 +286,6 @@ pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
+ static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
+
+ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+-static p4d_t __maybe_unused early_dtb_p4d[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+-static pud_t __maybe_unused early_dtb_pud[PTRS_PER_PUD] __initdata __aligned(PAGE_SIZE);
+-static pmd_t __maybe_unused early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+
+ #ifdef CONFIG_XIP_KERNEL
+ #define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
+@@ -626,9 +630,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp,
+ #define trampoline_pgd_next (pgtable_l5_enabled ? \
+ (uintptr_t)trampoline_p4d : (pgtable_l4_enabled ? \
+ (uintptr_t)trampoline_pud : (uintptr_t)trampoline_pmd))
+-#define early_dtb_pgd_next (pgtable_l5_enabled ? \
+- (uintptr_t)early_dtb_p4d : (pgtable_l4_enabled ? \
+- (uintptr_t)early_dtb_pud : (uintptr_t)early_dtb_pmd))
+ #else
+ #define pgd_next_t pte_t
+ #define alloc_pgd_next(__va) pt_ops.alloc_pte(__va)
+@@ -636,7 +637,6 @@ static void __init create_p4d_mapping(p4d_t *p4dp,
+ #define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
+ create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
+ #define fixmap_pgd_next ((uintptr_t)fixmap_pte)
+-#define early_dtb_pgd_next ((uintptr_t)early_dtb_pmd)
+ #define create_p4d_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
+ #define create_pud_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
+ #define create_pmd_mapping(__pmdp, __va, __pa, __sz, __prot) do {} while(0)
+@@ -859,32 +859,28 @@ static void __init create_kernel_page_table(pgd_t *pgdir, bool early)
+ * this means 2 PMD entries whereas for 32-bit kernel, this is only 1 PGDIR
+ * entry.
+ */
+-static void __init create_fdt_early_page_table(pgd_t *pgdir, uintptr_t dtb_pa)
++static void __init create_fdt_early_page_table(pgd_t *pgdir,
++ uintptr_t fix_fdt_va,
++ uintptr_t dtb_pa)
+ {
+-#ifndef CONFIG_BUILTIN_DTB
+ uintptr_t pa = dtb_pa & ~(PMD_SIZE - 1);
+
+- create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
+- IS_ENABLED(CONFIG_64BIT) ? early_dtb_pgd_next : pa,
+- PGDIR_SIZE,
+- IS_ENABLED(CONFIG_64BIT) ? PAGE_TABLE : PAGE_KERNEL);
+-
+- if (pgtable_l5_enabled)
+- create_p4d_mapping(early_dtb_p4d, DTB_EARLY_BASE_VA,
+- (uintptr_t)early_dtb_pud, P4D_SIZE, PAGE_TABLE);
+-
+- if (pgtable_l4_enabled)
+- create_pud_mapping(early_dtb_pud, DTB_EARLY_BASE_VA,
+- (uintptr_t)early_dtb_pmd, PUD_SIZE, PAGE_TABLE);
++#ifndef CONFIG_BUILTIN_DTB
++ /* Make sure the fdt fixmap address is always aligned on PMD size */
++ BUILD_BUG_ON(FIX_FDT % (PMD_SIZE / PAGE_SIZE));
+
+- if (IS_ENABLED(CONFIG_64BIT)) {
+- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
++ /* In 32-bit only, the fdt lies in its own PGD */
++ if (!IS_ENABLED(CONFIG_64BIT)) {
++ create_pgd_mapping(early_pg_dir, fix_fdt_va,
++ pa, MAX_FDT_SIZE, PAGE_KERNEL);
++ } else {
++ create_pmd_mapping(fixmap_pmd, fix_fdt_va,
+ pa, PMD_SIZE, PAGE_KERNEL);
+- create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
++ create_pmd_mapping(fixmap_pmd, fix_fdt_va + PMD_SIZE,
+ pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
+ }
+
+- dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
++ dtb_early_va = (void *)fix_fdt_va + (dtb_pa & (PMD_SIZE - 1));
+ #else
+ /*
+ * For 64-bit kernel, __va can't be used since it would return a linear
+@@ -1054,7 +1050,8 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+ create_kernel_page_table(early_pg_dir, true);
+
+ /* Setup early mapping for FDT early scan */
+- create_fdt_early_page_table(early_pg_dir, dtb_pa);
++ create_fdt_early_page_table(early_pg_dir,
++ __fix_to_virt(FIX_FDT), dtb_pa);
+
+ /*
+ * Bootime fixmap only can handle PMD_SIZE mapping. Thus, boot-ioremap
+@@ -1096,6 +1093,16 @@ static void __init setup_vm_final(void)
+ u64 i;
+
+ /* Setup swapper PGD for fixmap */
++#if !defined(CONFIG_64BIT)
++ /*
++ * In 32-bit, the device tree lies in a pgd entry, so it must be copied
++ * directly in swapper_pg_dir in addition to the pgd entry that points
++ * to fixmap_pte.
++ */
++ unsigned long idx = pgd_index(__fix_to_virt(FIX_FDT));
++
++ set_pgd(&swapper_pg_dir[idx], early_pg_dir[idx]);
++#endif
+ create_pgd_mapping(swapper_pg_dir, FIXADDR_START,
+ __pa_symbol(fixmap_pgd_next),
+ PGDIR_SIZE, PAGE_TABLE);
+--
+2.39.2
+
--- /dev/null
+From aade648641f22408ed5e5297350048704fcf3608 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Apr 2023 11:06:11 +0200
+Subject: sched/fair: Fix imbalance overflow
+
+From: Vincent Guittot <vincent.guittot@linaro.org>
+
+[ Upstream commit 91dcf1e8068e9a8823e419a7a34ff4341275fb70 ]
+
+When local group is fully busy but its average load is above system load,
+computing the imbalance will overflow and local group is not the best
+target for pulling this load.
+
+Fixes: 0b0695f2b34a ("sched/fair: Rework load_balance()")
+Reported-by: Tingjia Cao <tjcao980311@gmail.com>
+Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Tested-by: Tingjia Cao <tjcao980311@gmail.com>
+Link: https://lore.kernel.org/lkml/CABcWv9_DAhVBOq2=W=2ypKE9dKM5s2DvoV8-U0+GDwwuKZ89jQ@mail.gmail.com/T/
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 88821ab009b30..ec2d913280e6a 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -10041,6 +10041,16 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
+
+ sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
+ sds->total_capacity;
++
++ /*
++ * If the local group is more loaded than the average system
++ * load, don't try to pull any tasks.
++ */
++ if (local->avg_load >= sds->avg_load) {
++ env->imbalance = 0;
++ return;
++ }
++
+ }
+
+ /*
+--
+2.39.2
+
drm-amd-pm-correct-smu13.0.7-max-shader-clock-reporting.patch
mptcp-use-mptcp_schedule_work-instead-of-open-coding-it.patch
mptcp-stricter-state-check-in-mptcp_worker.patch
+ubi-fix-deadlock-caused-by-recursively-holding-work_.patch
+i2c-mchp-pci1xxxx-update-timing-registers.patch
+ubi-fix-failure-attaching-when-vid_hdr-offset-equals.patch
+powerpc-papr_scm-update-the-numa-distance-table-for-.patch
+sched-fair-fix-imbalance-overflow.patch
+x86-rtc-remove-__init-for-runtime-functions.patch
+i2c-ocores-generate-stop-condition-after-timeout-in-.patch
+cifs-fix-negotiate-context-parsing.patch
+risc-v-add-infrastructure-to-allow-different-str-imp.patch
+purgatory-fix-disabling-debug-info.patch
+documentation-riscv-document-the-sv57-vm-layout.patch
+riscv-move-early-dtb-mapping-into-the-fixmap-region.patch
+nvme-pci-mark-lexar-nm760-as-ignore_dev_subnqn.patch
+nvme-pci-add-nvme_quirk_bogus_nid-for-t-force-z330-s.patch
+cgroup-cpuset-skip-spread-flags-update-on-v2.patch
+cgroup-cpuset-make-cpuset_fork-handle-clone_into_cgr.patch
+cgroup-cpuset-add-cpuset_can_fork-and-cpuset_cancel_.patch
--- /dev/null
+From 902ffed3205d4bc12d520309f4a9f162b6036f0a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 4 Mar 2023 09:41:41 +0800
+Subject: ubi: Fix deadlock caused by recursively holding work_sem
+
+From: ZhaoLong Wang <wangzhaolong1@huawei.com>
+
+[ Upstream commit f773f0a331d6c41733b17bebbc1b6cae12e016f5 ]
+
+During the processing of the bgt, if the sync_erase() return -EBUSY
+or some other error code in __erase_worker(),schedule_erase() called
+again lead to the down_read(ubi->work_sem) hold twice and may get
+block by down_write(ubi->work_sem) in ubi_update_fastmap(),
+which cause deadlock.
+
+ ubi bgt other task
+ do_work
+ down_read(&ubi->work_sem) ubi_update_fastmap
+ erase_worker # Blocked by down_read
+ __erase_worker down_write(&ubi->work_sem)
+ schedule_erase
+ schedule_ubi_work
+ down_read(&ubi->work_sem)
+
+Fix this by changing input parameter @nested of the schedule_erase() to
+'true' to avoid recursively acquiring the down_read(&ubi->work_sem).
+
+Also, fix the incorrect comment about @nested parameter of the
+schedule_erase() because when down_write(ubi->work_sem) is held, the
+@nested is also need be true.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217093
+Fixes: 2e8f08deabbc ("ubi: Fix races around ubi_refill_pools()")
+Signed-off-by: ZhaoLong Wang <wangzhaolong1@huawei.com>
+Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mtd/ubi/wl.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
+index 9e14319225c97..6049ab9e46479 100644
+--- a/drivers/mtd/ubi/wl.c
++++ b/drivers/mtd/ubi/wl.c
+@@ -575,7 +575,7 @@ static int erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk,
+ * @vol_id: the volume ID that last used this PEB
+ * @lnum: the last used logical eraseblock number for the PEB
+ * @torture: if the physical eraseblock has to be tortured
+- * @nested: denotes whether the work_sem is already held in read mode
++ * @nested: denotes whether the work_sem is already held
+ *
+ * This function returns zero in case of success and a %-ENOMEM in case of
+ * failure.
+@@ -1131,7 +1131,7 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
+ int err1;
+
+ /* Re-schedule the LEB for erasure */
+- err1 = schedule_erase(ubi, e, vol_id, lnum, 0, false);
++ err1 = schedule_erase(ubi, e, vol_id, lnum, 0, true);
+ if (err1) {
+ spin_lock(&ubi->wl_lock);
+ wl_entry_destroy(ubi, e);
+--
+2.39.2
+
--- /dev/null
+From 9ceb73731c25f98bebb89008a028db23a7d5cade Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Mar 2023 09:33:08 +0800
+Subject: ubi: Fix failure attaching when vid_hdr offset equals to (sub)page
+ size
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+[ Upstream commit 1e020e1b96afdecd20680b5b5be2a6ffc3d27628 ]
+
+Following process will make ubi attaching failed since commit
+1b42b1a36fc946 ("ubi: ensure that VID header offset ... size"):
+
+ID="0xec,0xa1,0x00,0x15" # 128M 128KB 2KB
+modprobe nandsim id_bytes=$ID
+flash_eraseall /dev/mtd0
+modprobe ubi mtd="0,2048" # set vid_hdr offset as 2048 (one page)
+(dmesg):
+ ubi0 error: ubi_attach_mtd_dev [ubi]: VID header offset 2048 too large.
+ UBI error: cannot attach mtd0
+ UBI error: cannot initialize UBI, error -22
+
+Rework the original solution; the key point is making sure
+'vid_hdr_shift + UBI_VID_HDR_SIZE < ubi->vid_hdr_alsize',
+so we should check vid_hdr_shift rather than vid_hdr_offset.
+Then, UBI still supports (sub)page aligned VID header offsets.
+
+Fixes: 1b42b1a36fc946 ("ubi: ensure that VID header offset ... size")
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Tested-by: Nicolas Schichan <nschichan@freebox.fr>
+Tested-by: Miquel Raynal <miquel.raynal@bootlin.com> # v5.10, v4.19
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mtd/ubi/build.c | 21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c
+index 7f65af1697519..1662c12e24ada 100644
+--- a/drivers/mtd/ubi/build.c
++++ b/drivers/mtd/ubi/build.c
+@@ -664,12 +664,6 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024)
+ ubi->ec_hdr_alsize = ALIGN(UBI_EC_HDR_SIZE, ubi->hdrs_min_io_size);
+ ubi->vid_hdr_alsize = ALIGN(UBI_VID_HDR_SIZE, ubi->hdrs_min_io_size);
+
+- if (ubi->vid_hdr_offset && ((ubi->vid_hdr_offset + UBI_VID_HDR_SIZE) >
+- ubi->vid_hdr_alsize)) {
+- ubi_err(ubi, "VID header offset %d too large.", ubi->vid_hdr_offset);
+- return -EINVAL;
+- }
+-
+ dbg_gen("min_io_size %d", ubi->min_io_size);
+ dbg_gen("max_write_size %d", ubi->max_write_size);
+ dbg_gen("hdrs_min_io_size %d", ubi->hdrs_min_io_size);
+@@ -687,6 +681,21 @@ static int io_init(struct ubi_device *ubi, int max_beb_per1024)
+ ubi->vid_hdr_aloffset;
+ }
+
++ /*
++ * Memory allocation for VID header is ubi->vid_hdr_alsize
++ * which is described in comments in io.c.
++ * Make sure VID header shift + UBI_VID_HDR_SIZE not exceeds
++ * ubi->vid_hdr_alsize, so that all vid header operations
++ * won't access memory out of bounds.
++ */
++ if ((ubi->vid_hdr_shift + UBI_VID_HDR_SIZE) > ubi->vid_hdr_alsize) {
++ ubi_err(ubi, "Invalid VID header offset %d, VID header shift(%d)"
++ " + VID header size(%zu) > VID header aligned size(%d).",
++ ubi->vid_hdr_offset, ubi->vid_hdr_shift,
++ UBI_VID_HDR_SIZE, ubi->vid_hdr_alsize);
++ return -EINVAL;
++ }
++
+ /* Similar for the data offset */
+ ubi->leb_start = ubi->vid_hdr_offset + UBI_VID_HDR_SIZE;
+ ubi->leb_start = ALIGN(ubi->leb_start, ubi->min_io_size);
+--
+2.39.2
+
--- /dev/null
+From a9718d50cb08e9319acde2c505a20bd4acaf0f96 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Apr 2023 08:26:52 +0200
+Subject: x86/rtc: Remove __init for runtime functions
+
+From: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nokia.com>
+
+[ Upstream commit 775d3c514c5b2763a50ab7839026d7561795924d ]
+
+set_rtc_noop() and get_rtc_noop() are called after booting, therefore
+their __init annotation is wrong.
+
+A crash was observed on an x86 platform where CMOS RTC is unused and
+disabled via device tree. set_rtc_noop() was invoked from ntp:
+sync_hw_clock(), although CONFIG_RTC_SYSTOHC=n, however sync_cmos_clock()
+doesn't honour that.
+
+ Workqueue: events_power_efficient sync_hw_clock
+ RIP: 0010:set_rtc_noop
+ Call Trace:
+ update_persistent_clock64
+ sync_hw_clock
+
+Fix this by dropping the __init annotation from set/get_rtc_noop().
+
+Fixes: c311ed6183f4 ("x86/init: Allow DT configured systems to disable RTC at boot time")
+Signed-off-by: Matija Glavinic Pecotic <matija.glavinic-pecotic.ext@nokia.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/59f7ceb1-446b-1d3d-0bc8-1f0ee94b1e18@nokia.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/x86_init.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
+index ef80d361b4632..10622cf2b30f4 100644
+--- a/arch/x86/kernel/x86_init.c
++++ b/arch/x86/kernel/x86_init.c
+@@ -33,8 +33,8 @@ static int __init iommu_init_noop(void) { return 0; }
+ static void iommu_shutdown_noop(void) { }
+ bool __init bool_x86_init_noop(void) { return false; }
+ void x86_op_int_noop(int cpu) { }
+-static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
+-static __init void get_rtc_noop(struct timespec64 *now) { }
++static int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
++static void get_rtc_noop(struct timespec64 *now) { }
+
+ static __initconst const struct of_device_id of_cmos_match[] = {
+ { .compatible = "motorola,mc146818" },
+--
+2.39.2
+