--- /dev/null
+From 7710968371e3e94fde9a4dc59b1724e2a8a67849 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 12 Oct 2024 07:22:46 +0000
+Subject: cgroup: Fix potential overflow issue when checking max_depth
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Xiu Jianfeng <xiujianfeng@huawei.com>
+
+[ Upstream commit 3cc4e13bb1617f6a13e5e6882465984148743cf4 ]
+
+cgroup.max.depth is the maximum allowed descent depth below the current
+cgroup. If the actual descent depth is equal to or larger than this, an
+attempt to create a new child cgroup will fail. However, because
+cgroup->max_depth is of type int and defaults to INT_MAX, the condition
+'level > cgroup->max_depth' will never be satisfied, and level will
+overflow once it reaches INT_MAX.
+
+Fix it by starting the level from 0 and using '>=' instead.
+
+It's worth mentioning that this issue is unlikely to occur in reality,
+as it's impossible to have a hierarchy with a depth of INT_MAX, but it
+should still be avoided logically.
+
+Fixes: 1a926e0bbab8 ("cgroup: implement hierarchy limits")
+Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
+Reviewed-by: Michal Koutný <mkoutny@suse.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/cgroup/cgroup.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
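+As an illustration only, a minimal sketch of the walk being fixed, using
+simplified stand-in types rather than the kernel structures: with the old
+code, 'level' started at 1 and the check used '>', so the default
+max_depth of INT_MAX could never trip it; starting at 0 and using '>='
+keeps the limit reachable for every value.
+
+	struct cg {
+		struct cg *parent;
+		int max_depth;		/* INT_MAX by default */
+	};
+
+	static bool depth_ok(struct cg *parent)
+	{
+		int level = 0;		/* new child sits level + 1 below 'c' */
+		struct cg *c;
+
+		for (c = parent; c; c = c->parent) {
+			/* '>=' rejects creation even for max_depth == 0 and
+			 * never lets 'level' run past INT_MAX unchecked */
+			if (level >= c->max_depth)
+				return false;
+			level++;
+		}
+		return true;
+	}
+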
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index f6656fd410d0f..2ca4aeb21a440 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -5707,7 +5707,7 @@ static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
+ {
+ struct cgroup *cgroup;
+ int ret = false;
+- int level = 1;
++ int level = 0;
+
+ lockdep_assert_held(&cgroup_mutex);
+
+@@ -5715,7 +5715,7 @@ static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
+ if (cgroup->nr_descendants >= cgroup->max_descendants)
+ goto fail;
+
+- if (level > cgroup->max_depth)
++ if (level >= cgroup->max_depth)
+ goto fail;
+
+ level++;
+--
+2.43.0
+
--- /dev/null
+From 060240fce282fe2fdb001451d0a5f6525c66835e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Sep 2024 15:42:46 +0200
+Subject: cpufreq: Avoid a bad reference count on CPU node
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Miquel Sabaté Solà <mikisabate@gmail.com>
+
+[ Upstream commit c0f02536fffbbec71aced36d52a765f8c4493dc2 ]
+
+In the parse_perf_domain function, if the call to
+of_parse_phandle_with_args returns an error, then the reference to the
+CPU device node that was acquired at the start of the function would not
+be properly decremented.
+
+Address this by declaring the variable with the __free(device_node)
+cleanup attribute.
+
+Signed-off-by: Miquel Sabaté Solà <mikisabate@gmail.com>
+Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
+Link: https://patch.msgid.link/20240917134246.584026-1-mikisabate@gmail.com
+Cc: All applicable <stable@vger.kernel.org>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/cpufreq.h | 6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
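+As background, a hedged sketch of the scoped-cleanup pattern from
+linux/cleanup.h that the fix relies on; the property name and function
+below are illustrative and not part of this patch:
+
+	/* of_node_put() runs automatically when 'np' leaves scope, so every
+	 * early return drops the reference without explicit cleanup code. */
+	static int example_lookup(int cpu)
+	{
+		struct device_node *np __free(device_node) =
+						of_cpu_device_node_get(cpu);
+
+		if (!np)
+			return -ENODEV;
+
+		if (!of_property_read_bool(np, "example-property"))
+			return -EINVAL;	/* reference released here too */
+
+		return 0;		/* ...and on the success path */
+	}
+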
+diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
+index 1976244b97e3a..3759d0a15c7b2 100644
+--- a/include/linux/cpufreq.h
++++ b/include/linux/cpufreq.h
+@@ -1126,10 +1126,9 @@ static inline int parse_perf_domain(int cpu, const char *list_name,
+ const char *cell_name,
+ struct of_phandle_args *args)
+ {
+- struct device_node *cpu_np;
+ int ret;
+
+- cpu_np = of_cpu_device_node_get(cpu);
++ struct device_node *cpu_np __free(device_node) = of_cpu_device_node_get(cpu);
+ if (!cpu_np)
+ return -ENODEV;
+
+@@ -1137,9 +1136,6 @@ static inline int parse_perf_domain(int cpu, const char *list_name,
+ args);
+ if (ret < 0)
+ return ret;
+-
+- of_node_put(cpu_np);
+-
+ return 0;
+ }
+
+--
+2.43.0
+
--- /dev/null
+From 8a699f9d3fb2405c1e54b7d843e5d9ae8a6f8c0b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Oct 2022 13:39:23 +0900
+Subject: cpufreq: Generalize of_perf_domain_get_sharing_cpumask phandle format
+
+From: Hector Martin <marcan@marcan.st>
+
+[ Upstream commit d182dc6de93225cd853de4db68a1a77501bedb6e ]
+
+of_perf_domain_get_sharing_cpumask currently assumes a 1-argument
+phandle format, and directly returns the argument. Generalize this to
+return the full of_phandle_args, so it can be used by drivers which use
+other phandle styles (e.g. separate nodes). This also requires changing
+the CPU sharing match to compare the full args structure.
+
+Also, make sure to call of_node_put(args.np) (the original code was
+leaking a reference).
+
+Signed-off-by: Hector Martin <marcan@marcan.st>
+Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
+Stable-dep-of: c0f02536fffb ("cpufreq: Avoid a bad reference count on CPU node")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/mediatek-cpufreq-hw.c | 14 +++++++++-----
+ include/linux/cpufreq.h | 28 +++++++++++++++------------
+ 2 files changed, 25 insertions(+), 17 deletions(-)
+
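+For reference, a sketch (illustrative only) of the matching rule the new
+comparison implements: two CPUs share a performance domain iff the
+phandle target node and all argument cells are identical; struct
+of_phandle_args is the one from include/linux/of.h.
+
+	static bool same_perf_domain(const struct of_phandle_args *a,
+				     const struct of_phandle_args *b)
+	{
+		return a->np == b->np &&
+		       a->args_count == b->args_count &&
+		       !memcmp(a->args, b->args,
+			       sizeof(a->args[0]) * a->args_count);
+	}
+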
+diff --git a/drivers/cpufreq/mediatek-cpufreq-hw.c b/drivers/cpufreq/mediatek-cpufreq-hw.c
+index 7f326bb5fd8de..62f5a9d64e8fa 100644
+--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
++++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
+@@ -162,6 +162,7 @@ static int mtk_cpu_resources_init(struct platform_device *pdev,
+ struct mtk_cpufreq_data *data;
+ struct device *dev = &pdev->dev;
+ struct resource *res;
++ struct of_phandle_args args;
+ void __iomem *base;
+ int ret, i;
+ int index;
+@@ -170,11 +171,14 @@ static int mtk_cpu_resources_init(struct platform_device *pdev,
+ if (!data)
+ return -ENOMEM;
+
+- index = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains",
+- "#performance-domain-cells",
+- policy->cpus);
+- if (index < 0)
+- return index;
++ ret = of_perf_domain_get_sharing_cpumask(policy->cpu, "performance-domains",
++ "#performance-domain-cells",
++ policy->cpus, &args);
++ if (ret < 0)
++ return ret;
++
++ index = args.args[0];
++ of_node_put(args.np);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, index);
+ if (!res) {
+diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
+index 9d208648c84d5..1976244b97e3a 100644
+--- a/include/linux/cpufreq.h
++++ b/include/linux/cpufreq.h
+@@ -1123,10 +1123,10 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy,
+ }
+
+ static inline int parse_perf_domain(int cpu, const char *list_name,
+- const char *cell_name)
++ const char *cell_name,
++ struct of_phandle_args *args)
+ {
+ struct device_node *cpu_np;
+- struct of_phandle_args args;
+ int ret;
+
+ cpu_np = of_cpu_device_node_get(cpu);
+@@ -1134,41 +1134,44 @@ static inline int parse_perf_domain(int cpu, const char *list_name,
+ return -ENODEV;
+
+ ret = of_parse_phandle_with_args(cpu_np, list_name, cell_name, 0,
+- &args);
++ args);
+ if (ret < 0)
+ return ret;
+
+ of_node_put(cpu_np);
+
+- return args.args[0];
++ return 0;
+ }
+
+ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
+- const char *cell_name, struct cpumask *cpumask)
++ const char *cell_name, struct cpumask *cpumask,
++ struct of_phandle_args *pargs)
+ {
+- int target_idx;
+ int cpu, ret;
++ struct of_phandle_args args;
+
+- ret = parse_perf_domain(pcpu, list_name, cell_name);
++ ret = parse_perf_domain(pcpu, list_name, cell_name, pargs);
+ if (ret < 0)
+ return ret;
+
+- target_idx = ret;
+ cpumask_set_cpu(pcpu, cpumask);
+
+ for_each_possible_cpu(cpu) {
+ if (cpu == pcpu)
+ continue;
+
+- ret = parse_perf_domain(cpu, list_name, cell_name);
++ ret = parse_perf_domain(cpu, list_name, cell_name, &args);
+ if (ret < 0)
+ continue;
+
+- if (target_idx == ret)
++ if (pargs->np == args.np && pargs->args_count == args.args_count &&
++ !memcmp(pargs->args, args.args, sizeof(args.args[0]) * args.args_count))
+ cpumask_set_cpu(cpu, cpumask);
++
++ of_node_put(args.np);
+ }
+
+- return target_idx;
++ return 0;
+ }
+ #else
+ static inline int cpufreq_boost_trigger_state(int state)
+@@ -1198,7 +1201,8 @@ cpufreq_table_set_inefficient(struct cpufreq_policy *policy,
+ }
+
+ static inline int of_perf_domain_get_sharing_cpumask(int pcpu, const char *list_name,
+- const char *cell_name, struct cpumask *cpumask)
++ const char *cell_name, struct cpumask *cpumask,
++ struct of_phandle_args *pargs)
+ {
+ return -EOPNOTSUPP;
+ }
+--
+2.43.0
+
--- /dev/null
+From 0dffb2de44196a0f52f453419ce4c6ef606caec1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 Mar 2023 18:57:01 +0000
+Subject: fs/proc/kcore: avoid bounce buffer for ktext data
+
+From: Lorenzo Stoakes <lstoakes@gmail.com>
+
+[ Upstream commit 2e1c0170771e6bf31bc785ea43a44e6e85e36268 ]
+
+Patch series "convert read_kcore(), vread() to use iterators", v8.
+
+While reviewing Baoquan's recent changes to permit vread() access to
+vm_map_ram regions of vmalloc allocations, Willy pointed out [1] that it
+would be nice to refactor vread() as a whole, since its only user is
+read_kcore() and the existing form of vread() necessitates the use of a
+bounce buffer.
+
+This patch series does exactly that, as well as adjusting how we read the
+kernel text section to avoid the use of a bounce buffer in this case as
+well.
+
+This has been tested against the test case which motivated Baoquan's
+changes in the first place [2] which continues to function correctly, as
+do the vmalloc self tests.
+
+This patch (of 4):
+
+Commit df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data")
+introduced the use of a bounce buffer to retrieve kernel text data for
+/proc/kcore in order to avoid failures arising from hardened user copies
+enabled by CONFIG_HARDENED_USERCOPY in check_kernel_text_object().
+
+We can avoid doing this if, instead of copy_to_user(), we use
+_copy_to_user(), which bypasses the hardening check. This is more
+efficient than using a bounce buffer and simplifies the code.
+
+We do so as part of an overall effort to eliminate bounce buffer usage in
+the function, with an eye to converting it to an iterator read.
+
+Link: https://lkml.kernel.org/r/cover.1679566220.git.lstoakes@gmail.com
+Link: https://lore.kernel.org/all/Y8WfDSRkc%2FOHP3oD@casper.infradead.org/ [1]
+Link: https://lore.kernel.org/all/87ilk6gos2.fsf@oracle.com/T/#u [2]
+Link: https://lkml.kernel.org/r/fd39b0bfa7edc76d360def7d034baaee71d90158.1679511146.git.lstoakes@gmail.com
+Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Liu Shixin <liushixin2@huawei.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 3d5854d75e31 ("fs/proc/kcore.c: allow translation of physical memory addresses")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/kcore.c | 17 +++++------------
+ 1 file changed, 5 insertions(+), 12 deletions(-)
+
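+As a reminder of the distinction the patch relies on (sketch only, not
+the kcore code): copy_to_user() runs the hardened-usercopy object checks,
+while _copy_to_user() skips them, which is what allows copying kernel
+text without staging it through a bounce buffer first.
+
+	static ssize_t copy_ktext_sketch(char __user *ubuf, const void *kaddr,
+					 size_t len)
+	{
+		/* copy_to_user(ubuf, kaddr, len) would trip
+		 * check_kernel_text_object() for kernel-text 'kaddr' when
+		 * CONFIG_HARDENED_USERCOPY is enabled */
+		if (_copy_to_user(ubuf, kaddr, len))
+			return -EFAULT;
+		return len;
+	}
+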
+diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
+index 590ecb79ad8b6..786e5e90f670c 100644
+--- a/fs/proc/kcore.c
++++ b/fs/proc/kcore.c
+@@ -542,19 +542,12 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ case KCORE_VMEMMAP:
+ case KCORE_TEXT:
+ /*
+- * Using bounce buffer to bypass the
+- * hardened user copy kernel text checks.
++ * We use _copy_to_user() to bypass usermode hardening
++ * which would otherwise prevent this operation.
+ */
+- if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+- if (clear_user(buffer, tsz)) {
+- ret = -EFAULT;
+- goto out;
+- }
+- } else {
+- if (copy_to_user(buffer, buf, tsz)) {
+- ret = -EFAULT;
+- goto out;
+- }
++ if (_copy_to_user(buffer, (char *)start, tsz)) {
++ ret = -EFAULT;
++ goto out;
+ }
+ break;
+ default:
+--
+2.43.0
+
--- /dev/null
+From 8477089ee95584a864ddd2cd9761b517d8be1ee9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 Mar 2023 18:57:02 +0000
+Subject: fs/proc/kcore: convert read_kcore() to read_kcore_iter()
+
+From: Lorenzo Stoakes <lstoakes@gmail.com>
+
+[ Upstream commit 46c0d6d0904a10785faabee53fe53ee1aa718fea ]
+
+For the time being we still use a bounce buffer for vread(); however, in
+the next patch we will convert this to interact directly with the
+iterator and eliminate the bounce buffer altogether.
+
+Link: https://lkml.kernel.org/r/ebe12c8d70eebd71f487d80095605f3ad0d1489c.1679511146.git.lstoakes@gmail.com
+Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
+Reviewed-by: David Hildenbrand <david@redhat.com>
+Reviewed-by: Baoquan He <bhe@redhat.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Liu Shixin <liushixin2@huawei.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 3d5854d75e31 ("fs/proc/kcore.c: allow translation of physical memory addresses")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/kcore.c | 36 ++++++++++++++++++------------------
+ 1 file changed, 18 insertions(+), 18 deletions(-)
+
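+As a generic sketch of the conversion pattern (illustrative, not the
+kcore code): a ->read_iter() handler takes the file and position from the
+kiocb and writes through the iov_iter, which advances itself, so the
+explicit 'buffer += tsz' bookkeeping disappears.
+
+	static ssize_t example_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+	{
+		struct file *file = iocb->ki_filp;
+		loff_t *fpos = &iocb->ki_pos;
+		size_t len = iov_iter_count(iter);
+		char *src = file->private_data;	/* assumed in-kernel buffer */
+
+		/* bounds checks against the source size omitted in this sketch */
+		if (copy_to_iter(src + *fpos, len, iter) != len)
+			return -EFAULT;
+
+		*fpos += len;
+		return len;
+	}
+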
+diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
+index 786e5e90f670c..2aff567abe1e3 100644
+--- a/fs/proc/kcore.c
++++ b/fs/proc/kcore.c
+@@ -25,7 +25,7 @@
+ #include <linux/memblock.h>
+ #include <linux/init.h>
+ #include <linux/slab.h>
+-#include <linux/uaccess.h>
++#include <linux/uio.h>
+ #include <asm/io.h>
+ #include <linux/list.h>
+ #include <linux/ioport.h>
+@@ -309,9 +309,12 @@ static void append_kcore_note(char *notes, size_t *i, const char *name,
+ }
+
+ static ssize_t
+-read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
++read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
+ {
++ struct file *file = iocb->ki_filp;
+ char *buf = file->private_data;
++ loff_t *fpos = &iocb->ki_pos;
++
+ size_t phdrs_offset, notes_offset, data_offset;
+ size_t page_offline_frozen = 1;
+ size_t phdrs_len, notes_len;
+@@ -319,6 +322,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ size_t tsz;
+ int nphdr;
+ unsigned long start;
++ size_t buflen = iov_iter_count(iter);
+ size_t orig_buflen = buflen;
+ int ret = 0;
+
+@@ -357,12 +361,11 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ };
+
+ tsz = min_t(size_t, buflen, sizeof(struct elfhdr) - *fpos);
+- if (copy_to_user(buffer, (char *)&ehdr + *fpos, tsz)) {
++ if (copy_to_iter((char *)&ehdr + *fpos, tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+- buffer += tsz;
+ buflen -= tsz;
+ *fpos += tsz;
+ }
+@@ -399,15 +402,14 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ }
+
+ tsz = min_t(size_t, buflen, phdrs_offset + phdrs_len - *fpos);
+- if (copy_to_user(buffer, (char *)phdrs + *fpos - phdrs_offset,
+- tsz)) {
++ if (copy_to_iter((char *)phdrs + *fpos - phdrs_offset, tsz,
++ iter) != tsz) {
+ kfree(phdrs);
+ ret = -EFAULT;
+ goto out;
+ }
+ kfree(phdrs);
+
+- buffer += tsz;
+ buflen -= tsz;
+ *fpos += tsz;
+ }
+@@ -449,14 +451,13 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ min(vmcoreinfo_size, notes_len - i));
+
+ tsz = min_t(size_t, buflen, notes_offset + notes_len - *fpos);
+- if (copy_to_user(buffer, notes + *fpos - notes_offset, tsz)) {
++ if (copy_to_iter(notes + *fpos - notes_offset, tsz, iter) != tsz) {
+ kfree(notes);
+ ret = -EFAULT;
+ goto out;
+ }
+ kfree(notes);
+
+- buffer += tsz;
+ buflen -= tsz;
+ *fpos += tsz;
+ }
+@@ -498,7 +499,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ }
+
+ if (!m) {
+- if (clear_user(buffer, tsz)) {
++ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+@@ -509,14 +510,14 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ case KCORE_VMALLOC:
+ vread(buf, (char *)start, tsz);
+ /* we have to zero-fill user buffer even if no read */
+- if (copy_to_user(buffer, buf, tsz)) {
++ if (copy_to_iter(buf, tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+ break;
+ case KCORE_USER:
+ /* User page is handled prior to normal kernel page: */
+- if (copy_to_user(buffer, (char *)start, tsz)) {
++ if (copy_to_iter((char *)start, tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+@@ -532,7 +533,7 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ */
+ if (!page || PageOffline(page) ||
+ is_page_hwpoison(page) || !pfn_is_ram(pfn)) {
+- if (clear_user(buffer, tsz)) {
++ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+@@ -542,17 +543,17 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ case KCORE_VMEMMAP:
+ case KCORE_TEXT:
+ /*
+- * We use _copy_to_user() to bypass usermode hardening
++ * We use _copy_to_iter() to bypass usermode hardening
+ * which would otherwise prevent this operation.
+ */
+- if (_copy_to_user(buffer, (char *)start, tsz)) {
++ if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+ break;
+ default:
+ pr_warn_once("Unhandled KCORE type: %d\n", m->type);
+- if (clear_user(buffer, tsz)) {
++ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+@@ -560,7 +561,6 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ skip:
+ buflen -= tsz;
+ *fpos += tsz;
+- buffer += tsz;
+ start += tsz;
+ tsz = (buflen > PAGE_SIZE ? PAGE_SIZE : buflen);
+ }
+@@ -604,7 +604,7 @@ static int release_kcore(struct inode *inode, struct file *file)
+ }
+
+ static const struct proc_ops kcore_proc_ops = {
+- .proc_read = read_kcore,
++ .proc_read_iter = read_kcore_iter,
+ .proc_open = open_kcore,
+ .proc_release = release_kcore,
+ .proc_lseek = default_llseek,
+--
+2.43.0
+
--- /dev/null
+From 9a960faeefd193ab16e27f7375f9096556b20586 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 22:50:21 +0100
+Subject: fs/proc/kcore: reinstate bounce buffer for KCORE_TEXT regions
+
+From: Lorenzo Stoakes <lstoakes@gmail.com>
+
+[ Upstream commit 17457784004c84178798432a029ab20e14f728b1 ]
+
+Some architectures do not populate the entire range categorised by
+KCORE_TEXT, so we must ensure that the kernel address we read from is
+valid.
+
+Unfortunately there is currently no way to do this with a purely
+iterator-based approach, so reinstate the bounce buffer in this instance
+so we can use copy_from_kernel_nofault() in order to avoid page faults
+when regions are unmapped.
+
+This change partly reverts commit 2e1c0170771e ("fs/proc/kcore: avoid
+bounce buffer for ktext data"), reinstating the bounce buffer, but adapts
+the code to continue to use an iterator.
+
+[lstoakes@gmail.com: correct comment to be strictly correct about reasoning]
+ Link: https://lkml.kernel.org/r/525a3f14-74fa-4c22-9fca-9dab4de8a0c3@lucifer.local
+Link: https://lkml.kernel.org/r/20230731215021.70911-1-lstoakes@gmail.com
+Fixes: 2e1c0170771e ("fs/proc/kcore: avoid bounce buffer for ktext data")
+Signed-off-by: Lorenzo Stoakes <lstoakes@gmail.com>
+Reported-by: Jiri Olsa <olsajiri@gmail.com>
+Closes: https://lore.kernel.org/all/ZHc2fm+9daF6cgCE@krava
+Tested-by: Jiri Olsa <jolsa@kernel.org>
+Tested-by: Will Deacon <will@kernel.org>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: Baoquan He <bhe@redhat.com>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Jens Axboe <axboe@kernel.dk>
+Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
+Cc: Liu Shixin <liushixin2@huawei.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: Mike Galbraith <efault@gmx.de>
+Cc: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: Uladzislau Rezki (Sony) <urezki@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 3d5854d75e31 ("fs/proc/kcore.c: allow translation of physical memory addresses")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/proc/kcore.c | 17 ++++++++++++++---
+ 1 file changed, 14 insertions(+), 3 deletions(-)
+
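+A short sketch of why the two-step scheme is needed (illustrative; 'buf'
+stands for a preallocated PAGE_SIZE bounce buffer): there is no "nofault"
+helper that writes straight into an iov_iter, so the fault-tolerant read
+and the copy visible to userspace have to stay separate.
+
+	if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+		/* region not mapped: present it to userspace as zeroes */
+		if (iov_iter_zero(tsz, iter) != tsz)
+			return -EFAULT;
+	} else if (_copy_to_iter(buf, tsz, iter) != tsz) {
+		return -EFAULT;
+	}
+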
+diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
+index 2aff567abe1e3..87a46f2d84195 100644
+--- a/fs/proc/kcore.c
++++ b/fs/proc/kcore.c
+@@ -543,10 +543,21 @@ read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
+ case KCORE_VMEMMAP:
+ case KCORE_TEXT:
+ /*
+- * We use _copy_to_iter() to bypass usermode hardening
+- * which would otherwise prevent this operation.
++ * Sadly we must use a bounce buffer here to be able to
++ * make use of copy_from_kernel_nofault(), as these
++ * memory regions might not always be mapped on all
++ * architectures.
+ */
+- if (_copy_to_iter((char *)start, tsz, iter) != tsz) {
++ if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
++ if (iov_iter_zero(tsz, iter) != tsz) {
++ ret = -EFAULT;
++ goto out;
++ }
++ /*
++ * We know the bounce buffer is safe to copy from, so
++ * use _copy_to_iter() directly.
++ */
++ } else if (_copy_to_iter(buf, tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+ }
+--
+2.43.0
+
--- /dev/null
+From 6a7f99cee5271987545fa02cbae7b83f37fbc69d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 30 Sep 2024 14:21:19 +0200
+Subject: fs/proc/kcore.c: allow translation of physical memory addresses
+
+From: Alexander Gordeev <agordeev@linux.ibm.com>
+
+[ Upstream commit 3d5854d75e3187147613130561b58f0b06166172 ]
+
+When /proc/kcore is read, an attempt to read the first two pages results
+in a HW-specific page swap on s390, and other (so-called prefix) pages
+are accessed instead. That leads to a wrong read.
+
+Allow architecture-specific translation of memory addresses using the
+kc_xlate_dev_mem_ptr() and kc_unxlate_dev_mem_ptr() callbacks, similarly
+to the /dev/mem xlate_dev_mem_ptr() and unxlate_dev_mem_ptr() callbacks.
+That way an architecture can deal with specific physical memory ranges.
+
+Re-use the existing /dev/mem callback implementation on s390, which
+handles the described prefix page swapping correctly.
+
+For other architectures the default callback is basically a NOP. It is
+expected that the condition (vaddr == __va(__pa(vaddr))) always holds
+true for the KCORE_RAM memory type.
+
+Link: https://lkml.kernel.org/r/20240930122119.1651546-1-agordeev@linux.ibm.com
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Suggested-by: Heiko Carstens <hca@linux.ibm.com>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/include/asm/io.h | 2 ++
+ fs/proc/kcore.c | 36 ++++++++++++++++++++++++++++++++++--
+ 2 files changed, 36 insertions(+), 2 deletions(-)
+
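+A hedged sketch of what an architecture override looks like; the helper
+names here are hypothetical, while s390 simply reuses its existing
+/dev/mem translation callbacks as shown in the hunk below.
+
+	/* in the architecture's <asm/io.h> */
+	#define kc_xlate_dev_mem_ptr kc_xlate_dev_mem_ptr
+	static inline void *kc_xlate_dev_mem_ptr(phys_addr_t phys)
+	{
+		/* return a kernel-virtual alias for 'phys', or NULL on failure */
+		return my_arch_map_phys(phys);		/* hypothetical */
+	}
+
+	#define kc_unxlate_dev_mem_ptr kc_unxlate_dev_mem_ptr
+	static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt)
+	{
+		my_arch_unmap_phys(phys, virt);		/* hypothetical */
+	}
+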
+diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h
+index e3882b012bfa4..70e679d87984b 100644
+--- a/arch/s390/include/asm/io.h
++++ b/arch/s390/include/asm/io.h
+@@ -16,8 +16,10 @@
+ #include <asm/pci_io.h>
+
+ #define xlate_dev_mem_ptr xlate_dev_mem_ptr
++#define kc_xlate_dev_mem_ptr xlate_dev_mem_ptr
+ void *xlate_dev_mem_ptr(phys_addr_t phys);
+ #define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
++#define kc_unxlate_dev_mem_ptr unxlate_dev_mem_ptr
+ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
+
+ #define IO_SPACE_LIMIT 0
+diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
+index 87a46f2d84195..a2d430549012f 100644
+--- a/fs/proc/kcore.c
++++ b/fs/proc/kcore.c
+@@ -51,6 +51,20 @@ static struct proc_dir_entry *proc_root_kcore;
+ #define kc_offset_to_vaddr(o) ((o) + PAGE_OFFSET)
+ #endif
+
++#ifndef kc_xlate_dev_mem_ptr
++#define kc_xlate_dev_mem_ptr kc_xlate_dev_mem_ptr
++static inline void *kc_xlate_dev_mem_ptr(phys_addr_t phys)
++{
++ return __va(phys);
++}
++#endif
++#ifndef kc_unxlate_dev_mem_ptr
++#define kc_unxlate_dev_mem_ptr kc_unxlate_dev_mem_ptr
++static inline void kc_unxlate_dev_mem_ptr(phys_addr_t phys, void *virt)
++{
++}
++#endif
++
+ static LIST_HEAD(kclist_head);
+ static DECLARE_RWSEM(kclist_lock);
+ static int kcore_need_update = 1;
+@@ -474,6 +488,8 @@ read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
+ while (buflen) {
+ struct page *page;
+ unsigned long pfn;
++ phys_addr_t phys;
++ void *__start;
+
+ /*
+ * If this is the first iteration or the address is not within
+@@ -523,7 +539,8 @@ read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
+ }
+ break;
+ case KCORE_RAM:
+- pfn = __pa(start) >> PAGE_SHIFT;
++ phys = __pa(start);
++ pfn = phys >> PAGE_SHIFT;
+ page = pfn_to_online_page(pfn);
+
+ /*
+@@ -542,13 +559,28 @@ read_kcore_iter(struct kiocb *iocb, struct iov_iter *iter)
+ fallthrough;
+ case KCORE_VMEMMAP:
+ case KCORE_TEXT:
++ if (m->type == KCORE_RAM) {
++ __start = kc_xlate_dev_mem_ptr(phys);
++ if (!__start) {
++ ret = -ENOMEM;
++ if (iov_iter_zero(tsz, iter) != tsz)
++ ret = -EFAULT;
++ goto out;
++ }
++ } else {
++ __start = (void *)start;
++ }
++
+ /*
+ * Sadly we must use a bounce buffer here to be able to
+ * make use of copy_from_kernel_nofault(), as these
+ * memory regions might not always be mapped on all
+ * architectures.
+ */
+- if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
++ ret = copy_from_kernel_nofault(buf, __start, tsz);
++ if (m->type == KCORE_RAM)
++ kc_unxlate_dev_mem_ptr(phys, __start);
++ if (ret) {
+ if (iov_iter_zero(tsz, iter) != tsz) {
+ ret = -EFAULT;
+ goto out;
+--
+2.43.0
+
--- /dev/null
+From 42d99b3517073683091dce06d2ab83cd622cc812 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Oct 2022 15:40:14 +0800
+Subject: mm: remove kern_addr_valid() completely
+
+From: Kefeng Wang <wangkefeng.wang@huawei.com>
+
+[ Upstream commit e025ab842ec35225b1a8e163d1f311beb9e38ce9 ]
+
+Most architectures (except arm64/x86/sparc) simply return 1 for
+kern_addr_valid(), which is only used in read_kcore(), and read_kcore()
+already calls copy_from_kernel_nofault(), which checks whether the
+address is a valid kernel address. So, as there is no need for
+kern_addr_valid(), let's remove it.
+
+Link: https://lkml.kernel.org/r/20221018074014.185687-1-wangkefeng.wang@huawei.com
+Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
+Acked-by: Geert Uytterhoeven <geert@linux-m68k.org> [m68k]
+Acked-by: Heiko Carstens <hca@linux.ibm.com> [s390]
+Acked-by: Christoph Hellwig <hch@lst.de>
+Acked-by: Helge Deller <deller@gmx.de> [parisc]
+Acked-by: Michael Ellerman <mpe@ellerman.id.au> [powerpc]
+Acked-by: Guo Ren <guoren@kernel.org> [csky]
+Acked-by: Catalin Marinas <catalin.marinas@arm.com> [arm64]
+Cc: Alexander Gordeev <agordeev@linux.ibm.com>
+Cc: Andy Lutomirski <luto@kernel.org>
+Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
+Cc: <aou@eecs.berkeley.edu>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
+Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
+Cc: Chris Zankel <chris@zankel.net>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: David S. Miller <davem@davemloft.net>
+Cc: Dinh Nguyen <dinguyen@kernel.org>
+Cc: Greg Ungerer <gerg@linux-m68k.org>
+Cc: H. Peter Anvin <hpa@zytor.com>
+Cc: Huacai Chen <chenhuacai@kernel.org>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
+Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
+Cc: Johannes Berg <johannes@sipsolutions.net>
+Cc: Jonas Bonn <jonas@southpole.se>
+Cc: Matt Turner <mattst88@gmail.com>
+Cc: Max Filippov <jcmvbkbc@gmail.com>
+Cc: Michal Simek <monstr@monstr.eu>
+Cc: Nicholas Piggin <npiggin@gmail.com>
+Cc: Palmer Dabbelt <palmer@rivosinc.com>
+Cc: Paul Walmsley <paul.walmsley@sifive.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Richard Henderson <richard.henderson@linaro.org>
+Cc: Richard Weinberger <richard@nod.at>
+Cc: Rich Felker <dalias@libc.org>
+Cc: Russell King <linux@armlinux.org.uk>
+Cc: Stafford Horne <shorne@gmail.com>
+Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+Cc: Sven Schnelle <svens@linux.ibm.com>
+Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Vasily Gorbik <gor@linux.ibm.com>
+Cc: Vineet Gupta <vgupta@kernel.org>
+Cc: Will Deacon <will@kernel.org>
+Cc: Xuerui Wang <kernel@xen0n.name>
+Cc: Yoshinori Sato <ysato@users.osdn.me>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 3d5854d75e31 ("fs/proc/kcore.c: allow translation of physical memory addresses")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/alpha/include/asm/pgtable.h | 2 -
+ arch/arc/include/asm/pgtable-bits-arcv2.h | 2 -
+ arch/arm/include/asm/pgtable-nommu.h | 2 -
+ arch/arm/include/asm/pgtable.h | 4 --
+ arch/arm64/include/asm/pgtable.h | 2 -
+ arch/arm64/mm/mmu.c | 47 -----------------------
+ arch/arm64/mm/pageattr.c | 3 +-
+ arch/csky/include/asm/pgtable.h | 3 --
+ arch/hexagon/include/asm/page.h | 7 ----
+ arch/ia64/include/asm/pgtable.h | 16 --------
+ arch/loongarch/include/asm/pgtable.h | 2 -
+ arch/m68k/include/asm/pgtable_mm.h | 2 -
+ arch/m68k/include/asm/pgtable_no.h | 1 -
+ arch/microblaze/include/asm/pgtable.h | 3 --
+ arch/mips/include/asm/pgtable.h | 2 -
+ arch/nios2/include/asm/pgtable.h | 2 -
+ arch/openrisc/include/asm/pgtable.h | 2 -
+ arch/parisc/include/asm/pgtable.h | 15 --------
+ arch/powerpc/include/asm/pgtable.h | 7 ----
+ arch/riscv/include/asm/pgtable.h | 2 -
+ arch/s390/include/asm/pgtable.h | 2 -
+ arch/sh/include/asm/pgtable.h | 2 -
+ arch/sparc/include/asm/pgtable_32.h | 6 ---
+ arch/sparc/mm/init_32.c | 3 +-
+ arch/sparc/mm/init_64.c | 1 -
+ arch/um/include/asm/pgtable.h | 2 -
+ arch/x86/include/asm/pgtable_32.h | 9 -----
+ arch/x86/include/asm/pgtable_64.h | 1 -
+ arch/x86/mm/init_64.c | 41 --------------------
+ arch/xtensa/include/asm/pgtable.h | 2 -
+ fs/proc/kcore.c | 26 +++++--------
+ 31 files changed, 11 insertions(+), 210 deletions(-)
+
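+For context, a small sketch (hypothetical helper, not added by this
+patch) of why the check is redundant: copy_from_kernel_nofault() already
+fails cleanly, rather than faulting, when the source is not a readable
+kernel mapping, so it doubles as the validity probe.
+
+	static bool kernel_range_readable(const void *src, size_t len)
+	{
+		char probe[16];
+
+		len = min(len, sizeof(probe));
+		return copy_from_kernel_nofault(probe, src, len) == 0;
+	}
+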
+diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
+index 3ea9661c09ffc..9e45f6735d5d2 100644
+--- a/arch/alpha/include/asm/pgtable.h
++++ b/arch/alpha/include/asm/pgtable.h
+@@ -313,8 +313,6 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+ #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+-#define kern_addr_valid(addr) (1)
+-
+ #define pte_ERROR(e) \
+ printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
+ #define pmd_ERROR(e) \
+diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
+index b23be557403e3..515e82db519fe 100644
+--- a/arch/arc/include/asm/pgtable-bits-arcv2.h
++++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
+@@ -120,8 +120,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+ #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+-#define kern_addr_valid(addr) (1)
+-
+ #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ #include <asm/hugepage.h>
+ #endif
+diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
+index 090011394477f..61480d096054d 100644
+--- a/arch/arm/include/asm/pgtable-nommu.h
++++ b/arch/arm/include/asm/pgtable-nommu.h
+@@ -21,8 +21,6 @@
+ #define pgd_none(pgd) (0)
+ #define pgd_bad(pgd) (0)
+ #define pgd_clear(pgdp)
+-#define kern_addr_valid(addr) (1)
+-/* FIXME */
+ /*
+ * PMD_SHIFT determines the size of the area a second-level page table can map
+ * PGDIR_SHIFT determines what a third-level page table entry can map
+diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
+index ef48a55e9af83..f049072b2e858 100644
+--- a/arch/arm/include/asm/pgtable.h
++++ b/arch/arm/include/asm/pgtable.h
+@@ -300,10 +300,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ */
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
+
+-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+-/* FIXME: this is not correct */
+-#define kern_addr_valid(addr) (1)
+-
+ /*
+ * We provide our own arch_get_unmapped_area to cope with VIPT caches.
+ */
+diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
+index 56c7df4c65325..1d713cfb0af16 100644
+--- a/arch/arm64/include/asm/pgtable.h
++++ b/arch/arm64/include/asm/pgtable.h
+@@ -1027,8 +1027,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
+ */
+ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
+
+-extern int kern_addr_valid(unsigned long addr);
+-
+ #ifdef CONFIG_ARM64_MTE
+
+ #define __HAVE_ARCH_PREPARE_TO_SWAP
+diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
+index 4b302dbf78e96..6a4f118fb25f4 100644
+--- a/arch/arm64/mm/mmu.c
++++ b/arch/arm64/mm/mmu.c
+@@ -875,53 +875,6 @@ void __init paging_init(void)
+ create_idmap();
+ }
+
+-/*
+- * Check whether a kernel address is valid (derived from arch/x86/).
+- */
+-int kern_addr_valid(unsigned long addr)
+-{
+- pgd_t *pgdp;
+- p4d_t *p4dp;
+- pud_t *pudp, pud;
+- pmd_t *pmdp, pmd;
+- pte_t *ptep, pte;
+-
+- addr = arch_kasan_reset_tag(addr);
+- if ((((long)addr) >> VA_BITS) != -1UL)
+- return 0;
+-
+- pgdp = pgd_offset_k(addr);
+- if (pgd_none(READ_ONCE(*pgdp)))
+- return 0;
+-
+- p4dp = p4d_offset(pgdp, addr);
+- if (p4d_none(READ_ONCE(*p4dp)))
+- return 0;
+-
+- pudp = pud_offset(p4dp, addr);
+- pud = READ_ONCE(*pudp);
+- if (pud_none(pud))
+- return 0;
+-
+- if (pud_sect(pud))
+- return pfn_valid(pud_pfn(pud));
+-
+- pmdp = pmd_offset(pudp, addr);
+- pmd = READ_ONCE(*pmdp);
+- if (pmd_none(pmd))
+- return 0;
+-
+- if (pmd_sect(pmd))
+- return pfn_valid(pmd_pfn(pmd));
+-
+- ptep = pte_offset_kernel(pmdp, addr);
+- pte = READ_ONCE(*ptep);
+- if (pte_none(pte))
+- return 0;
+-
+- return pfn_valid(pte_pfn(pte));
+-}
+-
+ #ifdef CONFIG_MEMORY_HOTPLUG
+ static void free_hotplug_page_range(struct page *page, size_t size,
+ struct vmem_altmap *altmap)
+diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
+index 425b398f8d456..0a62f458c5cb0 100644
+--- a/arch/arm64/mm/pageattr.c
++++ b/arch/arm64/mm/pageattr.c
+@@ -204,8 +204,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
+
+ /*
+ * This function is used to determine if a linear map page has been marked as
+- * not-valid. Walk the page table and check the PTE_VALID bit. This is based
+- * on kern_addr_valid(), which almost does what we need.
++ * not-valid. Walk the page table and check the PTE_VALID bit.
+ *
+ * Because this is only called on the kernel linear map, p?d_sect() implies
+ * p?d_present(). When debug_pagealloc is enabled, sections mappings are
+diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
+index c3d9b92cbe61c..77bc6caff2d23 100644
+--- a/arch/csky/include/asm/pgtable.h
++++ b/arch/csky/include/asm/pgtable.h
+@@ -249,9 +249,6 @@ extern void paging_init(void);
+ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
+ pte_t *pte);
+
+-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+-#define kern_addr_valid(addr) (1)
+-
+ #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
+ remap_pfn_range(vma, vaddr, pfn, size, prot)
+
+diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h
+index 7cbf719c578ec..d7d4f9fca3279 100644
+--- a/arch/hexagon/include/asm/page.h
++++ b/arch/hexagon/include/asm/page.h
+@@ -131,13 +131,6 @@ static inline void clear_page(void *page)
+
+ #define page_to_virt(page) __va(page_to_phys(page))
+
+-/*
+- * For port to Hexagon Virtual Machine, MAYBE we check for attempts
+- * to reference reserved HVM space, but in any case, the VM will be
+- * protected.
+- */
+-#define kern_addr_valid(addr) (1)
+-
+ #include <asm/mem-layout.h>
+ #include <asm-generic/memory_model.h>
+ /* XXX Todo: implement assembly-optimized version of getorder. */
+diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
+index 6925e28ae61d1..01517a5e67789 100644
+--- a/arch/ia64/include/asm/pgtable.h
++++ b/arch/ia64/include/asm/pgtable.h
+@@ -181,22 +181,6 @@ ia64_phys_addr_valid (unsigned long addr)
+ return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
+ }
+
+-/*
+- * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
+- * memory. For the return value to be meaningful, ADDR must be >=
+- * PAGE_OFFSET. This operation can be relatively expensive (e.g.,
+- * require a hash-, or multi-level tree-lookup or something of that
+- * sort) but it guarantees to return TRUE only if accessing the page
+- * at that address does not cause an error. Note that there may be
+- * addresses for which kern_addr_valid() returns FALSE even though an
+- * access would not cause an error (e.g., this is typically true for
+- * memory mapped I/O regions.
+- *
+- * XXX Need to implement this for IA-64.
+- */
+-#define kern_addr_valid(addr) (1)
+-
+-
+ /*
+ * Now come the defines and routines to manage and access the three-level
+ * page table.
+diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
+index f991e678ca4b7..103df0eb8642a 100644
+--- a/arch/loongarch/include/asm/pgtable.h
++++ b/arch/loongarch/include/asm/pgtable.h
+@@ -425,8 +425,6 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+ __update_tlb(vma, address, (pte_t *)pmdp);
+ }
+
+-#define kern_addr_valid(addr) (1)
+-
+ static inline unsigned long pmd_pfn(pmd_t pmd)
+ {
+ return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
+diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
+index 9b4e2fe2ac821..b93c41fe20678 100644
+--- a/arch/m68k/include/asm/pgtable_mm.h
++++ b/arch/m68k/include/asm/pgtable_mm.h
+@@ -145,8 +145,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
+
+ #endif /* !__ASSEMBLY__ */
+
+-#define kern_addr_valid(addr) (1)
+-
+ /* MMU-specific headers */
+
+ #ifdef CONFIG_SUN3
+diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
+index bce5ca56c3883..fed58da3a6b65 100644
+--- a/arch/m68k/include/asm/pgtable_no.h
++++ b/arch/m68k/include/asm/pgtable_no.h
+@@ -20,7 +20,6 @@
+ #define pgd_none(pgd) (0)
+ #define pgd_bad(pgd) (0)
+ #define pgd_clear(pgdp)
+-#define kern_addr_valid(addr) (1)
+ #define pmd_offset(a, b) ((void *)0)
+
+ #define PAGE_NONE __pgprot(0)
+diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
+index ba348e997dbb4..42f5988e998b8 100644
+--- a/arch/microblaze/include/asm/pgtable.h
++++ b/arch/microblaze/include/asm/pgtable.h
+@@ -416,9 +416,6 @@ extern unsigned long iopa(unsigned long addr);
+ #define IOMAP_NOCACHE_NONSER 2
+ #define IOMAP_NO_COPYBACK 3
+
+-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+-#define kern_addr_valid(addr) (1)
+-
+ void do_page_fault(struct pt_regs *regs, unsigned long address,
+ unsigned long error_code);
+
+diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
+index 4678627673dfe..a68c0b01d8cdc 100644
+--- a/arch/mips/include/asm/pgtable.h
++++ b/arch/mips/include/asm/pgtable.h
+@@ -550,8 +550,6 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
+ __update_tlb(vma, address, pte);
+ }
+
+-#define kern_addr_valid(addr) (1)
+-
+ /*
+ * Allow physical addresses to be fixed up to help 36-bit peripherals.
+ */
+diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
+index b3d45e815295f..ab793bc517f5c 100644
+--- a/arch/nios2/include/asm/pgtable.h
++++ b/arch/nios2/include/asm/pgtable.h
+@@ -249,8 +249,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
+ #define __swp_entry_to_pte(swp) ((pte_t) { (swp).val })
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+
+-#define kern_addr_valid(addr) (1)
+-
+ extern void __init paging_init(void);
+ extern void __init mmu_init(void);
+
+diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
+index dcae8aea132fd..6477c17b3062d 100644
+--- a/arch/openrisc/include/asm/pgtable.h
++++ b/arch/openrisc/include/asm/pgtable.h
+@@ -395,8 +395,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+ #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+-#define kern_addr_valid(addr) (1)
+-
+ typedef pte_t *pte_addr_t;
+
+ #endif /* __ASSEMBLY__ */
+diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
+index 68ae77069d23f..ea357430aafeb 100644
+--- a/arch/parisc/include/asm/pgtable.h
++++ b/arch/parisc/include/asm/pgtable.h
+@@ -23,21 +23,6 @@
+ #include <asm/processor.h>
+ #include <asm/cache.h>
+
+-/*
+- * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
+- * memory. For the return value to be meaningful, ADDR must be >=
+- * PAGE_OFFSET. This operation can be relatively expensive (e.g.,
+- * require a hash-, or multi-level tree-lookup or something of that
+- * sort) but it guarantees to return TRUE only if accessing the page
+- * at that address does not cause an error. Note that there may be
+- * addresses for which kern_addr_valid() returns FALSE even though an
+- * access would not cause an error (e.g., this is typically true for
+- * memory mapped I/O regions.
+- *
+- * XXX Need to implement this for parisc.
+- */
+-#define kern_addr_valid(addr) (1)
+-
+ /* This is for the serialization of PxTLB broadcasts. At least on the N class
+ * systems, only one PxTLB inter processor broadcast can be active at any one
+ * time on the Merced bus. */
+diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
+index 283f40d05a4d7..9972626ddaf68 100644
+--- a/arch/powerpc/include/asm/pgtable.h
++++ b/arch/powerpc/include/asm/pgtable.h
+@@ -81,13 +81,6 @@ void poking_init(void);
+ extern unsigned long ioremap_bot;
+ extern const pgprot_t protection_map[16];
+
+-/*
+- * kern_addr_valid is intended to indicate whether an address is a valid
+- * kernel address. Most 32-bit archs define it as always true (like this)
+- * but most 64-bit archs actually perform a test. What should we do here?
+- */
+-#define kern_addr_valid(addr) (1)
+-
+ #ifndef CONFIG_TRANSPARENT_HUGEPAGE
+ #define pmd_large(pmd) 0
+ #endif
+diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
+index 2d9416a6a070e..7d1688f850c31 100644
+--- a/arch/riscv/include/asm/pgtable.h
++++ b/arch/riscv/include/asm/pgtable.h
+@@ -805,8 +805,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
+
+ #endif /* !CONFIG_MMU */
+
+-#define kern_addr_valid(addr) (1) /* FIXME */
+-
+ extern char _start[];
+ extern void *_dtb_early_va;
+ extern uintptr_t _dtb_early_pa;
+diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
+index 956300e3568a4..4d6ab5f0a4cf0 100644
+--- a/arch/s390/include/asm/pgtable.h
++++ b/arch/s390/include/asm/pgtable.h
+@@ -1776,8 +1776,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
+ #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
+ #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+-#define kern_addr_valid(addr) (1)
+-
+ extern int vmem_add_mapping(unsigned long start, unsigned long size);
+ extern void vmem_remove_mapping(unsigned long start, unsigned long size);
+ extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
+diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
+index 6fb9ec54cf9b4..3ce30becf6dfa 100644
+--- a/arch/sh/include/asm/pgtable.h
++++ b/arch/sh/include/asm/pgtable.h
+@@ -92,8 +92,6 @@ static inline unsigned long phys_addr_mask(void)
+
+ typedef pte_t *pte_addr_t;
+
+-#define kern_addr_valid(addr) (1)
+-
+ #define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
+
+ struct vm_area_struct;
+diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
+index 8ff549004fac4..5acc05b572e65 100644
+--- a/arch/sparc/include/asm/pgtable_32.h
++++ b/arch/sparc/include/asm/pgtable_32.h
+@@ -368,12 +368,6 @@ __get_iospace (unsigned long addr)
+ }
+ }
+
+-extern unsigned long *sparc_valid_addr_bitmap;
+-
+-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
+-#define kern_addr_valid(addr) \
+- (test_bit(__pa((unsigned long)(addr))>>20, sparc_valid_addr_bitmap))
+-
+ /*
+ * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
+ * its high 4 bits. These macros/functions put it there or get it from there.
+diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
+index d88e774c8eb49..9c0ea457bdf05 100644
+--- a/arch/sparc/mm/init_32.c
++++ b/arch/sparc/mm/init_32.c
+@@ -37,8 +37,7 @@
+
+ #include "mm_32.h"
+
+-unsigned long *sparc_valid_addr_bitmap;
+-EXPORT_SYMBOL(sparc_valid_addr_bitmap);
++static unsigned long *sparc_valid_addr_bitmap;
+
+ unsigned long phys_base;
+ EXPORT_SYMBOL(phys_base);
+diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
+index d6faee23c77dd..04f9db0c31117 100644
+--- a/arch/sparc/mm/init_64.c
++++ b/arch/sparc/mm/init_64.c
+@@ -1667,7 +1667,6 @@ bool kern_addr_valid(unsigned long addr)
+
+ return pfn_valid(pte_pfn(*pte));
+ }
+-EXPORT_SYMBOL(kern_addr_valid);
+
+ static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
+ unsigned long vend,
+diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
+index 66bc3f99d9bef..4e3052f2671a0 100644
+--- a/arch/um/include/asm/pgtable.h
++++ b/arch/um/include/asm/pgtable.h
+@@ -298,8 +298,6 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
+ ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
+ #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+
+-#define kern_addr_valid(addr) (1)
+-
+ /* Clear a kernel PTE and flush it from the TLB */
+ #define kpte_clear_flush(ptep, vaddr) \
+ do { \
+diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
+index 7c9c968a42efe..7d4ad8907297c 100644
+--- a/arch/x86/include/asm/pgtable_32.h
++++ b/arch/x86/include/asm/pgtable_32.h
+@@ -47,15 +47,6 @@ do { \
+
+ #endif /* !__ASSEMBLY__ */
+
+-/*
+- * kern_addr_valid() is (1) for FLATMEM and (0) for SPARSEMEM
+- */
+-#ifdef CONFIG_FLATMEM
+-#define kern_addr_valid(addr) (1)
+-#else
+-#define kern_addr_valid(kaddr) (0)
+-#endif
+-
+ /*
+ * This is used to calculate the .brk reservation for initial pagetables.
+ * Enough space is reserved to allocate pagetables sufficient to cover all
+diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
+index 07cd53eeec770..a629b1b9f65a6 100644
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -240,7 +240,6 @@ static inline void native_pgd_clear(pgd_t *pgd)
+ #define __swp_entry_to_pte(x) (__pte((x).val))
+ #define __swp_entry_to_pmd(x) (__pmd((x).val))
+
+-extern int kern_addr_valid(unsigned long addr);
+ extern void cleanup_highmap(void);
+
+ #define HAVE_ARCH_UNMAPPED_AREA
+diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
+index 6d294d24e488e..851711509d383 100644
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -1420,47 +1420,6 @@ void mark_rodata_ro(void)
+ debug_checkwx();
+ }
+
+-int kern_addr_valid(unsigned long addr)
+-{
+- unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
+- pgd_t *pgd;
+- p4d_t *p4d;
+- pud_t *pud;
+- pmd_t *pmd;
+- pte_t *pte;
+-
+- if (above != 0 && above != -1UL)
+- return 0;
+-
+- pgd = pgd_offset_k(addr);
+- if (pgd_none(*pgd))
+- return 0;
+-
+- p4d = p4d_offset(pgd, addr);
+- if (!p4d_present(*p4d))
+- return 0;
+-
+- pud = pud_offset(p4d, addr);
+- if (!pud_present(*pud))
+- return 0;
+-
+- if (pud_large(*pud))
+- return pfn_valid(pud_pfn(*pud));
+-
+- pmd = pmd_offset(pud, addr);
+- if (!pmd_present(*pmd))
+- return 0;
+-
+- if (pmd_large(*pmd))
+- return pfn_valid(pmd_pfn(*pmd));
+-
+- pte = pte_offset_kernel(pmd, addr);
+- if (pte_none(*pte))
+- return 0;
+-
+- return pfn_valid(pte_pfn(*pte));
+-}
+-
+ /*
+ * Block size is the minimum amount of memory which can be hotplugged or
+ * hotremoved. It must be power of two and must be equal or larger than
+diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
+index 54f577c13afa1..5b5484d707b2e 100644
+--- a/arch/xtensa/include/asm/pgtable.h
++++ b/arch/xtensa/include/asm/pgtable.h
+@@ -386,8 +386,6 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+
+ #else
+
+-#define kern_addr_valid(addr) (1)
+-
+ extern void update_mmu_cache(struct vm_area_struct * vma,
+ unsigned long address, pte_t *ptep);
+
+diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
+index dff921f7ca332..590ecb79ad8b6 100644
+--- a/fs/proc/kcore.c
++++ b/fs/proc/kcore.c
+@@ -541,25 +541,17 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
+ fallthrough;
+ case KCORE_VMEMMAP:
+ case KCORE_TEXT:
+- if (kern_addr_valid(start)) {
+- /*
+- * Using bounce buffer to bypass the
+- * hardened user copy kernel text checks.
+- */
+- if (copy_from_kernel_nofault(buf, (void *)start,
+- tsz)) {
+- if (clear_user(buffer, tsz)) {
+- ret = -EFAULT;
+- goto out;
+- }
+- } else {
+- if (copy_to_user(buffer, buf, tsz)) {
+- ret = -EFAULT;
+- goto out;
+- }
++ /*
++ * Using bounce buffer to bypass the
++ * hardened user copy kernel text checks.
++ */
++ if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
++ if (clear_user(buffer, tsz)) {
++ ret = -EFAULT;
++ goto out;
+ }
+ } else {
+- if (clear_user(buffer, tsz)) {
++ if (copy_to_user(buffer, buf, tsz)) {
+ ret = -EFAULT;
+ goto out;
+ }
+--
+2.43.0
+
--- /dev/null
+From df1320a5a6d8564e0aba032b210d9f28cb42dbd9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 27 Sep 2024 00:07:52 -0500
+Subject: selftests/mm: fix incorrect buffer->mirror size in hmm2 double_map
+ test
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Donet Tom <donettom@linux.ibm.com>
+
+[ Upstream commit 76503e1fa1a53ef041a120825d5ce81c7fe7bdd7 ]
+
+The hmm2 double_map test was failing due to an incorrect buffer->mirror
+size. The buffer->mirror size was 6, while the buffer->ptr size was 6 *
+PAGE_SIZE. The test failed because the kernel's copy_to_user function was
+attempting to copy a 6 * PAGE_SIZE buffer into buffer->mirror. Since the
+size of buffer->mirror was incorrect, copy_to_user failed.
+
+This patch corrects the buffer->mirror size to 6 * PAGE_SIZE.
+
+Test Result without this patch
+==============================
+ # RUN hmm2.hmm2_device_private.double_map ...
+ # hmm-tests.c:1680:double_map:Expected ret (-14) == 0 (0)
+ # double_map: Test terminated by assertion
+ # FAIL hmm2.hmm2_device_private.double_map
+ not ok 53 hmm2.hmm2_device_private.double_map
+
+Test Result with this patch
+===========================
+ # RUN hmm2.hmm2_device_private.double_map ...
+ # OK hmm2.hmm2_device_private.double_map
+ ok 53 hmm2.hmm2_device_private.double_map
+
+Link: https://lkml.kernel.org/r/20240927050752.51066-1-donettom@linux.ibm.com
+Fixes: fee9f6d1b8df ("mm/hmm/test: add selftests for HMM")
+Signed-off-by: Donet Tom <donettom@linux.ibm.com>
+Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Jérôme Glisse <jglisse@redhat.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Mark Brown <broonie@kernel.org>
+Cc: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: Ralph Campbell <rcampbell@nvidia.com>
+Cc: Jason Gunthorpe <jgg@mellanox.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/vm/hmm-tests.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
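+A sketch of the intended invariant (illustrative; 'npages' and
+'page_shift' follow the values named in the changelog above): the mirror
+must hold as many bytes as the mapped range, not one byte per page.
+
+	npages = 6;
+	size = npages << self->page_shift;	/* 6 * PAGE_SIZE */
+
+	buffer->size = size;
+	buffer->mirror = malloc(size);	/* was malloc(npages): only 6 bytes */
+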
+diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
+index 4adaad1b822f0..95af1a73f505f 100644
+--- a/tools/testing/selftests/vm/hmm-tests.c
++++ b/tools/testing/selftests/vm/hmm-tests.c
+@@ -1652,7 +1652,7 @@ TEST_F(hmm2, double_map)
+
+ buffer->fd = -1;
+ buffer->size = size;
+- buffer->mirror = malloc(npages);
++ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+--
+2.43.0
+
--- /dev/null
+cpufreq-generalize-of_perf_domain_get_sharing_cpumas.patch
+cpufreq-avoid-a-bad-reference-count-on-cpu-node.patch
+selftests-mm-fix-incorrect-buffer-mirror-size-in-hmm.patch
+mm-remove-kern_addr_valid-completely.patch
+fs-proc-kcore-avoid-bounce-buffer-for-ktext-data.patch
+fs-proc-kcore-convert-read_kcore-to-read_kcore_iter.patch
+fs-proc-kcore-reinstate-bounce-buffer-for-kcore_text.patch
+fs-proc-kcore.c-allow-translation-of-physical-memory.patch
+cgroup-fix-potential-overflow-issue-when-checking-ma.patch