git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.10-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Nov 2022 15:34:55 +0000 (16:34 +0100)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Nov 2022 15:34:55 +0000 (16:34 +0100)
added patches:
capabilities-fix-potential-memleak-on-error-path-from-vfs_getxattr_alloc.patch
efi-random-reduce-seed-size-to-32-bytes.patch
efi-random-use-acpi-reclaim-memory-for-random-seed.patch
fuse-add-file_modified-to-fallocate.patch
kprobe-reverse-kp-flags-when-arm_kprobe-failed.patch
perf-x86-intel-add-cooper-lake-stepping-to-isolation_ucodes.patch
perf-x86-intel-fix-pebs-event-constraints-for-icl.patch
tcp-udp-make-early_demux-back-namespacified.patch
tools-nolibc-string-fix-memcmp-implementation.patch
tracing-histogram-update-document-for-keys_max-size.patch
tracing-kprobe-fix-memory-leak-in-test_gen_kprobe-kretprobe_cmd.patch

12 files changed:
queue-5.10/capabilities-fix-potential-memleak-on-error-path-from-vfs_getxattr_alloc.patch [new file with mode: 0644]
queue-5.10/efi-random-reduce-seed-size-to-32-bytes.patch [new file with mode: 0644]
queue-5.10/efi-random-use-acpi-reclaim-memory-for-random-seed.patch [new file with mode: 0644]
queue-5.10/fuse-add-file_modified-to-fallocate.patch [new file with mode: 0644]
queue-5.10/kprobe-reverse-kp-flags-when-arm_kprobe-failed.patch [new file with mode: 0644]
queue-5.10/perf-x86-intel-add-cooper-lake-stepping-to-isolation_ucodes.patch [new file with mode: 0644]
queue-5.10/perf-x86-intel-fix-pebs-event-constraints-for-icl.patch [new file with mode: 0644]
queue-5.10/series
queue-5.10/tcp-udp-make-early_demux-back-namespacified.patch [new file with mode: 0644]
queue-5.10/tools-nolibc-string-fix-memcmp-implementation.patch [new file with mode: 0644]
queue-5.10/tracing-histogram-update-document-for-keys_max-size.patch [new file with mode: 0644]
queue-5.10/tracing-kprobe-fix-memory-leak-in-test_gen_kprobe-kretprobe_cmd.patch [new file with mode: 0644]

diff --git a/queue-5.10/capabilities-fix-potential-memleak-on-error-path-from-vfs_getxattr_alloc.patch b/queue-5.10/capabilities-fix-potential-memleak-on-error-path-from-vfs_getxattr_alloc.patch
new file mode 100644 (file)
index 0000000..e09c71b
--- /dev/null
@@ -0,0 +1,51 @@
+From 8cf0a1bc12870d148ae830a4ba88cfdf0e879cee Mon Sep 17 00:00:00 2001
+From: Gaosheng Cui <cuigaosheng1@huawei.com>
+Date: Tue, 25 Oct 2022 21:33:57 +0800
+Subject: capabilities: fix potential memleak on error path from vfs_getxattr_alloc()
+
+From: Gaosheng Cui <cuigaosheng1@huawei.com>
+
+commit 8cf0a1bc12870d148ae830a4ba88cfdf0e879cee upstream.
+
+In cap_inode_getsecurity(), we will use vfs_getxattr_alloc() to
+complete the memory allocation of tmpbuf, if we have completed
+the memory allocation of tmpbuf, but failed to call handler->get(...),
+there will be a memleak in below logic:
+
+  |-- ret = (int)vfs_getxattr_alloc(mnt_userns, ...)
+    |           /* ^^^ alloc for tmpbuf */
+    |-- value = krealloc(*xattr_value, error + 1, flags)
+    |           /* ^^^ alloc memory */
+    |-- error = handler->get(handler, ...)
+    |           /* error! */
+    |-- *xattr_value = value
+    |           /* xattr_value is &tmpbuf (memory leak!) */
+
+So we will try to free(tmpbuf) after vfs_getxattr_alloc() fails to fix it.
+
+Cc: stable@vger.kernel.org
+Fixes: 8db6c34f1dbc ("Introduce v3 namespaced file capabilities")
+Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
+Acked-by: Serge Hallyn <serge@hallyn.com>
+[PM: subject line and backtrace tweaks]
+Signed-off-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ security/commoncap.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/security/commoncap.c
++++ b/security/commoncap.c
+@@ -391,8 +391,10 @@ int cap_inode_getsecurity(struct inode *
+                                &tmpbuf, size, GFP_NOFS);
+       dput(dentry);
+-      if (ret < 0 || !tmpbuf)
+-              return ret;
++      if (ret < 0 || !tmpbuf) {
++              size = ret;
++              goto out_free;
++      }
+       fs_ns = inode->i_sb->s_user_ns;
+       cap = (struct vfs_cap_data *) tmpbuf;
diff --git a/queue-5.10/efi-random-reduce-seed-size-to-32-bytes.patch b/queue-5.10/efi-random-reduce-seed-size-to-32-bytes.patch
new file mode 100644 (file)
index 0000000..6e42205
--- /dev/null
@@ -0,0 +1,50 @@
+From 161a438d730dade2ba2b1bf8785f0759aba4ca5f Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Thu, 20 Oct 2022 10:39:08 +0200
+Subject: efi: random: reduce seed size to 32 bytes
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 161a438d730dade2ba2b1bf8785f0759aba4ca5f upstream.
+
+We no longer need at least 64 bytes of random seed to permit the early
+crng init to complete. The RNG is now based on Blake2s, so reduce the
+EFI seed size to the Blake2s hash size, which is sufficient for our
+purposes.
+
+While at it, drop the READ_ONCE(), which was supposed to prevent size
+from being evaluated after seed was unmapped. However, this cannot
+actually happen, so READ_ONCE() is unnecessary here.
+
+Cc: <stable@vger.kernel.org> # v4.14+
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Reviewed-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/efi.c |    2 +-
+ include/linux/efi.h        |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/efi/efi.c
++++ b/drivers/firmware/efi/efi.c
+@@ -590,7 +590,7 @@ int __init efi_config_parse_tables(const
+               seed = early_memremap(efi_rng_seed, sizeof(*seed));
+               if (seed != NULL) {
+-                      size = READ_ONCE(seed->size);
++                      size = min(seed->size, EFI_RANDOM_SEED_SIZE);
+                       early_memunmap(seed, sizeof(*seed));
+               } else {
+                       pr_err("Could not map UEFI random seed!\n");
+--- a/include/linux/efi.h
++++ b/include/linux/efi.h
+@@ -1161,7 +1161,7 @@ void efi_retrieve_tpm2_eventlog(void);
+       arch_efi_call_virt_teardown();                                  \
+ })
+-#define EFI_RANDOM_SEED_SIZE          64U
++#define EFI_RANDOM_SEED_SIZE          32U // BLAKE2S_HASH_SIZE
+ struct linux_efi_random_seed {
+       u32     size;
diff --git a/queue-5.10/efi-random-use-acpi-reclaim-memory-for-random-seed.patch b/queue-5.10/efi-random-use-acpi-reclaim-memory-for-random-seed.patch
new file mode 100644 (file)
index 0000000..8f5fca6
--- /dev/null
@@ -0,0 +1,59 @@
+From 7d866e38c7e9ece8a096d0d098fa9d92b9d4f97e Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ardb@kernel.org>
+Date: Thu, 20 Oct 2022 10:39:09 +0200
+Subject: efi: random: Use 'ACPI reclaim' memory for random seed
+
+From: Ard Biesheuvel <ardb@kernel.org>
+
+commit 7d866e38c7e9ece8a096d0d098fa9d92b9d4f97e upstream.
+
+EFI runtime services data is guaranteed to be preserved by the OS,
+making it a suitable candidate for the EFI random seed table, which may
+be passed to kexec kernels as well (after refreshing the seed), and so
+we need to ensure that the memory is preserved without support from the
+OS itself.
+
+However, runtime services data is intended for allocations that are
+relevant to the implementations of the runtime services themselves, and
+so they are unmapped from the kernel linear map, and mapped into the EFI
+page tables that are active while runtime service invocations are in
+progress. None of this is needed for the RNG seed.
+
+So let's switch to EFI 'ACPI reclaim' memory: in spite of the name,
+there is nothing exclusively ACPI about it, it is simply a type of
+allocation that carries firmware provided data which may or may not be
+relevant to the OS, and it is left up to the OS to decide whether to
+reclaim it after having consumed its contents.
+
+Given that in Linux, we never reclaim these allocations, it is a good
+choice for the EFI RNG seed, as the allocation is guaranteed to survive
+kexec reboots.
+
+One additional reason for changing this now is to align it with the
+upcoming recommendation for EFI bootloader provided RNG seeds, which
+must not use EFI runtime services code/data allocations.
+
+Cc: <stable@vger.kernel.org> # v4.14+
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Reviewed-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/efi/libstub/random.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/drivers/firmware/efi/libstub/random.c
++++ b/drivers/firmware/efi/libstub/random.c
+@@ -75,7 +75,12 @@ efi_status_t efi_random_get_seed(void)
+       if (status != EFI_SUCCESS)
+               return status;
+-      status = efi_bs_call(allocate_pool, EFI_RUNTIME_SERVICES_DATA,
++      /*
++       * Use EFI_ACPI_RECLAIM_MEMORY here so that it is guaranteed that the
++       * allocation will survive a kexec reboot (although we refresh the seed
++       * beforehand)
++       */
++      status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY,
+                            sizeof(*seed) + EFI_RANDOM_SEED_SIZE,
+                            (void **)&seed);
+       if (status != EFI_SUCCESS)
diff --git a/queue-5.10/fuse-add-file_modified-to-fallocate.patch b/queue-5.10/fuse-add-file_modified-to-fallocate.patch
new file mode 100644 (file)
index 0000000..ac94bf3
--- /dev/null
@@ -0,0 +1,33 @@
+From 4a6f278d4827b59ba26ceae0ff4529ee826aa258 Mon Sep 17 00:00:00 2001
+From: Miklos Szeredi <mszeredi@redhat.com>
+Date: Fri, 28 Oct 2022 14:25:20 +0200
+Subject: fuse: add file_modified() to fallocate
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+commit 4a6f278d4827b59ba26ceae0ff4529ee826aa258 upstream.
+
+Add missing file_modified() call to fuse_file_fallocate().  Without this
+fallocate on fuse failed to clear privileges.
+
+Fixes: 05ba1f082300 ("fuse: add FALLOCATE operation")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/fuse/file.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -3311,6 +3311,10 @@ static long fuse_file_fallocate(struct f
+                       goto out;
+       }
++      err = file_modified(file);
++      if (err)
++              goto out;
++
+       if (!(mode & FALLOC_FL_KEEP_SIZE))
+               set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
diff --git a/queue-5.10/kprobe-reverse-kp-flags-when-arm_kprobe-failed.patch b/queue-5.10/kprobe-reverse-kp-flags-when-arm_kprobe-failed.patch
new file mode 100644 (file)
index 0000000..13f12ed
--- /dev/null
@@ -0,0 +1,41 @@
+From 4a6f316d6855a434f56dbbeba05e14c01acde8f8 Mon Sep 17 00:00:00 2001
+From: Li Qiang <liq3ea@163.com>
+Date: Fri, 4 Nov 2022 08:49:31 +0900
+Subject: kprobe: reverse kp->flags when arm_kprobe failed
+
+From: Li Qiang <liq3ea@163.com>
+
+commit 4a6f316d6855a434f56dbbeba05e14c01acde8f8 upstream.
+
+In aggregate kprobe case, when arm_kprobe failed,
+we need set the kp->flags with KPROBE_FLAG_DISABLED again.
+If not, the 'kp' kprobe will been considered as enabled
+but it actually not enabled.
+
+Link: https://lore.kernel.org/all/20220902155820.34755-1-liq3ea@163.com/
+
+Fixes: 12310e343755 ("kprobes: Propagate error from arm_kprobe_ftrace()")
+Cc: stable@vger.kernel.org
+Signed-off-by: Li Qiang <liq3ea@163.com>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/kprobes.c |    5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+--- a/kernel/kprobes.c
++++ b/kernel/kprobes.c
+@@ -2335,8 +2335,11 @@ int enable_kprobe(struct kprobe *kp)
+       if (!kprobes_all_disarmed && kprobe_disabled(p)) {
+               p->flags &= ~KPROBE_FLAG_DISABLED;
+               ret = arm_kprobe(p);
+-              if (ret)
++              if (ret) {
+                       p->flags |= KPROBE_FLAG_DISABLED;
++                      if (p != kp)
++                              kp->flags |= KPROBE_FLAG_DISABLED;
++              }
+       }
+ out:
+       mutex_unlock(&kprobe_mutex);
diff --git a/queue-5.10/perf-x86-intel-add-cooper-lake-stepping-to-isolation_ucodes.patch b/queue-5.10/perf-x86-intel-add-cooper-lake-stepping-to-isolation_ucodes.patch
new file mode 100644 (file)
index 0000000..ff65725
--- /dev/null
@@ -0,0 +1,35 @@
+From 6f8faf471446844bb9c318e0340221049d5c19f4 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Mon, 31 Oct 2022 08:45:50 -0700
+Subject: perf/x86/intel: Add Cooper Lake stepping to isolation_ucodes[]
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit 6f8faf471446844bb9c318e0340221049d5c19f4 upstream.
+
+The intel_pebs_isolation quirk checks both model number and stepping.
+Cooper Lake has a different stepping (11) than the other Skylake Xeon.
+It cannot benefit from the optimization in commit 9b545c04abd4f
+("perf/x86/kvm: Avoid unnecessary work in guest filtering").
+
+Add the stepping of Cooper Lake into the isolation_ucodes[] table.
+
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20221031154550.571663-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/core.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/arch/x86/events/intel/core.c
++++ b/arch/x86/events/intel/core.c
+@@ -4412,6 +4412,7 @@ static const struct x86_cpu_desc isolati
+       INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             5, 0x00000000),
+       INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             6, 0x00000000),
+       INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,             7, 0x00000000),
++      INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_X,            11, 0x00000000),
+       INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE_L,             3, 0x0000007c),
+       INTEL_CPU_DESC(INTEL_FAM6_SKYLAKE,               3, 0x0000007c),
+       INTEL_CPU_DESC(INTEL_FAM6_KABYLAKE,              9, 0x0000004e),
diff --git a/queue-5.10/perf-x86-intel-fix-pebs-event-constraints-for-icl.patch b/queue-5.10/perf-x86-intel-fix-pebs-event-constraints-for-icl.patch
new file mode 100644 (file)
index 0000000..dd68d84
--- /dev/null
@@ -0,0 +1,41 @@
+From acc5568b90c19ac6375508a93b9676cd18a92a35 Mon Sep 17 00:00:00 2001
+From: Kan Liang <kan.liang@linux.intel.com>
+Date: Mon, 31 Oct 2022 08:41:18 -0700
+Subject: perf/x86/intel: Fix pebs event constraints for ICL
+
+From: Kan Liang <kan.liang@linux.intel.com>
+
+commit acc5568b90c19ac6375508a93b9676cd18a92a35 upstream.
+
+According to the latest event list, update the MEM_INST_RETIRED events
+which support the DataLA facility.
+
+Fixes: 6017608936c1 ("perf/x86/intel: Add Icelake support")
+Reported-by: Jannis Klinkenberg <jannis.klinkenberg@rwth-aachen.de>
+Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20221031154119.571386-1-kan.liang@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/events/intel/ds.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/events/intel/ds.c
++++ b/arch/x86/events/intel/ds.c
+@@ -855,8 +855,13 @@ struct event_constraint intel_icl_pebs_e
+       INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL),  /* SLOTS */
+       INTEL_PLD_CONSTRAINT(0x1cd, 0xff),                      /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+-      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf),    /* MEM_INST_RETIRED.LOAD */
+-      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x2d0, 0xf),    /* MEM_INST_RETIRED.STORE */
++      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x11d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_LOADS */
++      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x12d0, 0xf),   /* MEM_INST_RETIRED.STLB_MISS_STORES */
++      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x21d0, 0xf),   /* MEM_INST_RETIRED.LOCK_LOADS */
++      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x41d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_LOADS */
++      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x42d0, 0xf),   /* MEM_INST_RETIRED.SPLIT_STORES */
++      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x81d0, 0xf),   /* MEM_INST_RETIRED.ALL_LOADS */
++      INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_ST(0x82d0, 0xf),   /* MEM_INST_RETIRED.ALL_STORES */
+       INTEL_FLAGS_EVENT_CONSTRAINT_DATALA_LD_RANGE(0xd1, 0xd4, 0xf), /* MEM_LOAD_*_RETIRED.* */
diff --git a/queue-5.10/series b/queue-5.10/series
index 6cb9b9275ba0b74f5c4d30ce7638415462a48395..7279a6c7fe3ce88027445c6bf136cfc6fe0cbfee 100644 (file)
@@ -87,3 +87,14 @@ binder-fix-uaf-of-alloc-vma-in-race-with-munmap.patch
 coresight-cti-fix-hang-in-cti_disable_hw.patch
 btrfs-fix-type-of-parameter-generation-in-btrfs_get_dentry.patch
 ftrace-fix-use-after-free-for-dynamic-ftrace_ops.patch
+tcp-udp-make-early_demux-back-namespacified.patch
+tracing-kprobe-fix-memory-leak-in-test_gen_kprobe-kretprobe_cmd.patch
+kprobe-reverse-kp-flags-when-arm_kprobe-failed.patch
+tools-nolibc-string-fix-memcmp-implementation.patch
+tracing-histogram-update-document-for-keys_max-size.patch
+capabilities-fix-potential-memleak-on-error-path-from-vfs_getxattr_alloc.patch
+fuse-add-file_modified-to-fallocate.patch
+efi-random-reduce-seed-size-to-32-bytes.patch
+efi-random-use-acpi-reclaim-memory-for-random-seed.patch
+perf-x86-intel-fix-pebs-event-constraints-for-icl.patch
+perf-x86-intel-add-cooper-lake-stepping-to-isolation_ucodes.patch
diff --git a/queue-5.10/tcp-udp-make-early_demux-back-namespacified.patch b/queue-5.10/tcp-udp-make-early_demux-back-namespacified.patch
new file mode 100644 (file)
index 0000000..0785218
--- /dev/null
@@ -0,0 +1,349 @@
+From 11052589cf5c0bab3b4884d423d5f60c38fcf25d Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Wed, 13 Jul 2022 10:52:07 -0700
+Subject: tcp/udp: Make early_demux back namespacified.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 11052589cf5c0bab3b4884d423d5f60c38fcf25d upstream.
+
+Commit e21145a9871a ("ipv4: namespacify ip_early_demux sysctl knob") made
+it possible to enable/disable early_demux on a per-netns basis.  Then, we
+introduced two knobs, tcp_early_demux and udp_early_demux, to switch it for
+TCP/UDP in commit dddb64bcb346 ("net: Add sysctl to toggle early demux for
+tcp and udp").  However, the .proc_handler() was wrong and actually
+disabled us from changing the behaviour in each netns.
+
+We can execute early_demux if net.ipv4.ip_early_demux is on and each proto
+.early_demux() handler is not NULL.  When we toggle (tcp|udp)_early_demux,
+the change itself is saved in each netns variable, but the .early_demux()
+handler is a global variable, so the handler is switched based on the
+init_net's sysctl variable.  Thus, netns (tcp|udp)_early_demux knobs have
+nothing to do with the logic.  Whether we CAN execute proto .early_demux()
+is always decided by init_net's sysctl knob, and whether we DO it or not is
+by each netns ip_early_demux knob.
+
+This patch namespacifies (tcp|udp)_early_demux again.  For now, the users
+of the .early_demux() handler are TCP and UDP only, and they are called
+directly to avoid retpoline.  So, we can remove the .early_demux() handler
+from inet6?_protos and need not dereference them in ip6?_rcv_finish_core().
+If another proto needs .early_demux(), we can restore it at that time.
+
+Fixes: dddb64bcb346 ("net: Add sysctl to toggle early demux for tcp and udp")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20220713175207.7727-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/protocol.h     |    4 ---
+ include/net/tcp.h          |    2 -
+ include/net/udp.h          |    1 
+ net/ipv4/af_inet.c         |   14 +---------
+ net/ipv4/ip_input.c        |   35 ++++++++++++++++----------
+ net/ipv4/sysctl_net_ipv4.c |   59 +--------------------------------------------
+ net/ipv6/ip6_input.c       |   26 +++++++++++--------
+ net/ipv6/tcp_ipv6.c        |    9 +-----
+ net/ipv6/udp.c             |    9 +-----
+ 9 files changed, 46 insertions(+), 113 deletions(-)
+
+--- a/include/net/protocol.h
++++ b/include/net/protocol.h
+@@ -35,8 +35,6 @@
+ /* This is used to register protocols. */
+ struct net_protocol {
+-      int                     (*early_demux)(struct sk_buff *skb);
+-      int                     (*early_demux_handler)(struct sk_buff *skb);
+       int                     (*handler)(struct sk_buff *skb);
+       /* This returns an error if we weren't able to handle the error. */
+@@ -53,8 +51,6 @@ struct net_protocol {
+ #if IS_ENABLED(CONFIG_IPV6)
+ struct inet6_protocol {
+-      void    (*early_demux)(struct sk_buff *skb);
+-      void    (*early_demux_handler)(struct sk_buff *skb);
+       int     (*handler)(struct sk_buff *skb);
+       /* This returns an error if we weren't able to handle the error. */
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -934,7 +934,7 @@ extern const struct inet_connection_sock
+ INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
+ INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
+-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
++void tcp_v6_early_demux(struct sk_buff *skb);
+ #endif
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -176,6 +176,7 @@ INDIRECT_CALLABLE_DECLARE(int udp6_gro_c
+ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
+                               struct udphdr *uh, struct sock *sk);
+ int udp_gro_complete(struct sk_buff *skb, int nhoff, udp_lookup_t lookup);
++void udp_v6_early_demux(struct sk_buff *skb);
+ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
+                                 netdev_features_t features, bool is_ipv6);
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -1726,12 +1726,7 @@ static const struct net_protocol igmp_pr
+ };
+ #endif
+-/* thinking of making this const? Don't.
+- * early_demux can change based on sysctl.
+- */
+-static struct net_protocol tcp_protocol = {
+-      .early_demux    =       tcp_v4_early_demux,
+-      .early_demux_handler =  tcp_v4_early_demux,
++static const struct net_protocol tcp_protocol = {
+       .handler        =       tcp_v4_rcv,
+       .err_handler    =       tcp_v4_err,
+       .no_policy      =       1,
+@@ -1739,12 +1734,7 @@ static struct net_protocol tcp_protocol
+       .icmp_strict_tag_validation = 1,
+ };
+-/* thinking of making this const? Don't.
+- * early_demux can change based on sysctl.
+- */
+-static struct net_protocol udp_protocol = {
+-      .early_demux =  udp_v4_early_demux,
+-      .early_demux_handler =  udp_v4_early_demux,
++static const struct net_protocol udp_protocol = {
+       .handler =      udp_rcv,
+       .err_handler =  udp_err,
+       .no_policy =    1,
+--- a/net/ipv4/ip_input.c
++++ b/net/ipv4/ip_input.c
+@@ -309,14 +309,13 @@ static bool ip_can_use_hint(const struct
+              ip_hdr(hint)->tos == iph->tos;
+ }
+-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
+-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
++int tcp_v4_early_demux(struct sk_buff *skb);
++int udp_v4_early_demux(struct sk_buff *skb);
+ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
+                             struct sk_buff *skb, struct net_device *dev,
+                             const struct sk_buff *hint)
+ {
+       const struct iphdr *iph = ip_hdr(skb);
+-      int (*edemux)(struct sk_buff *skb);
+       struct rtable *rt;
+       int err;
+@@ -327,21 +326,29 @@ static int ip_rcv_finish_core(struct net
+                       goto drop_error;
+       }
+-      if (net->ipv4.sysctl_ip_early_demux &&
++      if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+           !skb_dst(skb) &&
+           !skb->sk &&
+           !ip_is_fragment(iph)) {
+-              const struct net_protocol *ipprot;
+-              int protocol = iph->protocol;
++              switch (iph->protocol) {
++              case IPPROTO_TCP:
++                      if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
++                              tcp_v4_early_demux(skb);
+-              ipprot = rcu_dereference(inet_protos[protocol]);
+-              if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
+-                      err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
+-                                            udp_v4_early_demux, skb);
+-                      if (unlikely(err))
+-                              goto drop_error;
+-                      /* must reload iph, skb->head might have changed */
+-                      iph = ip_hdr(skb);
++                              /* must reload iph, skb->head might have changed */
++                              iph = ip_hdr(skb);
++                      }
++                      break;
++              case IPPROTO_UDP:
++                      if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
++                              err = udp_v4_early_demux(skb);
++                              if (unlikely(err))
++                                      goto drop_error;
++
++                              /* must reload iph, skb->head might have changed */
++                              iph = ip_hdr(skb);
++                      }
++                      break;
+               }
+       }
+--- a/net/ipv4/sysctl_net_ipv4.c
++++ b/net/ipv4/sysctl_net_ipv4.c
+@@ -361,61 +361,6 @@ bad_key:
+       return ret;
+ }
+-static void proc_configure_early_demux(int enabled, int protocol)
+-{
+-      struct net_protocol *ipprot;
+-#if IS_ENABLED(CONFIG_IPV6)
+-      struct inet6_protocol *ip6prot;
+-#endif
+-
+-      rcu_read_lock();
+-
+-      ipprot = rcu_dereference(inet_protos[protocol]);
+-      if (ipprot)
+-              ipprot->early_demux = enabled ? ipprot->early_demux_handler :
+-                                              NULL;
+-
+-#if IS_ENABLED(CONFIG_IPV6)
+-      ip6prot = rcu_dereference(inet6_protos[protocol]);
+-      if (ip6prot)
+-              ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
+-                                               NULL;
+-#endif
+-      rcu_read_unlock();
+-}
+-
+-static int proc_tcp_early_demux(struct ctl_table *table, int write,
+-                              void *buffer, size_t *lenp, loff_t *ppos)
+-{
+-      int ret = 0;
+-
+-      ret = proc_dointvec(table, write, buffer, lenp, ppos);
+-
+-      if (write && !ret) {
+-              int enabled = init_net.ipv4.sysctl_tcp_early_demux;
+-
+-              proc_configure_early_demux(enabled, IPPROTO_TCP);
+-      }
+-
+-      return ret;
+-}
+-
+-static int proc_udp_early_demux(struct ctl_table *table, int write,
+-                              void *buffer, size_t *lenp, loff_t *ppos)
+-{
+-      int ret = 0;
+-
+-      ret = proc_dointvec(table, write, buffer, lenp, ppos);
+-
+-      if (write && !ret) {
+-              int enabled = init_net.ipv4.sysctl_udp_early_demux;
+-
+-              proc_configure_early_demux(enabled, IPPROTO_UDP);
+-      }
+-
+-      return ret;
+-}
+-
+ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
+                                            int write, void *buffer,
+                                            size_t *lenp, loff_t *ppos)
+@@ -685,14 +630,14 @@ static struct ctl_table ipv4_net_table[]
+               .data           = &init_net.ipv4.sysctl_udp_early_demux,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_udp_early_demux
++              .proc_handler   = proc_douintvec_minmax,
+       },
+       {
+               .procname       = "tcp_early_demux",
+               .data           = &init_net.ipv4.sysctl_tcp_early_demux,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+-              .proc_handler   = proc_tcp_early_demux
++              .proc_handler   = proc_douintvec_minmax,
+       },
+       {
+               .procname       = "nexthop_compat_mode",
+--- a/net/ipv6/ip6_input.c
++++ b/net/ipv6/ip6_input.c
+@@ -44,21 +44,25 @@
+ #include <net/inet_ecn.h>
+ #include <net/dst_metadata.h>
+-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
++void udp_v6_early_demux(struct sk_buff *);
++void tcp_v6_early_demux(struct sk_buff *);
+ static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
+                               struct sk_buff *skb)
+ {
+-      void (*edemux)(struct sk_buff *skb);
+-
+-      if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
+-              const struct inet6_protocol *ipprot;
+-
+-              ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
+-              if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
+-                      INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
+-                                      udp_v6_early_demux, skb);
++      if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
++          !skb_dst(skb) && !skb->sk) {
++              switch (ipv6_hdr(skb)->nexthdr) {
++              case IPPROTO_TCP:
++                      if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
++                              tcp_v6_early_demux(skb);
++                      break;
++              case IPPROTO_UDP:
++                      if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
++                              udp_v6_early_demux(skb);
++                      break;
++              }
+       }
++
+       if (!skb_valid_dst(skb))
+               ip6_route_input(skb);
+ }
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1818,7 +1818,7 @@ do_time_wait:
+       goto discard_it;
+ }
+-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
++void tcp_v6_early_demux(struct sk_buff *skb)
+ {
+       const struct ipv6hdr *hdr;
+       const struct tcphdr *th;
+@@ -2169,12 +2169,7 @@ struct proto tcpv6_prot = {
+ };
+ EXPORT_SYMBOL_GPL(tcpv6_prot);
+-/* thinking of making this const? Don't.
+- * early_demux can change based on sysctl.
+- */
+-static struct inet6_protocol tcpv6_protocol = {
+-      .early_demux    =       tcp_v6_early_demux,
+-      .early_demux_handler =  tcp_v6_early_demux,
++static const struct inet6_protocol tcpv6_protocol = {
+       .handler        =       tcp_v6_rcv,
+       .err_handler    =       tcp_v6_err,
+       .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -1027,7 +1027,7 @@ static struct sock *__udp6_lib_demux_loo
+       return NULL;
+ }
+-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
++void udp_v6_early_demux(struct sk_buff *skb)
+ {
+       struct net *net = dev_net(skb->dev);
+       const struct udphdr *uh;
+@@ -1640,12 +1640,7 @@ int udpv6_getsockopt(struct sock *sk, in
+       return ipv6_getsockopt(sk, level, optname, optval, optlen);
+ }
+-/* thinking of making this const? Don't.
+- * early_demux can change based on sysctl.
+- */
+-static struct inet6_protocol udpv6_protocol = {
+-      .early_demux    =       udp_v6_early_demux,
+-      .early_demux_handler =  udp_v6_early_demux,
++static const struct inet6_protocol udpv6_protocol = {
+       .handler        =       udpv6_rcv,
+       .err_handler    =       udpv6_err,
+       .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
diff --git a/queue-5.10/tools-nolibc-string-fix-memcmp-implementation.patch b/queue-5.10/tools-nolibc-string-fix-memcmp-implementation.patch
new file mode 100644 (file)
index 0000000..b7b765d
--- /dev/null
@@ -0,0 +1,45 @@
+From b3f4f51ea68a495f8a5956064c33dce711a2df91 Mon Sep 17 00:00:00 2001
+From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
+Date: Fri, 21 Oct 2022 08:01:53 +0200
+Subject: tools/nolibc/string: Fix memcmp() implementation
+
+From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
+
+commit b3f4f51ea68a495f8a5956064c33dce711a2df91 upstream.
+
+The C standard says that memcmp() must treat the buffers as consisting
+of "unsigned chars". If char happens to be unsigned, the casts are ok,
+but then obviously the c1 variable can never contain a negative
+value. And when char is signed, the casts are wrong, and there's still
+a problem with using an 8-bit quantity to hold the difference, because
+that can range from -255 to +255.
+
+For example, assuming char is signed, comparing two 1-byte buffers,
+one containing 0x00 and another 0x80, the current implementation would
+return -128 for both memcmp(a, b, 1) and memcmp(b, a, 1), whereas one
+of those should of course return something positive.
+
+Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
+Fixes: 66b6f755ad45 ("rcutorture: Import a copy of nolibc")
+Cc: stable@vger.kernel.org # v5.0+
+Signed-off-by: Willy Tarreau <w@1wt.eu>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/include/nolibc/nolibc.h |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/tools/include/nolibc/nolibc.h
++++ b/tools/include/nolibc/nolibc.h
+@@ -2318,9 +2318,9 @@ static __attribute__((unused))
+ int memcmp(const void *s1, const void *s2, size_t n)
+ {
+       size_t ofs = 0;
+-      char c1 = 0;
++      int c1 = 0;
+-      while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) {
++      while (ofs < n && !(c1 = ((unsigned char *)s1)[ofs] - ((unsigned char *)s2)[ofs])) {
+               ofs++;
+       }
+       return c1;
diff --git a/queue-5.10/tracing-histogram-update-document-for-keys_max-size.patch b/queue-5.10/tracing-histogram-update-document-for-keys_max-size.patch
new file mode 100644 (file)
index 0000000..00e2179
--- /dev/null
@@ -0,0 +1,33 @@
+From a635beeacc6d56d2b71c39e6c0103f85b53d108e Mon Sep 17 00:00:00 2001
+From: Zheng Yejian <zhengyejian1@huawei.com>
+Date: Mon, 17 Oct 2022 10:38:06 +0000
+Subject: tracing/histogram: Update document for KEYS_MAX size
+
+From: Zheng Yejian <zhengyejian1@huawei.com>
+
+commit a635beeacc6d56d2b71c39e6c0103f85b53d108e upstream.
+
+After commit 4f36c2d85ced ("tracing: Increase tracing map KEYS_MAX size"),
+'keys' supports up to three fields.
+
+Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
+Cc: stable@vger.kernel.org
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Link: https://lore.kernel.org/r/20221017103806.2479139-1-zhengyejian1@huawei.com
+Signed-off-by: Jonathan Corbet <corbet@lwn.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/trace/histogram.rst |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/Documentation/trace/histogram.rst
++++ b/Documentation/trace/histogram.rst
+@@ -39,7 +39,7 @@ Documentation written by Tom Zanussi
+   will use the event's kernel stacktrace as the key.  The keywords
+   'keys' or 'key' can be used to specify keys, and the keywords
+   'values', 'vals', or 'val' can be used to specify values.  Compound
+-  keys consisting of up to two fields can be specified by the 'keys'
++  keys consisting of up to three fields can be specified by the 'keys'
+   keyword.  Hashing a compound key produces a unique entry in the
+   table for each unique combination of component keys, and can be
+   useful for providing more fine-grained summaries of event data.
diff --git a/queue-5.10/tracing-kprobe-fix-memory-leak-in-test_gen_kprobe-kretprobe_cmd.patch b/queue-5.10/tracing-kprobe-fix-memory-leak-in-test_gen_kprobe-kretprobe_cmd.patch
new file mode 100644 (file)
index 0000000..ac05821
--- /dev/null
@@ -0,0 +1,114 @@
+From 66f0919c953ef7b55e5ab94389a013da2ce80a2c Mon Sep 17 00:00:00 2001
+From: Shang XiaoJing <shangxiaojing@huawei.com>
+Date: Wed, 2 Nov 2022 15:29:54 +0800
+Subject: tracing: kprobe: Fix memory leak in test_gen_kprobe/kretprobe_cmd()
+
+From: Shang XiaoJing <shangxiaojing@huawei.com>
+
+commit 66f0919c953ef7b55e5ab94389a013da2ce80a2c upstream.
+
+test_gen_kprobe_cmd() only frees buf in the failure path, so buf leaks
+when there is no failure. Move kfree(buf) from the failure path to the
+common path to prevent the memory leak. The same problem and fix apply
+to test_gen_kretprobe_cmd().
+
+unreferenced object 0xffff888143b14000 (size 2048):
+  comm "insmod", pid 52490, jiffies 4301890980 (age 40.553s)
+  hex dump (first 32 bytes):
+    70 3a 6b 70 72 6f 62 65 73 2f 67 65 6e 5f 6b 70  p:kprobes/gen_kp
+    72 6f 62 65 5f 74 65 73 74 20 64 6f 5f 73 79 73  robe_test do_sys
+  backtrace:
+    [<000000006d7b836b>] kmalloc_trace+0x27/0xa0
+    [<0000000009528b5b>] 0xffffffffa059006f
+    [<000000008408b580>] do_one_initcall+0x87/0x2a0
+    [<00000000c4980a7e>] do_init_module+0xdf/0x320
+    [<00000000d775aad0>] load_module+0x3006/0x3390
+    [<00000000e9a74b80>] __do_sys_finit_module+0x113/0x1b0
+    [<000000003726480d>] do_syscall_64+0x35/0x80
+    [<000000003441e93b>] entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Link: https://lore.kernel.org/all/20221102072954.26555-1-shangxiaojing@huawei.com/
+
+Fixes: 64836248dda2 ("tracing: Add kprobe event command generation test module")
+Cc: stable@vger.kernel.org
+Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/kprobe_event_gen_test.c |   18 +++++++-----------
+ 1 file changed, 7 insertions(+), 11 deletions(-)
+
+--- a/kernel/trace/kprobe_event_gen_test.c
++++ b/kernel/trace/kprobe_event_gen_test.c
+@@ -100,20 +100,20 @@ static int __init test_gen_kprobe_cmd(vo
+                                        KPROBE_GEN_TEST_FUNC,
+                                        KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1);
+       if (ret)
+-              goto free;
++              goto out;
+       /* Use kprobe_event_add_fields to add the rest of the fields */
+       ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3);
+       if (ret)
+-              goto free;
++              goto out;
+       /*
+        * This actually creates the event.
+        */
+       ret = kprobe_event_gen_cmd_end(&cmd);
+       if (ret)
+-              goto free;
++              goto out;
+       /*
+        * Now get the gen_kprobe_test event file.  We need to prevent
+@@ -136,13 +136,11 @@ static int __init test_gen_kprobe_cmd(vo
+               goto delete;
+       }
+  out:
++      kfree(buf);
+       return ret;
+  delete:
+       /* We got an error after creating the event, delete it */
+       ret = kprobe_event_delete("gen_kprobe_test");
+- free:
+-      kfree(buf);
+-
+       goto out;
+ }
+@@ -170,14 +168,14 @@ static int __init test_gen_kretprobe_cmd
+                                           KPROBE_GEN_TEST_FUNC,
+                                           "$retval");
+       if (ret)
+-              goto free;
++              goto out;
+       /*
+        * This actually creates the event.
+        */
+       ret = kretprobe_event_gen_cmd_end(&cmd);
+       if (ret)
+-              goto free;
++              goto out;
+       /*
+        * Now get the gen_kretprobe_test event file.  We need to
+@@ -201,13 +199,11 @@ static int __init test_gen_kretprobe_cmd
+               goto delete;
+       }
+  out:
++      kfree(buf);
+       return ret;
+  delete:
+       /* We got an error after creating the event, delete it */
+       ret = kprobe_event_delete("gen_kretprobe_test");
+- free:
+-      kfree(buf);
+-
+       goto out;
+ }