git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
6.1-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Aug 2023 07:24:46 +0000 (09:24 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 7 Aug 2023 07:24:46 +0000 (09:24 +0200)
added patches:
arm64-dts-stratix10-fix-incorrect-i2c-property-for-scl-signal.patch
arm64-fpsimd-clear-sme-state-in-the-target-task-when-setting-the-vl.patch
arm64-fpsimd-sync-and-zero-pad-fpsimd-state-for-streaming-sve.patch
arm64-fpsimd-sync-fpsimd-state-with-sve-for-sme-only-systems.patch
bpf-disable-preemption-in-bpf_event_output.patch
bpf-disable-preemption-in-bpf_perf_event_output.patch
ceph-defer-stopping-mdsc-delayed_work.patch
drm-i915-fix-premature-release-of-request-s-reusable-memory.patch
drm-i915-gt-cleanup-aux-invalidation-registers.patch
drm-ttm-check-null-pointer-before-accessing-when-swapping.patch
exfat-release-s_lock-before-calling-dir_emit.patch
exfat-use-kvmalloc_array-kvfree-instead-of-kmalloc_array-kfree.patch
firmware-arm_scmi-drop-of-node-reference-in-the-transport-channel-setup.patch
libceph-fix-potential-hang-in-ceph_osdc_notify.patch
mtd-rawnand-meson-fix-oob-available-bytes-for-ecc.patch
mtd-spinand-toshiba-fix-ecc_get_status.patch
net-tap_open-set-sk_uid-from-current_fsuid.patch
net-tun_chr_open-set-sk_uid-from-current_fsuid.patch
open-make-resolve_cached-correctly-test-for-o_tmpfile.patch
powerpc-ftrace-create-a-dummy-stackframe-to-fix-stack-unwind.patch
rbd-prevent-busy-loop-when-requesting-exclusive-lock.patch
rust-allocator-prevent-mis-aligned-allocation.patch
scsi-storvsc-limit-max_sectors-for-virtual-fibre-channel-devices.patch
scsi-zfcp-defer-fc_rport-blocking-until-after-adisc-response.patch
usb-zaurus-add-id-for-a-300-b-500-c-700.patch
wifi-mt76-mt7615-do-not-advertise-5-ghz-on-first-phy-of-mt7615d-dbdc.patch
x86-hyperv-disable-ibt-when-hypercall-page-lacks-endbr-instruction.patch

28 files changed:
queue-6.1/arm64-dts-stratix10-fix-incorrect-i2c-property-for-scl-signal.patch [new file with mode: 0644]
queue-6.1/arm64-fpsimd-clear-sme-state-in-the-target-task-when-setting-the-vl.patch [new file with mode: 0644]
queue-6.1/arm64-fpsimd-sync-and-zero-pad-fpsimd-state-for-streaming-sve.patch [new file with mode: 0644]
queue-6.1/arm64-fpsimd-sync-fpsimd-state-with-sve-for-sme-only-systems.patch [new file with mode: 0644]
queue-6.1/bpf-disable-preemption-in-bpf_event_output.patch [new file with mode: 0644]
queue-6.1/bpf-disable-preemption-in-bpf_perf_event_output.patch [new file with mode: 0644]
queue-6.1/ceph-defer-stopping-mdsc-delayed_work.patch [new file with mode: 0644]
queue-6.1/drm-i915-fix-premature-release-of-request-s-reusable-memory.patch [new file with mode: 0644]
queue-6.1/drm-i915-gt-cleanup-aux-invalidation-registers.patch [new file with mode: 0644]
queue-6.1/drm-ttm-check-null-pointer-before-accessing-when-swapping.patch [new file with mode: 0644]
queue-6.1/exfat-release-s_lock-before-calling-dir_emit.patch [new file with mode: 0644]
queue-6.1/exfat-use-kvmalloc_array-kvfree-instead-of-kmalloc_array-kfree.patch [new file with mode: 0644]
queue-6.1/firmware-arm_scmi-drop-of-node-reference-in-the-transport-channel-setup.patch [new file with mode: 0644]
queue-6.1/libceph-fix-potential-hang-in-ceph_osdc_notify.patch [new file with mode: 0644]
queue-6.1/mtd-rawnand-meson-fix-oob-available-bytes-for-ecc.patch [new file with mode: 0644]
queue-6.1/mtd-spinand-toshiba-fix-ecc_get_status.patch [new file with mode: 0644]
queue-6.1/net-tap_open-set-sk_uid-from-current_fsuid.patch [new file with mode: 0644]
queue-6.1/net-tun_chr_open-set-sk_uid-from-current_fsuid.patch [new file with mode: 0644]
queue-6.1/open-make-resolve_cached-correctly-test-for-o_tmpfile.patch [new file with mode: 0644]
queue-6.1/powerpc-ftrace-create-a-dummy-stackframe-to-fix-stack-unwind.patch [new file with mode: 0644]
queue-6.1/rbd-prevent-busy-loop-when-requesting-exclusive-lock.patch [new file with mode: 0644]
queue-6.1/rust-allocator-prevent-mis-aligned-allocation.patch [new file with mode: 0644]
queue-6.1/scsi-storvsc-limit-max_sectors-for-virtual-fibre-channel-devices.patch [new file with mode: 0644]
queue-6.1/scsi-zfcp-defer-fc_rport-blocking-until-after-adisc-response.patch [new file with mode: 0644]
queue-6.1/series
queue-6.1/usb-zaurus-add-id-for-a-300-b-500-c-700.patch [new file with mode: 0644]
queue-6.1/wifi-mt76-mt7615-do-not-advertise-5-ghz-on-first-phy-of-mt7615d-dbdc.patch [new file with mode: 0644]
queue-6.1/x86-hyperv-disable-ibt-when-hypercall-page-lacks-endbr-instruction.patch [new file with mode: 0644]

diff --git a/queue-6.1/arm64-dts-stratix10-fix-incorrect-i2c-property-for-scl-signal.patch b/queue-6.1/arm64-dts-stratix10-fix-incorrect-i2c-property-for-scl-signal.patch
new file mode 100644 (file)
index 0000000..4c121b0
--- /dev/null
@@ -0,0 +1,43 @@
+From db66795f61354c373ecdadbdae1ed253a96c47cb Mon Sep 17 00:00:00 2001
+From: Dinh Nguyen <dinguyen@kernel.org>
+Date: Tue, 11 Jul 2023 15:44:30 -0500
+Subject: arm64: dts: stratix10: fix incorrect I2C property for SCL signal
+
+From: Dinh Nguyen <dinguyen@kernel.org>
+
+commit db66795f61354c373ecdadbdae1ed253a96c47cb upstream.
+
+The correct dts property for the SCL falling time is
+"i2c-scl-falling-time-ns".
+
+Fixes: c8da1d15b8a4 ("arm64: dts: stratix10: i2c clock running out of spec")
+Cc: stable@vger.kernel.org
+Signed-off-by: Dinh Nguyen <dinguyen@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts      |    2 +-
+ arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts |    2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
++++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts
+@@ -128,7 +128,7 @@
+       status = "okay";
+       clock-frequency = <100000>;
+       i2c-sda-falling-time-ns = <890>;  /* hcnt */
+-      i2c-sdl-falling-time-ns = <890>;  /* lcnt */
++      i2c-scl-falling-time-ns = <890>;  /* lcnt */
+       adc@14 {
+               compatible = "lltc,ltc2497";
+--- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
++++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts
+@@ -141,7 +141,7 @@
+       status = "okay";
+       clock-frequency = <100000>;
+       i2c-sda-falling-time-ns = <890>;  /* hcnt */
+-      i2c-sdl-falling-time-ns = <890>;  /* lcnt */
++      i2c-scl-falling-time-ns = <890>;  /* lcnt */
+       adc@14 {
+               compatible = "lltc,ltc2497";
diff --git a/queue-6.1/arm64-fpsimd-clear-sme-state-in-the-target-task-when-setting-the-vl.patch b/queue-6.1/arm64-fpsimd-clear-sme-state-in-the-target-task-when-setting-the-vl.patch
new file mode 100644 (file)
index 0000000..81f8732
--- /dev/null
@@ -0,0 +1,40 @@
+From c9bb40b7f786662e33d71afe236442b0b61f0446 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 3 Aug 2023 00:46:39 +0100
+Subject: arm64/fpsimd: Clear SME state in the target task when setting the VL
+
+From: Mark Brown <broonie@kernel.org>
+
+commit c9bb40b7f786662e33d71afe236442b0b61f0446 upstream.
+
+When setting SME vector lengths we clear TIF_SME to reenable SME traps,
+doing a reallocation of the backing storage on next use. We do this using
+clear_thread_flag() which operates on the current thread, meaning that when
+setting the vector length via ptrace we may both not force traps for the
+target task and force a spurious flush of any SME state that the tracing
+task may have.
+
+Clear the flag in the target task.
+
+Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers")
+Reported-by: David Spickett <David.Spickett@arm.com>
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230803-arm64-fix-ptrace-tif-sme-v1-1-88312fd6fbfd@kernel.org
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/fpsimd.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -864,7 +864,7 @@ int vec_set_vector_length(struct task_st
+                        */
+                       task->thread.svcr &= ~(SVCR_SM_MASK |
+                                              SVCR_ZA_MASK);
+-                      clear_thread_flag(TIF_SME);
++                      clear_tsk_thread_flag(task, TIF_SME);
+                       free_sme = true;
+               }
+       }
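
For context, a minimal sketch of the distinction this one-liner turns on.
Both helpers are real kernel APIs; the sketch_* wrappers are hypothetical
and their bodies are simplified from the <linux/thread_info.h> and
<linux/sched.h> definitions:

    #include <linux/sched.h>
    #include <linux/thread_info.h>

    /*
     * clear_thread_flag() is hardwired to "current", so calling it from a
     * ptrace path clears the flag in the tracer, not the tracee.
     */
    static inline void sketch_clear_thread_flag(int flag)
    {
            clear_ti_thread_flag(current_thread_info(), flag);
    }

    /* clear_tsk_thread_flag() takes the task to operate on explicitly. */
    static inline void sketch_clear_tsk_thread_flag(struct task_struct *tsk,
                                                    int flag)
    {
            clear_ti_thread_flag(task_thread_info(tsk), flag);
    }
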
diff --git a/queue-6.1/arm64-fpsimd-sync-and-zero-pad-fpsimd-state-for-streaming-sve.patch b/queue-6.1/arm64-fpsimd-sync-and-zero-pad-fpsimd-state-for-streaming-sve.patch
new file mode 100644 (file)
index 0000000..aab4771
--- /dev/null
@@ -0,0 +1,38 @@
+From 69af56ae56a48a2522aad906c4461c6c7c092737 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 3 Aug 2023 19:33:23 +0100
+Subject: arm64/fpsimd: Sync and zero pad FPSIMD state for streaming SVE
+
+From: Mark Brown <broonie@kernel.org>
+
+commit 69af56ae56a48a2522aad906c4461c6c7c092737 upstream.
+
+We have a function sve_sync_from_fpsimd_zeropad() which is used by the
+ptrace code to update the SVE state when the user writes to the
+FPSIMD register set.  Currently this checks that the task has SVE
+enabled, but that misses updates for tasks which have streaming SVE
+enabled while SVE itself has not been enabled for the thread, so also
+do the conversion if the task has streaming SVE enabled.
+
+Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230803-arm64-fix-ptrace-ssve-no-sve-v1-3-49df214bfb3e@kernel.org
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/fpsimd.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -791,7 +791,8 @@ void sve_sync_from_fpsimd_zeropad(struct
+       void *sst = task->thread.sve_state;
+       struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
+-      if (!test_tsk_thread_flag(task, TIF_SVE))
++      if (!test_tsk_thread_flag(task, TIF_SVE) &&
++          !thread_sm_enabled(&task->thread))
+               return;
+       vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
diff --git a/queue-6.1/arm64-fpsimd-sync-fpsimd-state-with-sve-for-sme-only-systems.patch b/queue-6.1/arm64-fpsimd-sync-fpsimd-state-with-sve-for-sme-only-systems.patch
new file mode 100644 (file)
index 0000000..1ab66a7
--- /dev/null
@@ -0,0 +1,44 @@
+From 507ea5dd92d23fcf10e4d1a68a443c86a49753ed Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Thu, 3 Aug 2023 19:33:22 +0100
+Subject: arm64/fpsimd: Sync FPSIMD state with SVE for SME only systems
+
+From: Mark Brown <broonie@kernel.org>
+
+commit 507ea5dd92d23fcf10e4d1a68a443c86a49753ed upstream.
+
+Currently we guard FPSIMD/SVE state conversions with a check for the system
+supporting SVE, but SME-only systems may need to sync streaming-mode SVE
+state, so add a check for SME support too.  These functions are only used
+by the ptrace code.
+
+Fixes: e12310a0d30f ("arm64/sme: Implement ptrace support for streaming mode SVE registers")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20230803-arm64-fix-ptrace-ssve-no-sve-v1-2-49df214bfb3e@kernel.org
+Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm64/kernel/fpsimd.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/arm64/kernel/fpsimd.c
++++ b/arch/arm64/kernel/fpsimd.c
+@@ -634,7 +634,7 @@ static void fpsimd_to_sve(struct task_st
+       void *sst = task->thread.sve_state;
+       struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
+-      if (!system_supports_sve())
++      if (!system_supports_sve() && !system_supports_sme())
+               return;
+       vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread));
+@@ -660,7 +660,7 @@ static void sve_to_fpsimd(struct task_st
+       unsigned int i;
+       __uint128_t const *p;
+-      if (!system_supports_sve())
++      if (!system_supports_sve() && !system_supports_sme())
+               return;
+       vl = thread_get_cur_vl(&task->thread);
diff --git a/queue-6.1/bpf-disable-preemption-in-bpf_event_output.patch b/queue-6.1/bpf-disable-preemption-in-bpf_event_output.patch
new file mode 100644 (file)
index 0000000..e79b6ab
--- /dev/null
@@ -0,0 +1,91 @@
+From d62cc390c2e99ae267ffe4b8d7e2e08b6c758c32 Mon Sep 17 00:00:00 2001
+From: Jiri Olsa <jolsa@kernel.org>
+Date: Tue, 25 Jul 2023 10:42:06 +0200
+Subject: bpf: Disable preemption in bpf_event_output
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+commit d62cc390c2e99ae267ffe4b8d7e2e08b6c758c32 upstream.
+
+We received a report [1] of a kernel crash, which is caused by
+using the nesting protection without disabling preemption.
+
+bpf_event_output can be called by programs executed by the
+bpf_prog_run_array_cg function, which disables migration but
+keeps preemption enabled.
+
+This can cause a task to be preempted by another one inside the
+nesting protection and eventually lead to two tasks using the same
+perf_sample_data buffer, causing crashes like:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000001
+  #PF: supervisor instruction fetch in kernel mode
+  #PF: error_code(0x0010) - not-present page
+  ...
+  ? perf_output_sample+0x12a/0x9a0
+  ? finish_task_switch.isra.0+0x81/0x280
+  ? perf_event_output+0x66/0xa0
+  ? bpf_event_output+0x13a/0x190
+  ? bpf_event_output_data+0x22/0x40
+  ? bpf_prog_dfc84bbde731b257_cil_sock4_connect+0x40a/0xacb
+  ? xa_load+0x87/0xe0
+  ? __cgroup_bpf_run_filter_sock_addr+0xc1/0x1a0
+  ? release_sock+0x3e/0x90
+  ? sk_setsockopt+0x1a1/0x12f0
+  ? udp_pre_connect+0x36/0x50
+  ? inet_dgram_connect+0x93/0xa0
+  ? __sys_connect+0xb4/0xe0
+  ? udp_setsockopt+0x27/0x40
+  ? __pfx_udp_push_pending_frames+0x10/0x10
+  ? __sys_setsockopt+0xdf/0x1a0
+  ? __x64_sys_connect+0xf/0x20
+  ? do_syscall_64+0x3a/0x90
+  ? entry_SYSCALL_64_after_hwframe+0x72/0xdc
+
+Fix this by disabling preemption in bpf_event_output.
+
+[1] https://github.com/cilium/cilium/issues/26756
+Cc: stable@vger.kernel.org
+Reported-by: Oleg "livelace" Popov <o.popov@livelace.ru>
+Closes: https://github.com/cilium/cilium/issues/26756
+Fixes: 2a916f2f546c ("bpf: Use migrate_disable/enable in array macros and cgroup/lirc code.")
+Acked-by: Hou Tao <houtao1@huawei.com>
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Link: https://lore.kernel.org/r/20230725084206.580930-3-jolsa@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/bpf_trace.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -720,7 +720,6 @@ static DEFINE_PER_CPU(struct bpf_trace_s
+ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
+                    void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy)
+ {
+-      int nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
+       struct perf_raw_frag frag = {
+               .copy           = ctx_copy,
+               .size           = ctx_size,
+@@ -737,8 +736,12 @@ u64 bpf_event_output(struct bpf_map *map
+       };
+       struct perf_sample_data *sd;
+       struct pt_regs *regs;
++      int nest_level;
+       u64 ret;
++      preempt_disable();
++      nest_level = this_cpu_inc_return(bpf_event_output_nest_level);
++
+       if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(bpf_misc_sds.sds))) {
+               ret = -EBUSY;
+               goto out;
+@@ -754,6 +757,7 @@ u64 bpf_event_output(struct bpf_map *map
+       ret = __bpf_perf_event_output(regs, map, flags, sd);
+ out:
+       this_cpu_dec(bpf_event_output_nest_level);
++      preempt_enable();
+       return ret;
+ }
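
A minimal sketch of the per-CPU nesting-protection pattern this patch
(and the bpf_perf_event_output patch that follows) hardens; all sketch_*
names are hypothetical and only the shape of the fix is shown.  Without
the preempt_disable()/preempt_enable() pair, a task preempted between the
counter increment and the buffer use can overflow the small nesting array
or end up sharing a slot with the task that preempted it:

    #include <linux/percpu.h>
    #include <linux/preempt.h>
    #include <linux/perf_event.h>

    struct sketch_sample_slots {
            struct perf_sample_data sds[3]; /* one slot per nesting level */
    };

    static DEFINE_PER_CPU(struct sketch_sample_slots, sketch_slots);
    static DEFINE_PER_CPU(int, sketch_nest_level);

    /* Returns a private per-CPU buffer, or NULL if nested too deeply. */
    static struct perf_sample_data *sketch_get_sample_buf(void)
    {
            struct sketch_sample_slots *slots;
            int nest_level;

            preempt_disable();      /* the fix: inc, use and dec stay on one task */
            slots = this_cpu_ptr(&sketch_slots);
            nest_level = this_cpu_inc_return(sketch_nest_level);
            if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(slots->sds))) {
                    this_cpu_dec(sketch_nest_level);
                    preempt_enable();
                    return NULL;
            }
            return &slots->sds[nest_level - 1];
    }

    static void sketch_put_sample_buf(void)
    {
            this_cpu_dec(sketch_nest_level);
            preempt_enable();
    }
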
diff --git a/queue-6.1/bpf-disable-preemption-in-bpf_perf_event_output.patch b/queue-6.1/bpf-disable-preemption-in-bpf_perf_event_output.patch
new file mode 100644 (file)
index 0000000..191a055
--- /dev/null
@@ -0,0 +1,89 @@
+From f2c67a3e60d1071b65848efaa8c3b66c363dd025 Mon Sep 17 00:00:00 2001
+From: Jiri Olsa <jolsa@kernel.org>
+Date: Tue, 25 Jul 2023 10:42:05 +0200
+Subject: bpf: Disable preemption in bpf_perf_event_output
+
+From: Jiri Olsa <jolsa@kernel.org>
+
+commit f2c67a3e60d1071b65848efaa8c3b66c363dd025 upstream.
+
+The nesting protection in bpf_perf_event_output relies on disabled
+preemption, which is guaranteed for kprobes and tracepoints.
+
+However, bpf_perf_event_output can also be called from uprobes context
+through the bpf_prog_run_array_sleepable function, which disables
+migration but keeps preemption enabled.
+
+This can cause a task to be preempted by another one inside the nesting
+protection and eventually lead to two tasks using the same
+perf_sample_data buffer, causing crashes like:
+
+  kernel tried to execute NX-protected page - exploit attempt? (uid: 0)
+  BUG: unable to handle page fault for address: ffffffff82be3eea
+  ...
+  Call Trace:
+   ? __die+0x1f/0x70
+   ? page_fault_oops+0x176/0x4d0
+   ? exc_page_fault+0x132/0x230
+   ? asm_exc_page_fault+0x22/0x30
+   ? perf_output_sample+0x12b/0x910
+   ? perf_event_output+0xd0/0x1d0
+   ? bpf_perf_event_output+0x162/0x1d0
+   ? bpf_prog_c6271286d9a4c938_krava1+0x76/0x87
+   ? __uprobe_perf_func+0x12b/0x540
+   ? uprobe_dispatcher+0x2c4/0x430
+   ? uprobe_notify_resume+0x2da/0xce0
+   ? atomic_notifier_call_chain+0x7b/0x110
+   ? exit_to_user_mode_prepare+0x13e/0x290
+   ? irqentry_exit_to_user_mode+0x5/0x30
+   ? asm_exc_int3+0x35/0x40
+
+Fix this by disabling preemption in bpf_perf_event_output.
+
+Cc: stable@vger.kernel.org
+Fixes: 8c7dcb84e3b7 ("bpf: implement sleepable uprobes by chaining gps")
+Acked-by: Hou Tao <houtao1@huawei.com>
+Signed-off-by: Jiri Olsa <jolsa@kernel.org>
+Link: https://lore.kernel.org/r/20230725084206.580930-2-jolsa@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/trace/bpf_trace.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/kernel/trace/bpf_trace.c
++++ b/kernel/trace/bpf_trace.c
+@@ -662,8 +662,7 @@ static DEFINE_PER_CPU(int, bpf_trace_nes
+ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
+          u64, flags, void *, data, u64, size)
+ {
+-      struct bpf_trace_sample_data *sds = this_cpu_ptr(&bpf_trace_sds);
+-      int nest_level = this_cpu_inc_return(bpf_trace_nest_level);
++      struct bpf_trace_sample_data *sds;
+       struct perf_raw_record raw = {
+               .frag = {
+                       .size = size,
+@@ -671,7 +670,11 @@ BPF_CALL_5(bpf_perf_event_output, struct
+               },
+       };
+       struct perf_sample_data *sd;
+-      int err;
++      int nest_level, err;
++
++      preempt_disable();
++      sds = this_cpu_ptr(&bpf_trace_sds);
++      nest_level = this_cpu_inc_return(bpf_trace_nest_level);
+       if (WARN_ON_ONCE(nest_level > ARRAY_SIZE(sds->sds))) {
+               err = -EBUSY;
+@@ -690,9 +693,9 @@ BPF_CALL_5(bpf_perf_event_output, struct
+       sd->sample_flags |= PERF_SAMPLE_RAW;
+       err = __bpf_perf_event_output(regs, map, flags, sd);
+-
+ out:
+       this_cpu_dec(bpf_trace_nest_level);
++      preempt_enable();
+       return err;
+ }
diff --git a/queue-6.1/ceph-defer-stopping-mdsc-delayed_work.patch b/queue-6.1/ceph-defer-stopping-mdsc-delayed_work.patch
new file mode 100644 (file)
index 0000000..fdde004
--- /dev/null
@@ -0,0 +1,79 @@
+From e7e607bd00481745550389a29ecabe33e13d67cf Mon Sep 17 00:00:00 2001
+From: Xiubo Li <xiubli@redhat.com>
+Date: Tue, 25 Jul 2023 12:03:59 +0800
+Subject: ceph: defer stopping mdsc delayed_work
+
+From: Xiubo Li <xiubli@redhat.com>
+
+commit e7e607bd00481745550389a29ecabe33e13d67cf upstream.
+
+Flushing the dirty buffer may take a long time if the cluster is
+overloaded or if there is a network issue. So we should ping the
+MDSs periodically to keep the sessions alive, else the MDS will blocklist
+the kclient.
+
+Cc: stable@vger.kernel.org
+Link: https://tracker.ceph.com/issues/61843
+Signed-off-by: Xiubo Li <xiubli@redhat.com>
+Reviewed-by: Milind Changire <mchangir@redhat.com>
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ceph/mds_client.c |    4 ++--
+ fs/ceph/mds_client.h |    5 +++++
+ fs/ceph/super.c      |   10 ++++++++++
+ 3 files changed, 17 insertions(+), 2 deletions(-)
+
+--- a/fs/ceph/mds_client.c
++++ b/fs/ceph/mds_client.c
+@@ -4758,7 +4758,7 @@ static void delayed_work(struct work_str
+       dout("mdsc delayed_work\n");
+-      if (mdsc->stopping)
++      if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
+               return;
+       mutex_lock(&mdsc->mutex);
+@@ -4937,7 +4937,7 @@ void send_flush_mdlog(struct ceph_mds_se
+ void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
+ {
+       dout("pre_umount\n");
+-      mdsc->stopping = 1;
++      mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
+       ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
+       ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);
+--- a/fs/ceph/mds_client.h
++++ b/fs/ceph/mds_client.h
+@@ -380,6 +380,11 @@ struct cap_wait {
+       int                     want;
+ };
++enum {
++       CEPH_MDSC_STOPPING_BEGIN = 1,
++       CEPH_MDSC_STOPPING_FLUSHED = 2,
++};
++
+ /*
+  * mds client state
+  */
+--- a/fs/ceph/super.c
++++ b/fs/ceph/super.c
+@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_bl
+       ceph_mdsc_pre_umount(fsc->mdsc);
+       flush_fs_workqueues(fsc);
++      /*
++       * Though the kill_anon_super() will finally trigger the
++       * sync_filesystem() anyway, we still need to do it here
++       * and then bump the stage of shutdown to stop the work
++       * queue as early as possible.
++       */
++      sync_filesystem(s);
++
++      fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
++
+       kill_anon_super(s);
+       fsc->client->extra_mon_dispatch = NULL;
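
The essence of the fix is a staged shutdown instead of a boolean one.  A
condensed, hypothetical sketch (the sketch_* names are not the real ceph
symbols) of how the delayed work interprets the stage:

    #include <linux/workqueue.h>
    #include <linux/jiffies.h>

    enum {
            SKETCH_STOPPING_BEGIN   = 1,    /* pre-umount: still flushing */
            SKETCH_STOPPING_FLUSHED = 2,    /* sync done: work may stop */
    };

    struct sketch_mdsc {
            int stopping;
            struct delayed_work delayed_work;
    };

    static void sketch_send_keepalives(struct sketch_mdsc *mdsc) { /* stub */ }

    static void sketch_delayed_work(struct work_struct *work)
    {
            struct sketch_mdsc *mdsc =
                    container_of(work, struct sketch_mdsc, delayed_work.work);

            /* Keep pinging the MDS during BEGIN so the flush isn't blocklisted. */
            if (mdsc->stopping >= SKETCH_STOPPING_FLUSHED)
                    return;

            sketch_send_keepalives(mdsc);
            schedule_delayed_work(&mdsc->delayed_work,
                                  round_jiffies_relative(5 * HZ));
    }
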
diff --git a/queue-6.1/drm-i915-fix-premature-release-of-request-s-reusable-memory.patch b/queue-6.1/drm-i915-fix-premature-release-of-request-s-reusable-memory.patch
new file mode 100644 (file)
index 0000000..3e79ca2
--- /dev/null
@@ -0,0 +1,269 @@
+From a337b64f0d5717248a0c894e2618e658e6a9de9f Mon Sep 17 00:00:00 2001
+From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
+Date: Thu, 20 Jul 2023 11:35:44 +0200
+Subject: drm/i915: Fix premature release of request's reusable memory
+
+From: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
+
+commit a337b64f0d5717248a0c894e2618e658e6a9de9f upstream.
+
+Infinite waits for completion of GPU activity have been observed in CI,
+mostly inside __i915_active_wait(), triggered by igt@gem_barrier_race or
+igt@perf@stress-open-close.  Root cause analysis, based on ftrace dumps
+generated with a lot of extra trace_printk() calls added to the code,
+revealed loops of request dependencies being accidentally built,
+preventing the requests from being processed, each waiting for completion
+of another one's activity.
+
+After we substitute a new request for a last active one tracked on a
+timeline, we set up a dependency of our new request to wait on completion
+of current activity of that previous one.  While doing that, we must take
+care of keeping the old request still in memory until we use its
+attributes for setting up that await dependency, or we can happen to set
+up the await dependency on an unrelated request that already reuses the
+memory previously allocated to the old one, already released.  Combined
+with perf adding consecutive kernel context remote requests to different
+user context timelines, unresolvable loops of await dependencies can be
+built, leading to infinite waits.
+
+We obtain a pointer to the previous request to wait upon when we
+substitute it with a pointer to our new request in an active tracker,
+e.g. in intel_timeline.last_request.  In some processing paths we protect
+that old request from being freed before we use it by getting a reference
+to it under RCU protection, but in others, e.g.  __i915_request_commit()
+-> __i915_request_add_to_timeline() -> __i915_request_ensure_ordering(),
+we don't.  But anyway, since the requests' memory is SLAB_TYPESAFE_BY_RCU,
+that RCU protection is not sufficient against reuse of memory.
+
+We could protect i915_request's memory from being prematurely reused by
+calling its release function via call_rcu() and using rcu_read_lock()
+consequently, as proposed in v1.  However, that approach leads to
+significant (up to 10 times) increase of SLAB utilization by i915_request
+SLAB cache.  Another potential approach is to take a reference to the
+previous active fence.
+
+When updating an active fence tracker, we first lock the new fence,
+substitute a pointer of the current active fence with the new one, then we
+lock the substituted fence.  With this approach, there is a time window
+after the substitution and before the lock when the request can be
+concurrently released by an interrupt handler and its memory reused, then
+we may happen to lock and return a new, unrelated request.
+
+Always get a reference to the current active fence first, before
+replacing it with a new one.  Having it protected from premature release
+and reuse, lock it and then replace with the new one but only if not
+yet signalled via a potential concurrent interrupt nor replaced with
+another one by a potential concurrent thread, otherwise retry, starting
+from getting a reference to the new current one.  Adjust users to not
+get a reference to the previous active fence themselves and always put the
+reference got by __i915_active_fence_set() when no longer needed.
+
+v3: Fix lockdep splat reports and other issues caused by incorrect use of
+    try_cmpxchg() (use (cmpxchg() != prev) instead)
+v2: Protect request's memory by getting a reference to it in favor of
+    delegating its release to call_rcu() (Chris)
+
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/8211
+Fixes: df9f85d8582e ("drm/i915: Serialise i915_active_fence_set() with itself")
+Suggested-by: Chris Wilson <chris@chris-wilson.co.uk>
+Signed-off-by: Janusz Krzysztofik <janusz.krzysztofik@linux.intel.com>
+Cc: <stable@vger.kernel.org> # v5.6+
+Reviewed-by: Andi Shyti <andi.shyti@linux.intel.com>
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230720093543.832147-2-janusz.krzysztofik@linux.intel.com
+(cherry picked from commit 946e047a3d88d46d15b5c5af0414098e12b243f7)
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/i915_active.c  |   99 +++++++++++++++++++++++++-----------
+ drivers/gpu/drm/i915/i915_request.c |   11 ++++
+ 2 files changed, 81 insertions(+), 29 deletions(-)
+
+--- a/drivers/gpu/drm/i915/i915_active.c
++++ b/drivers/gpu/drm/i915/i915_active.c
+@@ -449,8 +449,11 @@ int i915_active_add_request(struct i915_
+               }
+       } while (unlikely(is_barrier(active)));
+-      if (!__i915_active_fence_set(active, fence))
++      fence = __i915_active_fence_set(active, fence);
++      if (!fence)
+               __i915_active_acquire(ref);
++      else
++              dma_fence_put(fence);
+ out:
+       i915_active_release(ref);
+@@ -469,13 +472,9 @@ __i915_active_set_fence(struct i915_acti
+               return NULL;
+       }
+-      rcu_read_lock();
+       prev = __i915_active_fence_set(active, fence);
+-      if (prev)
+-              prev = dma_fence_get_rcu(prev);
+-      else
++      if (!prev)
+               __i915_active_acquire(ref);
+-      rcu_read_unlock();
+       return prev;
+ }
+@@ -1019,10 +1018,11 @@ void i915_request_add_active_barriers(st
+  *
+  * Records the new @fence as the last active fence along its timeline in
+  * this active tracker, moving the tracking callbacks from the previous
+- * fence onto this one. Returns the previous fence (if not already completed),
+- * which the caller must ensure is executed before the new fence. To ensure
+- * that the order of fences within the timeline of the i915_active_fence is
+- * understood, it should be locked by the caller.
++ * fence onto this one. Gets and returns a reference to the previous fence
++ * (if not already completed), which the caller must put after making sure
++ * that it is executed before the new fence. To ensure that the order of
++ * fences within the timeline of the i915_active_fence is understood, it
++ * should be locked by the caller.
+  */
+ struct dma_fence *
+ __i915_active_fence_set(struct i915_active_fence *active,
+@@ -1031,7 +1031,23 @@ __i915_active_fence_set(struct i915_acti
+       struct dma_fence *prev;
+       unsigned long flags;
+-      if (fence == rcu_access_pointer(active->fence))
++      /*
++       * In case of fences embedded in i915_requests, their memory is
++       * SLAB_TYPESAFE_BY_RCU, then it can be reused right after release
++       * by new requests.  Then, there is a risk of passing back a pointer
++       * to a new, completely unrelated fence that reuses the same memory
++       * while tracked under a different active tracker.  Combined with i915
++       * perf open/close operations that build await dependencies between
++       * engine kernel context requests and user requests from different
++       * timelines, this can lead to dependency loops and infinite waits.
++       *
++       * As a countermeasure, we try to get a reference to the active->fence
++       * first, so if we succeed and pass it back to our user then it is not
++       * released and potentially reused by an unrelated request before the
++       * user has a chance to set up an await dependency on it.
++       */
++      prev = i915_active_fence_get(active);
++      if (fence == prev)
+               return fence;
+       GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));
+@@ -1040,27 +1056,56 @@ __i915_active_fence_set(struct i915_acti
+        * Consider that we have two threads arriving (A and B), with
+        * C already resident as the active->fence.
+        *
+-       * A does the xchg first, and so it sees C or NULL depending
+-       * on the timing of the interrupt handler. If it is NULL, the
+-       * previous fence must have been signaled and we know that
+-       * we are first on the timeline. If it is still present,
+-       * we acquire the lock on that fence and serialise with the interrupt
+-       * handler, in the process removing it from any future interrupt
+-       * callback. A will then wait on C before executing (if present).
+-       *
+-       * As B is second, it sees A as the previous fence and so waits for
+-       * it to complete its transition and takes over the occupancy for
+-       * itself -- remembering that it needs to wait on A before executing.
++       * Both A and B have got a reference to C or NULL, depending on the
++       * timing of the interrupt handler.  Let's assume that if A has got C
++       * then it has locked C first (before B).
+        *
+        * Note the strong ordering of the timeline also provides consistent
+        * nesting rules for the fence->lock; the inner lock is always the
+        * older lock.
+        */
+       spin_lock_irqsave(fence->lock, flags);
+-      prev = xchg(__active_fence_slot(active), fence);
+-      if (prev) {
+-              GEM_BUG_ON(prev == fence);
++      if (prev)
+               spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
++
++      /*
++       * A does the cmpxchg first, and so it sees C or NULL, as before, or
++       * something else, depending on the timing of other threads and/or
++       * interrupt handler.  If not the same as before then A unlocks C if
++       * applicable and retries, starting from an attempt to get a new
++       * active->fence.  Meanwhile, B follows the same path as A.
++       * Once A succeeds with cmpxchg, B fails again, retries, gets A from
++       * active->fence, locks it as soon as A completes, and possibly
++       * succeeds with cmpxchg.
++       */
++      while (cmpxchg(__active_fence_slot(active), prev, fence) != prev) {
++              if (prev) {
++                      spin_unlock(prev->lock);
++                      dma_fence_put(prev);
++              }
++              spin_unlock_irqrestore(fence->lock, flags);
++
++              prev = i915_active_fence_get(active);
++              GEM_BUG_ON(prev == fence);
++
++              spin_lock_irqsave(fence->lock, flags);
++              if (prev)
++                      spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
++      }
++
++      /*
++       * If prev is NULL then the previous fence must have been signaled
++       * and we know that we are first on the timeline.  If it is still
++       * present then, having the lock on that fence already acquired, we
++       * serialise with the interrupt handler, in the process of removing it
++       * from any future interrupt callback.  A will then wait on C before
++       * executing (if present).
++       *
++       * As B is second, it sees A as the previous fence and so waits for
++       * it to complete its transition and takes over the occupancy for
++       * itself -- remembering that it needs to wait on A before executing.
++       */
++      if (prev) {
+               __list_del_entry(&active->cb.node);
+               spin_unlock(prev->lock); /* serialise with prev->cb_list */
+       }
+@@ -1077,11 +1122,7 @@ int i915_active_fence_set(struct i915_ac
+       int err = 0;
+       /* Must maintain timeline ordering wrt previous active requests */
+-      rcu_read_lock();
+       fence = __i915_active_fence_set(active, &rq->fence);
+-      if (fence) /* but the previous fence may not belong to that timeline! */
+-              fence = dma_fence_get_rcu(fence);
+-      rcu_read_unlock();
+       if (fence) {
+               err = i915_request_await_dma_fence(rq, fence);
+               dma_fence_put(fence);
+--- a/drivers/gpu/drm/i915/i915_request.c
++++ b/drivers/gpu/drm/i915/i915_request.c
+@@ -1647,6 +1647,11 @@ __i915_request_ensure_parallel_ordering(
+       request_to_parent(rq)->parallel.last_rq = i915_request_get(rq);
++      /*
++       * Users have to put a reference potentially got by
++       * __i915_active_fence_set() to the returned request
++       * when no longer needed
++       */
+       return to_request(__i915_active_fence_set(&timeline->last_request,
+                                                 &rq->fence));
+ }
+@@ -1693,6 +1698,10 @@ __i915_request_ensure_ordering(struct i9
+                                                        0);
+       }
++      /*
++       * Users have to put the reference to prev potentially got
++       * by __i915_active_fence_set() when no longer needed
++       */
+       return prev;
+ }
+@@ -1736,6 +1745,8 @@ __i915_request_add_to_timeline(struct i9
+               prev = __i915_request_ensure_ordering(rq, timeline);
+       else
+               prev = __i915_request_ensure_parallel_ordering(rq, timeline);
++      if (prev)
++              i915_request_put(prev);
+       /*
+        * Make sure that no request gazumped us - if it was allocated after
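
Stripped of the engine specifics and of the fence->lock serialisation
against the interrupt handler, the core of the reworked
__i915_active_fence_set() is a get-then-cmpxchg retry loop.  A hedged
sketch (sketch_replace_fence() is hypothetical; the __rcu casts the real
code performs are elided for brevity):

    #include <linux/atomic.h>
    #include <linux/dma-fence.h>
    #include <linux/rcupdate.h>

    /*
     * Pin the current occupant with a reference *before* trying to replace
     * it, so the pointer handed back can never be a recycled
     * SLAB_TYPESAFE_BY_RCU allocation.  The caller must dma_fence_put() the
     * returned fence once it has set up its ordering against it.
     */
    static struct dma_fence *
    sketch_replace_fence(struct dma_fence __rcu **slot, struct dma_fence *fence)
    {
            struct dma_fence *prev;

            for (;;) {
                    rcu_read_lock();
                    prev = dma_fence_get_rcu_safe(slot);    /* reference, or NULL */
                    rcu_read_unlock();
                    if (prev == fence)
                            return fence;

                    /* Install @fence only if the slot still holds @prev. */
                    if (cmpxchg(slot, prev, fence) == prev)
                            return prev;

                    dma_fence_put(prev);    /* lost a race; retry from the top */
            }
    }
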
diff --git a/queue-6.1/drm-i915-gt-cleanup-aux-invalidation-registers.patch b/queue-6.1/drm-i915-gt-cleanup-aux-invalidation-registers.patch
new file mode 100644 (file)
index 0000000..636a1ea
--- /dev/null
@@ -0,0 +1,117 @@
+From d14560ac1b595aa2e792365e91fea6aeaee66c2b Mon Sep 17 00:00:00 2001
+From: Andi Shyti <andi.shyti@linux.intel.com>
+Date: Tue, 25 Jul 2023 02:19:44 +0200
+Subject: drm/i915/gt: Cleanup aux invalidation registers
+
+From: Andi Shyti <andi.shyti@linux.intel.com>
+
+commit d14560ac1b595aa2e792365e91fea6aeaee66c2b upstream.
+
+Fix the 'NV' definition suffix that is supposed to be 'INV'.
+
+Take the chance to also order the registers properly based on
+their address, and to rename the GEN12_GFX_CCS_AUX_NV register to
+GEN12_CCS_AUX_INV like all the other similar registers.
+
+Also remove the VD1, VD3 and VE1 registers, which don't exist, and
+add BCS0 and CCS0.
+
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Cc: <stable@vger.kernel.org> # v5.8+
+Reviewed-by: Nirmoy Das <nirmoy.das@intel.com>
+Reviewed-by: Andrzej Hajda <andrzej.hajda@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230725001950.1014671-2-andi.shyti@linux.intel.com
+(cherry picked from commit 2f0b927d3ca3440445975ebde27f3df1c3ed6f76)
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/gt/gen8_engine_cs.c |    8 ++++----
+ drivers/gpu/drm/i915/gt/intel_gt_regs.h  |   16 ++++++++--------
+ drivers/gpu/drm/i915/gt/intel_lrc.c      |    6 +++---
+ 3 files changed, 15 insertions(+), 15 deletions(-)
+
+--- a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
++++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c
+@@ -256,8 +256,8 @@ int gen12_emit_flush_rcs(struct i915_req
+               if (!HAS_FLAT_CCS(rq->engine->i915)) {
+                       /* hsdes: 1809175790 */
+-                      cs = gen12_emit_aux_table_inv(rq->engine->gt,
+-                                                    cs, GEN12_GFX_CCS_AUX_NV);
++                      cs = gen12_emit_aux_table_inv(rq->engine->gt, cs,
++                                                    GEN12_CCS_AUX_INV);
+               }
+               *cs++ = preparser_disable(false);
+@@ -317,10 +317,10 @@ int gen12_emit_flush_xcs(struct i915_req
+       if (aux_inv) { /* hsdes: 1809175790 */
+               if (rq->engine->class == VIDEO_DECODE_CLASS)
+                       cs = gen12_emit_aux_table_inv(rq->engine->gt,
+-                                                    cs, GEN12_VD0_AUX_NV);
++                                                    cs, GEN12_VD0_AUX_INV);
+               else
+                       cs = gen12_emit_aux_table_inv(rq->engine->gt,
+-                                                    cs, GEN12_VE0_AUX_NV);
++                                                    cs, GEN12_VE0_AUX_INV);
+       }
+       if (mode & EMIT_INVALIDATE)
+--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
++++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+@@ -301,9 +301,11 @@
+ #define GEN8_PRIVATE_PAT_HI                   _MMIO(0x40e0 + 4)
+ #define GEN10_PAT_INDEX(index)                        _MMIO(0x40e0 + (index) * 4)
+ #define BSD_HWS_PGA_GEN7                      _MMIO(0x4180)
+-#define GEN12_GFX_CCS_AUX_NV                  _MMIO(0x4208)
+-#define GEN12_VD0_AUX_NV                      _MMIO(0x4218)
+-#define GEN12_VD1_AUX_NV                      _MMIO(0x4228)
++
++#define GEN12_CCS_AUX_INV                     _MMIO(0x4208)
++#define GEN12_VD0_AUX_INV                     _MMIO(0x4218)
++#define GEN12_VE0_AUX_INV                     _MMIO(0x4238)
++#define GEN12_BCS0_AUX_INV                    _MMIO(0x4248)
+ #define GEN8_RTCR                             _MMIO(0x4260)
+ #define GEN8_M1TCR                            _MMIO(0x4264)
+@@ -311,14 +313,12 @@
+ #define GEN8_BTCR                             _MMIO(0x426c)
+ #define GEN8_VTCR                             _MMIO(0x4270)
+-#define GEN12_VD2_AUX_NV                      _MMIO(0x4298)
+-#define GEN12_VD3_AUX_NV                      _MMIO(0x42a8)
+-#define GEN12_VE0_AUX_NV                      _MMIO(0x4238)
+-
+ #define BLT_HWS_PGA_GEN7                      _MMIO(0x4280)
+-#define GEN12_VE1_AUX_NV                      _MMIO(0x42b8)
++#define GEN12_VD2_AUX_INV                     _MMIO(0x4298)
++#define GEN12_CCS0_AUX_INV                    _MMIO(0x42c8)
+ #define   AUX_INV                             REG_BIT(0)
++
+ #define VEBOX_HWS_PGA_GEN7                    _MMIO(0x4380)
+ #define GEN12_AUX_ERR_DBG                     _MMIO(0x43f4)
+--- a/drivers/gpu/drm/i915/gt/intel_lrc.c
++++ b/drivers/gpu/drm/i915/gt/intel_lrc.c
+@@ -1299,7 +1299,7 @@ gen12_emit_indirect_ctx_rcs(const struct
+       /* hsdes: 1809175790 */
+       if (!HAS_FLAT_CCS(ce->engine->i915))
+               cs = gen12_emit_aux_table_inv(ce->engine->gt,
+-                                            cs, GEN12_GFX_CCS_AUX_NV);
++                                            cs, GEN12_CCS_AUX_INV);
+       /* Wa_16014892111 */
+       if (IS_DG2(ce->engine->i915))
+@@ -1326,10 +1326,10 @@ gen12_emit_indirect_ctx_xcs(const struct
+       if (!HAS_FLAT_CCS(ce->engine->i915)) {
+               if (ce->engine->class == VIDEO_DECODE_CLASS)
+                       cs = gen12_emit_aux_table_inv(ce->engine->gt,
+-                                                    cs, GEN12_VD0_AUX_NV);
++                                                    cs, GEN12_VD0_AUX_INV);
+               else if (ce->engine->class == VIDEO_ENHANCEMENT_CLASS)
+                       cs = gen12_emit_aux_table_inv(ce->engine->gt,
+-                                                    cs, GEN12_VE0_AUX_NV);
++                                                    cs, GEN12_VE0_AUX_INV);
+       }
+       return cs;
diff --git a/queue-6.1/drm-ttm-check-null-pointer-before-accessing-when-swapping.patch b/queue-6.1/drm-ttm-check-null-pointer-before-accessing-when-swapping.patch
new file mode 100644 (file)
index 0000000..87ed6db
--- /dev/null
@@ -0,0 +1,71 @@
+From 2dedcf414bb01b8d966eb445db1d181d92304fb2 Mon Sep 17 00:00:00 2001
+From: Guchun Chen <guchun.chen@amd.com>
+Date: Mon, 24 Jul 2023 10:42:29 +0800
+Subject: drm/ttm: check null pointer before accessing when swapping
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Guchun Chen <guchun.chen@amd.com>
+
+commit 2dedcf414bb01b8d966eb445db1d181d92304fb2 upstream.
+
+Add a check to avoid null pointer dereference as below:
+
+[   90.002283] general protection fault, probably for non-canonical
+address 0xdffffc0000000000: 0000 [#1] PREEMPT SMP KASAN NOPTI
+[   90.002292] KASAN: null-ptr-deref in range
+[0x0000000000000000-0x0000000000000007]
+[   90.002346]  ? exc_general_protection+0x159/0x240
+[   90.002352]  ? asm_exc_general_protection+0x26/0x30
+[   90.002357]  ? ttm_bo_evict_swapout_allowable+0x322/0x5e0 [ttm]
+[   90.002365]  ? ttm_bo_evict_swapout_allowable+0x42e/0x5e0 [ttm]
+[   90.002373]  ttm_bo_swapout+0x134/0x7f0 [ttm]
+[   90.002383]  ? __pfx_ttm_bo_swapout+0x10/0x10 [ttm]
+[   90.002391]  ? lock_acquire+0x44d/0x4f0
+[   90.002398]  ? ttm_device_swapout+0xa5/0x260 [ttm]
+[   90.002412]  ? lock_acquired+0x355/0xa00
+[   90.002416]  ? do_raw_spin_trylock+0xb6/0x190
+[   90.002421]  ? __pfx_lock_acquired+0x10/0x10
+[   90.002426]  ? ttm_global_swapout+0x25/0x210 [ttm]
+[   90.002442]  ttm_device_swapout+0x198/0x260 [ttm]
+[   90.002456]  ? __pfx_ttm_device_swapout+0x10/0x10 [ttm]
+[   90.002472]  ttm_global_swapout+0x75/0x210 [ttm]
+[   90.002486]  ttm_tt_populate+0x187/0x3f0 [ttm]
+[   90.002501]  ttm_bo_handle_move_mem+0x437/0x590 [ttm]
+[   90.002517]  ttm_bo_validate+0x275/0x430 [ttm]
+[   90.002530]  ? __pfx_ttm_bo_validate+0x10/0x10 [ttm]
+[   90.002544]  ? kasan_save_stack+0x33/0x60
+[   90.002550]  ? kasan_set_track+0x25/0x30
+[   90.002554]  ? __kasan_kmalloc+0x8f/0xa0
+[   90.002558]  ? amdgpu_gtt_mgr_new+0x81/0x420 [amdgpu]
+[   90.003023]  ? ttm_resource_alloc+0xf6/0x220 [ttm]
+[   90.003038]  amdgpu_bo_pin_restricted+0x2dd/0x8b0 [amdgpu]
+[   90.003210]  ? __x64_sys_ioctl+0x131/0x1a0
+[   90.003210]  ? do_syscall_64+0x60/0x90
+
+Fixes: a2848d08742c ("drm/ttm: never consider pinned BOs for eviction&swap")
+Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov@gmail.com>
+Signed-off-by: Guchun Chen <guchun.chen@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Cc: stable@vger.kernel.org
+Link: https://patchwork.freedesktop.org/patch/msgid/20230724024229.1118444-1-guchun.chen@amd.com
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/ttm/ttm_bo.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/ttm/ttm_bo.c
++++ b/drivers/gpu/drm/ttm/ttm_bo.c
+@@ -552,7 +552,8 @@ static bool ttm_bo_evict_swapout_allowab
+       if (bo->pin_count) {
+               *locked = false;
+-              *busy = false;
++              if (busy)
++                      *busy = false;
+               return false;
+       }
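
The bug class in miniature: an optional out-parameter stored to
unconditionally.  A one-function sketch with a hypothetical signature
modelled on ttm_bo_evict_swapout_allowable():

    #include <linux/types.h>

    /* @busy may be NULL when the caller doesn't care (the swapout path). */
    static bool sketch_evict_allowable(unsigned long pin_count,
                                       bool *locked, bool *busy)
    {
            if (pin_count) {
                    *locked = false;        /* every caller passes a valid @locked */
                    if (busy)               /* ...but @busy must be checked */
                            *busy = false;
                    return false;
            }
            return true;
    }
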
diff --git a/queue-6.1/exfat-release-s_lock-before-calling-dir_emit.patch b/queue-6.1/exfat-release-s_lock-before-calling-dir_emit.patch
new file mode 100644 (file)
index 0000000..398ed6d
--- /dev/null
@@ -0,0 +1,129 @@
+From ff84772fd45d486e4fc78c82e2f70ce5333543e6 Mon Sep 17 00:00:00 2001
+From: Sungjong Seo <sj1557.seo@samsung.com>
+Date: Fri, 14 Jul 2023 17:43:54 +0900
+Subject: exfat: release s_lock before calling dir_emit()
+
+From: Sungjong Seo <sj1557.seo@samsung.com>
+
+commit ff84772fd45d486e4fc78c82e2f70ce5333543e6 upstream.
+
+There is a potential deadlock reported by syzbot as below:
+
+======================================================
+WARNING: possible circular locking dependency detected
+6.4.0-next-20230707-syzkaller #0 Not tainted
+------------------------------------------------------
+syz-executor330/5073 is trying to acquire lock:
+ffff8880218527a0 (&mm->mmap_lock){++++}-{3:3}, at: mmap_read_lock_killable include/linux/mmap_lock.h:151 [inline]
+ffff8880218527a0 (&mm->mmap_lock){++++}-{3:3}, at: get_mmap_lock_carefully mm/memory.c:5293 [inline]
+ffff8880218527a0 (&mm->mmap_lock){++++}-{3:3}, at: lock_mm_and_find_vma+0x369/0x510 mm/memory.c:5344
+but task is already holding lock:
+ffff888019f760e0 (&sbi->s_lock){+.+.}-{3:3}, at: exfat_iterate+0x117/0xb50 fs/exfat/dir.c:232
+
+which lock already depends on the new lock.
+
+Chain exists of:
+  &mm->mmap_lock --> mapping.invalidate_lock#3 --> &sbi->s_lock
+
+ Possible unsafe locking scenario:
+
+       CPU0                    CPU1
+       ----                    ----
+  lock(&sbi->s_lock);
+                               lock(mapping.invalidate_lock#3);
+                               lock(&sbi->s_lock);
+  rlock(&mm->mmap_lock);
+
+Let's try to avoid the above potential deadlock by moving dir_emit*()
+out of sbi->s_lock coverage.
+
+Fixes: ca06197382bd ("exfat: add directory operations")
+Cc: stable@vger.kernel.org #v5.7+
+Reported-by: syzbot+1741a5d9b79989c10bdc@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/lkml/00000000000078ee7e060066270b@google.com/T/#u
+Tested-by: syzbot+1741a5d9b79989c10bdc@syzkaller.appspotmail.com
+Signed-off-by: Sungjong Seo <sj1557.seo@samsung.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exfat/dir.c |   27 ++++++++++++---------------
+ 1 file changed, 12 insertions(+), 15 deletions(-)
+
+--- a/fs/exfat/dir.c
++++ b/fs/exfat/dir.c
+@@ -210,7 +210,10 @@ static void exfat_free_namebuf(struct ex
+       exfat_init_namebuf(nb);
+ }
+-/* skip iterating emit_dots when dir is empty */
++/*
++ * Before calling dir_emit*(), sbi->s_lock should be released
++ * because page fault can occur in dir_emit*().
++ */
+ #define ITER_POS_FILLED_DOTS    (2)
+ static int exfat_iterate(struct file *file, struct dir_context *ctx)
+ {
+@@ -225,11 +228,10 @@ static int exfat_iterate(struct file *fi
+       int err = 0, fake_offset = 0;
+       exfat_init_namebuf(nb);
+-      mutex_lock(&EXFAT_SB(sb)->s_lock);
+       cpos = ctx->pos;
+       if (!dir_emit_dots(file, ctx))
+-              goto unlock;
++              goto out;
+       if (ctx->pos == ITER_POS_FILLED_DOTS) {
+               cpos = 0;
+@@ -241,16 +243,18 @@ static int exfat_iterate(struct file *fi
+       /* name buffer should be allocated before use */
+       err = exfat_alloc_namebuf(nb);
+       if (err)
+-              goto unlock;
++              goto out;
+ get_new:
++      mutex_lock(&EXFAT_SB(sb)->s_lock);
++
+       if (ei->flags == ALLOC_NO_FAT_CHAIN && cpos >= i_size_read(inode))
+               goto end_of_dir;
+       err = exfat_readdir(inode, &cpos, &de);
+       if (err) {
+               /*
+-               * At least we tried to read a sector.  Move cpos to next sector
+-               * position (should be aligned).
++               * At least we tried to read a sector.
++               * Move cpos to next sector position (should be aligned).
+                */
+               if (err == -EIO) {
+                       cpos += 1 << (sb->s_blocksize_bits);
+@@ -273,16 +277,10 @@ get_new:
+               inum = iunique(sb, EXFAT_ROOT_INO);
+       }
+-      /*
+-       * Before calling dir_emit(), sb_lock should be released.
+-       * Because page fault can occur in dir_emit() when the size
+-       * of buffer given from user is larger than one page size.
+-       */
+       mutex_unlock(&EXFAT_SB(sb)->s_lock);
+       if (!dir_emit(ctx, nb->lfn, strlen(nb->lfn), inum,
+                       (de.attr & ATTR_SUBDIR) ? DT_DIR : DT_REG))
+-              goto out_unlocked;
+-      mutex_lock(&EXFAT_SB(sb)->s_lock);
++              goto out;
+       ctx->pos = cpos;
+       goto get_new;
+@@ -290,9 +288,8 @@ end_of_dir:
+       if (!cpos && fake_offset)
+               cpos = ITER_POS_FILLED_DOTS;
+       ctx->pos = cpos;
+-unlock:
+       mutex_unlock(&EXFAT_SB(sb)->s_lock);
+-out_unlocked:
++out:
+       /*
+        * To improve performance, free namebuf after unlock sb_lock.
+        * If namebuf is not allocated, this function do nothing
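
The rule the fix applies generalises well: never hold a filesystem lock
across a callback that may fault on user memory.  A condensed sketch of
the restructured loop; sketch_read_next_entry() is an assumed helper, not
a real exfat function:

    #include <linux/fs.h>
    #include <linux/limits.h>
    #include <linux/mutex.h>
    #include <linux/string.h>

    /* Assumed helper: reads one entry under the lock, -ENOENT at end. */
    int sketch_read_next_entry(struct file *file, loff_t pos, char *name,
                               u64 *ino, unsigned int *type);

    static int sketch_iterate(struct file *file, struct dir_context *ctx,
                              struct mutex *s_lock)
    {
            char name[NAME_MAX + 1];
            unsigned int type;
            u64 ino;
            int err;

            for (;;) {
                    mutex_lock(s_lock);
                    err = sketch_read_next_entry(file, ctx->pos, name, &ino, &type);
                    mutex_unlock(s_lock);   /* drop before dir_emit() can fault */
                    if (err)
                            return err == -ENOENT ? 0 : err;

                    /* dir_emit() may page-fault on the user buffer -> mmap_lock. */
                    if (!dir_emit(ctx, name, strlen(name), ino, type))
                            return 0;       /* user buffer full */
                    ctx->pos++;
            }
    }
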
diff --git a/queue-6.1/exfat-use-kvmalloc_array-kvfree-instead-of-kmalloc_array-kfree.patch b/queue-6.1/exfat-use-kvmalloc_array-kvfree-instead-of-kmalloc_array-kfree.patch
new file mode 100644 (file)
index 0000000..e932500
--- /dev/null
@@ -0,0 +1,81 @@
+From daf60d6cca26e50d65dac374db92e58de745ad26 Mon Sep 17 00:00:00 2001
+From: gaoming <gaoming20@hihonor.com>
+Date: Wed, 5 Jul 2023 15:15:15 +0800
+Subject: exfat: use kvmalloc_array/kvfree instead of kmalloc_array/kfree
+
+From: gaoming <gaoming20@hihonor.com>
+
+commit daf60d6cca26e50d65dac374db92e58de745ad26 upstream.
+
+The call stack shown below is a scenario in the Linux 4.19 kernel.
+Allocating memory failed where the exfat fs uses kmalloc_array, due to
+system memory fragmentation, so the u-disk was inserted without being
+recognized.
+Devices such as u-disks using the exfat file system are pluggable and
+may be inserted into the system at any time.
+However, long-running systems cannot guarantee the contiguity of
+physical memory. Therefore, it's necessary to address this issue.
+
+Binder:2632_6: page allocation failure: order:4,
+ mode:0x6040c0(GFP_KERNEL|__GFP_COMP), nodemask=(null)
+Call trace:
+[242178.097582]  dump_backtrace+0x0/0x4
+[242178.097589]  dump_stack+0xf4/0x134
+[242178.097598]  warn_alloc+0xd8/0x144
+[242178.097603]  __alloc_pages_nodemask+0x1364/0x1384
+[242178.097608]  kmalloc_order+0x2c/0x510
+[242178.097612]  kmalloc_order_trace+0x40/0x16c
+[242178.097618]  __kmalloc+0x360/0x408
+[242178.097624]  load_alloc_bitmap+0x160/0x284
+[242178.097628]  exfat_fill_super+0xa3c/0xe7c
+[242178.097635]  mount_bdev+0x2e8/0x3a0
+[242178.097638]  exfat_fs_mount+0x40/0x50
+[242178.097643]  mount_fs+0x138/0x2e8
+[242178.097649]  vfs_kern_mount+0x90/0x270
+[242178.097655]  do_mount+0x798/0x173c
+[242178.097659]  ksys_mount+0x114/0x1ac
+[242178.097665]  __arm64_sys_mount+0x24/0x34
+[242178.097671]  el0_svc_common+0xb8/0x1b8
+[242178.097676]  el0_svc_handler+0x74/0x90
+[242178.097681]  el0_svc+0x8/0x340
+
+By analyzing the exfat code, we found that contiguous physical memory
+is not required here, so using kvmalloc_array can solve this problem.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: gaoming <gaoming20@hihonor.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/exfat/balloc.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/fs/exfat/balloc.c
++++ b/fs/exfat/balloc.c
+@@ -69,7 +69,7 @@ static int exfat_allocate_bitmap(struct
+       }
+       sbi->map_sectors = ((need_map_size - 1) >>
+                       (sb->s_blocksize_bits)) + 1;
+-      sbi->vol_amap = kmalloc_array(sbi->map_sectors,
++      sbi->vol_amap = kvmalloc_array(sbi->map_sectors,
+                               sizeof(struct buffer_head *), GFP_KERNEL);
+       if (!sbi->vol_amap)
+               return -ENOMEM;
+@@ -84,7 +84,7 @@ static int exfat_allocate_bitmap(struct
+                       while (j < i)
+                               brelse(sbi->vol_amap[j++]);
+-                      kfree(sbi->vol_amap);
++                      kvfree(sbi->vol_amap);
+                       sbi->vol_amap = NULL;
+                       return -EIO;
+               }
+@@ -138,7 +138,7 @@ void exfat_free_bitmap(struct exfat_sb_i
+       for (i = 0; i < sbi->map_sectors; i++)
+               __brelse(sbi->vol_amap[i]);
+-      kfree(sbi->vol_amap);
++      kvfree(sbi->vol_amap);
+ }
+ int exfat_set_bitmap(struct inode *inode, unsigned int clu, bool sync)
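
The trade-off in one sketch: kmalloc_array() demands physically
contiguous pages (an order-4 request for this bitmap), while
kvmalloc_array() transparently falls back to vmalloc() on fragmented
systems.  sketch_alloc_amap() is a hypothetical standalone example:

    #include <linux/slab.h>
    #include <linux/buffer_head.h>

    static struct buffer_head **sketch_alloc_amap(unsigned long map_sectors)
    {
            /*
             * GFP_KERNEL permits the vmalloc fallback.  Anything allocated
             * with kvmalloc_array() must be freed with kvfree(), which copes
             * with both the kmalloc and the vmalloc case.
             */
            return kvmalloc_array(map_sectors, sizeof(struct buffer_head *),
                                  GFP_KERNEL);
    }
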
diff --git a/queue-6.1/firmware-arm_scmi-drop-of-node-reference-in-the-transport-channel-setup.patch b/queue-6.1/firmware-arm_scmi-drop-of-node-reference-in-the-transport-channel-setup.patch
new file mode 100644 (file)
index 0000000..bfea4af
--- /dev/null
@@ -0,0 +1,52 @@
+From da042eb4f061a0b54aedadcaa15391490c48e1ad Mon Sep 17 00:00:00 2001
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Date: Wed, 19 Jul 2023 08:16:52 +0200
+Subject: firmware: arm_scmi: Drop OF node reference in the transport channel setup
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+commit da042eb4f061a0b54aedadcaa15391490c48e1ad upstream.
+
+The OF node reference obtained from of_parse_phandle() should be dropped
+if the node is not compatible with arm,scmi-shmem.
+
+Fixes: 507cd4d2c5eb ("firmware: arm_scmi: Add compatibility checks for shmem node")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
+Link: https://lore.kernel.org/r/20230719061652.8850-1-krzysztof.kozlowski@linaro.org
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/arm_scmi/mailbox.c |    4 +++-
+ drivers/firmware/arm_scmi/smc.c     |    4 +++-
+ 2 files changed, 6 insertions(+), 2 deletions(-)
+
+--- a/drivers/firmware/arm_scmi/mailbox.c
++++ b/drivers/firmware/arm_scmi/mailbox.c
+@@ -106,8 +106,10 @@ static int mailbox_chan_setup(struct scm
+               return -ENOMEM;
+       shmem = of_parse_phandle(cdev->of_node, "shmem", idx);
+-      if (!of_device_is_compatible(shmem, "arm,scmi-shmem"))
++      if (!of_device_is_compatible(shmem, "arm,scmi-shmem")) {
++              of_node_put(shmem);
+               return -ENXIO;
++      }
+       ret = of_address_to_resource(shmem, 0, &res);
+       of_node_put(shmem);
+--- a/drivers/firmware/arm_scmi/smc.c
++++ b/drivers/firmware/arm_scmi/smc.c
+@@ -118,8 +118,10 @@ static int smc_chan_setup(struct scmi_ch
+               return -ENOMEM;
+       np = of_parse_phandle(cdev->of_node, "shmem", 0);
+-      if (!of_device_is_compatible(np, "arm,scmi-shmem"))
++      if (!of_device_is_compatible(np, "arm,scmi-shmem")) {
++              of_node_put(np);
+               return -ENXIO;
++      }
+       ret = of_address_to_resource(np, 0, &res);
+       of_node_put(np);
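
The refcounting rule being enforced, as a compact sketch (hypothetical
sketch_map_shmem(); both of_device_is_compatible() and of_node_put()
tolerate a NULL node, which is why no separate NULL check is needed):

    #include <linux/of.h>
    #include <linux/of_address.h>

    static int sketch_map_shmem(struct device_node *parent, struct resource *res)
    {
            struct device_node *shmem;
            int ret;

            shmem = of_parse_phandle(parent, "shmem", 0);   /* takes a reference */
            if (!of_device_is_compatible(shmem, "arm,scmi-shmem")) {
                    of_node_put(shmem);     /* the reference that used to leak */
                    return -ENXIO;
            }

            ret = of_address_to_resource(shmem, 0, res);
            of_node_put(shmem);
            return ret;
    }
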
diff --git a/queue-6.1/libceph-fix-potential-hang-in-ceph_osdc_notify.patch b/queue-6.1/libceph-fix-potential-hang-in-ceph_osdc_notify.patch
new file mode 100644 (file)
index 0000000..59be455
--- /dev/null
@@ -0,0 +1,69 @@
+From e6e2843230799230fc5deb8279728a7218b0d63c Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Tue, 1 Aug 2023 19:14:24 +0200
+Subject: libceph: fix potential hang in ceph_osdc_notify()
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit e6e2843230799230fc5deb8279728a7218b0d63c upstream.
+
+If the cluster becomes unavailable, ceph_osdc_notify() may hang even
+with osd_request_timeout option set because linger_notify_finish_wait()
+waits for MWatchNotify NOTIFY_COMPLETE message with no associated OSD
+request in flight -- it's completely asynchronous.
+
+Introduce an additional timeout, derived from the specified notify
+timeout.  While at it, switch both waits to killable which is more
+correct.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Reviewed-by: Xiubo Li <xiubli@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ceph/osd_client.c |   20 ++++++++++++++------
+ 1 file changed, 14 insertions(+), 6 deletions(-)
+
+--- a/net/ceph/osd_client.c
++++ b/net/ceph/osd_client.c
+@@ -3334,17 +3334,24 @@ static int linger_reg_commit_wait(struct
+       int ret;
+       dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
+-      ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
++      ret = wait_for_completion_killable(&lreq->reg_commit_wait);
+       return ret ?: lreq->reg_commit_error;
+ }
+-static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
++static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
++                                   unsigned long timeout)
+ {
+-      int ret;
++      long left;
+       dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
+-      ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
+-      return ret ?: lreq->notify_finish_error;
++      left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
++                                              ceph_timeout_jiffies(timeout));
++      if (left <= 0)
++              left = left ?: -ETIMEDOUT;
++      else
++              left = lreq->notify_finish_error; /* completed */
++
++      return left;
+ }
+ /*
+@@ -4896,7 +4903,8 @@ int ceph_osdc_notify(struct ceph_osd_cli
+       linger_submit(lreq);
+       ret = linger_reg_commit_wait(lreq);
+       if (!ret)
+-              ret = linger_notify_finish_wait(lreq);
++              ret = linger_notify_finish_wait(lreq,
++                               msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
+       else
+               dout("lreq %p failed to initiate notify %d\n", lreq, ret);
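
The idiom in linger_notify_finish_wait() is the usual bounded-wait
mapping: wait_for_completion_killable_timeout() returns 0 on timeout, a
negative errno if a fatal signal arrived, and the remaining jiffies on
completion. The caller passes 2 * timeout seconds of slack over the
server-side notify timeout, and ceph_timeout_jiffies() maps a zero
timeout to "wait forever". A hedged sketch of the mapping (not the
exact libceph code):

    static int bounded_wait_sketch(struct completion *done,
                                   unsigned long timeout_jiffies)
    {
        long left = wait_for_completion_killable_timeout(done,
                                                         timeout_jiffies);

        if (left == 0)
            return -ETIMEDOUT;  /* no NOTIFY_COMPLETE within the window */
        if (left < 0)
            return left;        /* fatal signal: -ERESTARTSYS */
        return 0;               /* completed; caller reads the real error */
    }
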
diff --git a/queue-6.1/mtd-rawnand-meson-fix-oob-available-bytes-for-ecc.patch b/queue-6.1/mtd-rawnand-meson-fix-oob-available-bytes-for-ecc.patch
new file mode 100644
index 0000000..7f816d1
--- /dev/null
@@ -0,0 +1,44 @@
+From 7e6b04f9238eab0f684fafd158c1f32ea65b9eaa Mon Sep 17 00:00:00 2001
+From: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
+Date: Wed, 5 Jul 2023 09:52:10 +0300
+Subject: mtd: rawnand: meson: fix OOB available bytes for ECC
+
+From: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
+
+commit 7e6b04f9238eab0f684fafd158c1f32ea65b9eaa upstream.
+
+It is incorrect to calculate the number of OOB bytes for the ECC engine
+using some "already known" ECC step size (1024 bytes here). The number
+of such bytes for the ECC engine must be the whole OOB area except the
+2 bytes reserved for the bad block marker, while the proper ECC step
+size and strength will be selected by the ECC logic.
+
+Fixes: 8fae856c5350 ("mtd: rawnand: meson: add support for Amlogic NAND flash controller")
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20230705065211.293500-1-AVKrasnov@sberdevices.ru
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mtd/nand/raw/meson_nand.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/mtd/nand/raw/meson_nand.c
++++ b/drivers/mtd/nand/raw/meson_nand.c
+@@ -1184,7 +1184,6 @@ static int meson_nand_attach_chip(struct
+       struct meson_nfc *nfc = nand_get_controller_data(nand);
+       struct meson_nfc_nand_chip *meson_chip = to_meson_nand(nand);
+       struct mtd_info *mtd = nand_to_mtd(nand);
+-      int nsectors = mtd->writesize / 1024;
+       int ret;
+       if (!mtd->name) {
+@@ -1202,7 +1201,7 @@ static int meson_nand_attach_chip(struct
+       nand->options |= NAND_NO_SUBPAGE_WRITE;
+       ret = nand_ecc_choose_conf(nand, nfc->data->ecc_caps,
+-                                 mtd->oobsize - 2 * nsectors);
++                                 mtd->oobsize - 2);
+       if (ret) {
+               dev_err(nfc->dev, "failed to ECC init\n");
+               return -EINVAL;
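
The effect is easiest to see with numbers. For an assumed geometry of a
2 KiB page with 64 bytes of OOB (illustrative, not taken from the
patch):

    #include <stdio.h>

    int main(void)
    {
        int writesize = 2048, oobsize = 64; /* assumed geometry */
        int nsectors = writesize / 1024;    /* = 2, from the guessed step */

        /* before: budget shrank with the guessed 1024-byte ECC step */
        printf("old: %d OOB bytes for ECC\n", oobsize - 2 * nsectors); /* 60 */
        /* after: whole OOB minus the 2-byte bad block marker */
        printf("new: %d OOB bytes for ECC\n", oobsize - 2);            /* 62 */
        return 0;
    }

nand_ecc_choose_conf() then derives the actual step size and strength
from whatever budget it is given.
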
diff --git a/queue-6.1/mtd-spinand-toshiba-fix-ecc_get_status.patch b/queue-6.1/mtd-spinand-toshiba-fix-ecc_get_status.patch
new file mode 100644
index 0000000..7f51dee
--- /dev/null
@@ -0,0 +1,54 @@
+From 8544cda94dae6be3f1359539079c68bb731428b1 Mon Sep 17 00:00:00 2001
+From: Olivier Maignial <olivier.maignial@hotmail.fr>
+Date: Fri, 23 Jun 2023 17:33:36 +0200
+Subject: mtd: spinand: toshiba: Fix ecc_get_status
+
+From: Olivier Maignial <olivier.maignial@hotmail.fr>
+
+commit 8544cda94dae6be3f1359539079c68bb731428b1 upstream.
+
+Reading the ECC status is failing.
+
+tx58cxgxsxraix_ecc_get_status() uses an on-stack buffer for the
+SPINAND_GET_FEATURE_OP() output, which is not suitable for the DMA
+needs of spi-mem.
+
+Fix this by using the dedicated spi-mem operations buffer,
+spinand->scratchbuf.
+
+See
+spinand->scratchbuf:
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/include/linux/mtd/spinand.h?h=v6.3#n418
+spi_mem_check_op():
+https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/spi/spi-mem.c?h=v6.3#n199
+
+Fixes: 10949af1681d ("mtd: spinand: Add initial support for Toshiba TC58CVG2S0H")
+Cc: stable@vger.kernel.org
+Signed-off-by: Olivier Maignial <olivier.maignial@hotmail.fr>
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/DB4P250MB1032553D05FBE36DEE0D311EFE23A@DB4P250MB1032.EURP250.PROD.OUTLOOK.COM
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mtd/nand/spi/toshiba.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/drivers/mtd/nand/spi/toshiba.c
++++ b/drivers/mtd/nand/spi/toshiba.c
+@@ -73,7 +73,7 @@ static int tx58cxgxsxraix_ecc_get_status
+ {
+       struct nand_device *nand = spinand_to_nand(spinand);
+       u8 mbf = 0;
+-      struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, &mbf);
++      struct spi_mem_op op = SPINAND_GET_FEATURE_OP(0x30, spinand->scratchbuf);
+       switch (status & STATUS_ECC_MASK) {
+       case STATUS_ECC_NO_BITFLIPS:
+@@ -92,7 +92,7 @@ static int tx58cxgxsxraix_ecc_get_status
+               if (spi_mem_exec_op(spinand->spimem, &op))
+                       return nanddev_get_ecc_conf(nand)->strength;
+-              mbf >>= 4;
++              mbf = *(spinand->scratchbuf) >> 4;
+               if (WARN_ON(mbf > nanddev_get_ecc_conf(nand)->strength || !mbf))
+                       return nanddev_get_ecc_conf(nand)->strength;
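
The underlying rule: spi-mem may DMA-map data buffers, and an on-stack
variable can sit in a vmalloc'ed stack (CONFIG_VMAP_STACK) that fails
spi_mem_check_op()'s DMA-ability test, while spinand->scratchbuf is
kmalloc'ed by the core for exactly this purpose. A minimal sketch of
the safe pattern (helper name invented for illustration):

    static int read_feature_dma_safe(struct spinand_device *spinand,
                                     u8 reg, u8 *val)
    {
        /* scratchbuf is kmalloc'ed, hence DMA-safe for spi-mem */
        struct spi_mem_op op = SPINAND_GET_FEATURE_OP(reg,
                                                      spinand->scratchbuf);
        int ret = spi_mem_exec_op(spinand->spimem, &op);

        if (!ret)
            *val = *spinand->scratchbuf;
        return ret;
    }
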
diff --git a/queue-6.1/net-tap_open-set-sk_uid-from-current_fsuid.patch b/queue-6.1/net-tap_open-set-sk_uid-from-current_fsuid.patch
new file mode 100644
index 0000000..cfda40c
--- /dev/null
@@ -0,0 +1,55 @@
+From 5c9241f3ceab3257abe2923a59950db0dc8bb737 Mon Sep 17 00:00:00 2001
+From: Laszlo Ersek <lersek@redhat.com>
+Date: Mon, 31 Jul 2023 18:42:37 +0200
+Subject: net: tap_open(): set sk_uid from current_fsuid()
+
+From: Laszlo Ersek <lersek@redhat.com>
+
+commit 5c9241f3ceab3257abe2923a59950db0dc8bb737 upstream.
+
+Commit 66b2c338adce initializes the "sk_uid" field in the protocol socket
+(struct sock) from the "/dev/tapX" device node's owner UID. Per original
+commit 86741ec25462 ("net: core: Add a UID field to struct sock.",
+2016-11-04), that's wrong: the idea is to cache the UID of the userspace
+process that creates the socket. Commit 86741ec25462 mentions socket() and
+accept(); with "tap", the action that creates the socket is
+open("/dev/tapX").
+
+Therefore the device node's owner UID is irrelevant. In most cases,
+"/dev/tapX" will be owned by root, so in practice, commit 66b2c338adce has
+no observable effect:
+
+- before, "sk_uid" would be zero, due to undefined behavior
+  (CVE-2023-1076),
+
+- after, "sk_uid" would be zero, due to "/dev/tapX" being owned by root.
+
+What matters is the (fs)UID of the process performing the open(), so cache
+that in "sk_uid".
+
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Lorenzo Colitti <lorenzo@google.com>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Pietro Borrello <borrello@diag.uniroma1.it>
+Cc: netdev@vger.kernel.org
+Cc: stable@vger.kernel.org
+Fixes: 66b2c338adce ("tap: tap_open(): correctly initialize socket uid")
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173435
+Signed-off-by: Laszlo Ersek <lersek@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -533,7 +533,7 @@ static int tap_open(struct inode *inode,
+       q->sock.state = SS_CONNECTED;
+       q->sock.file = file;
+       q->sock.ops = &tap_socket_ops;
+-      sock_init_data_uid(&q->sock, &q->sk, inode->i_uid);
++      sock_init_data_uid(&q->sock, &q->sk, current_fsuid());
+       q->sk.sk_write_space = tap_sock_write_space;
+       q->sk.sk_destruct = tap_sock_destruct;
+       q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
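
The distinction in one line: inode->i_uid is a property of the device
node (its owner on the filesystem), while current_fsuid() is the
filesystem UID of the task doing the open(), i.e. the socket's creator,
which is what sk_uid is meant to cache. A sketch of the rule (function
name invented; not the driver's full open path):

    static void cache_opener_uid(struct socket *sock, struct sock *sk)
    {
        /* not inode->i_uid (the device node's owner, usually root):
         * sk_uid must record the socket's creator, i.e. the opener */
        sock_init_data_uid(sock, sk, current_fsuid());
    }
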
diff --git a/queue-6.1/net-tun_chr_open-set-sk_uid-from-current_fsuid.patch b/queue-6.1/net-tun_chr_open-set-sk_uid-from-current_fsuid.patch
new file mode 100644
index 0000000..01faba4
--- /dev/null
@@ -0,0 +1,55 @@
+From 9bc3047374d5bec163e83e743709e23753376f0c Mon Sep 17 00:00:00 2001
+From: Laszlo Ersek <lersek@redhat.com>
+Date: Mon, 31 Jul 2023 18:42:36 +0200
+Subject: net: tun_chr_open(): set sk_uid from current_fsuid()
+
+From: Laszlo Ersek <lersek@redhat.com>
+
+commit 9bc3047374d5bec163e83e743709e23753376f0c upstream.
+
+Commit a096ccca6e50 initializes the "sk_uid" field in the protocol socket
+(struct sock) from the "/dev/net/tun" device node's owner UID. Per
+original commit 86741ec25462 ("net: core: Add a UID field to struct
+sock.", 2016-11-04), that's wrong: the idea is to cache the UID of the
+userspace process that creates the socket. Commit 86741ec25462 mentions
+socket() and accept(); with "tun", the action that creates the socket is
+open("/dev/net/tun").
+
+Therefore the device node's owner UID is irrelevant. In most cases,
+"/dev/net/tun" will be owned by root, so in practice, commit a096ccca6e50
+has no observable effect:
+
+- before, "sk_uid" would be zero, due to undefined behavior
+  (CVE-2023-1076),
+
+- after, "sk_uid" would be zero, due to "/dev/net/tun" being owned by root.
+
+What matters is the (fs)UID of the process performing the open(), so cache
+that in "sk_uid".
+
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Lorenzo Colitti <lorenzo@google.com>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Pietro Borrello <borrello@diag.uniroma1.it>
+Cc: netdev@vger.kernel.org
+Cc: stable@vger.kernel.org
+Fixes: a096ccca6e50 ("tun: tun_chr_open(): correctly initialize socket uid")
+Bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=2173435
+Signed-off-by: Laszlo Ersek <lersek@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -3457,7 +3457,7 @@ static int tun_chr_open(struct inode *in
+       tfile->socket.file = file;
+       tfile->socket.ops = &tun_socket_ops;
+-      sock_init_data_uid(&tfile->socket, &tfile->sk, inode->i_uid);
++      sock_init_data_uid(&tfile->socket, &tfile->sk, current_fsuid());
+       tfile->sk.sk_write_space = tun_sock_write_space;
+       tfile->sk.sk_sndbuf = INT_MAX;
diff --git a/queue-6.1/open-make-resolve_cached-correctly-test-for-o_tmpfile.patch b/queue-6.1/open-make-resolve_cached-correctly-test-for-o_tmpfile.patch
new file mode 100644
index 0000000..2b7f27e
--- /dev/null
@@ -0,0 +1,35 @@
+From a0fc452a5d7fed986205539259df1d60546f536c Mon Sep 17 00:00:00 2001
+From: Aleksa Sarai <cyphar@cyphar.com>
+Date: Sun, 6 Aug 2023 02:11:58 +1000
+Subject: open: make RESOLVE_CACHED correctly test for O_TMPFILE
+
+From: Aleksa Sarai <cyphar@cyphar.com>
+
+commit a0fc452a5d7fed986205539259df1d60546f536c upstream.
+
+O_TMPFILE is actually __O_TMPFILE|O_DIRECTORY. This means that the old
+fast-path check for RESOLVE_CACHED would reject all users passing
+O_DIRECTORY with -EAGAIN, when in fact the intended test was to check
+for __O_TMPFILE.
+
+Cc: stable@vger.kernel.org # v5.12+
+Fixes: 99668f618062 ("fs: expose LOOKUP_CACHED through openat2() RESOLVE_CACHED")
+Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
+Message-Id: <20230806-resolve_cached-o_tmpfile-v1-1-7ba16308465e@cyphar.com>
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/open.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/open.c
++++ b/fs/open.c
+@@ -1233,7 +1233,7 @@ inline int build_open_flags(const struct
+               lookup_flags |= LOOKUP_IN_ROOT;
+       if (how->resolve & RESOLVE_CACHED) {
+               /* Don't bother even trying for create/truncate/tmpfile open */
+-              if (flags & (O_TRUNC | O_CREAT | O_TMPFILE))
++              if (flags & (O_TRUNC | O_CREAT | __O_TMPFILE))
+                       return -EAGAIN;
+               lookup_flags |= LOOKUP_CACHED;
+       }
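
The flag layout behind the false positive, using the asm-generic values
shared by most architectures (a small standalone demonstration, not
kernel code):

    #include <stdio.h>

    #define O_DIRECTORY   0200000               /* asm-generic value */
    #define __O_TMPFILE  020000000
    #define O_TMPFILE    (__O_TMPFILE | O_DIRECTORY)

    int main(void)
    {
        int flags = O_DIRECTORY;        /* an ordinary directory open */

        /* old test: O_DIRECTORY alone already trips the mask */
        printf("flags & O_TMPFILE   = %#o\n", flags & O_TMPFILE);
        /* fixed test: only a real tmpfile open matches */
        printf("flags & __O_TMPFILE = %#o\n", flags & __O_TMPFILE);
        return 0;
    }

The first test yields 0200000 (truthy) for a plain O_DIRECTORY open;
the fixed test yields 0.
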
diff --git a/queue-6.1/powerpc-ftrace-create-a-dummy-stackframe-to-fix-stack-unwind.patch b/queue-6.1/powerpc-ftrace-create-a-dummy-stackframe-to-fix-stack-unwind.patch
new file mode 100644
index 0000000..70b9ae2
--- /dev/null
@@ -0,0 +1,115 @@
+From 41a506ef71eb38d94fe133f565c87c3e06ccc072 Mon Sep 17 00:00:00 2001
+From: Naveen N Rao <naveen@kernel.org>
+Date: Wed, 21 Jun 2023 10:43:49 +0530
+Subject: powerpc/ftrace: Create a dummy stackframe to fix stack unwind
+
+From: Naveen N Rao <naveen@kernel.org>
+
+commit 41a506ef71eb38d94fe133f565c87c3e06ccc072 upstream.
+
+With ppc64 -mprofile-kernel and ppc32 -pg, profiling instructions to
+call into ftrace are emitted right at function entry. The instruction
+sequence used is minimal to reduce overhead. Crucially, a stackframe is
+not created for the function being traced. This breaks stack unwinding
+since the function being traced does not have a stackframe for itself.
+As such, it never shows up in the backtrace:
+
+/sys/kernel/debug/tracing # echo 1 > /proc/sys/kernel/stack_tracer_enabled
+/sys/kernel/debug/tracing # cat stack_trace
+        Depth    Size   Location    (17 entries)
+        -----    ----   --------
+  0)     4144      32   ftrace_call+0x4/0x44
+  1)     4112     432   get_page_from_freelist+0x26c/0x1ad0
+  2)     3680     496   __alloc_pages+0x290/0x1280
+  3)     3184     336   __folio_alloc+0x34/0x90
+  4)     2848     176   vma_alloc_folio+0xd8/0x540
+  5)     2672     272   __handle_mm_fault+0x700/0x1cc0
+  6)     2400     208   handle_mm_fault+0xf0/0x3f0
+  7)     2192      80   ___do_page_fault+0x3e4/0xbe0
+  8)     2112     160   do_page_fault+0x30/0xc0
+  9)     1952     256   data_access_common_virt+0x210/0x220
+ 10)     1696     400   0xc00000000f16b100
+ 11)     1296     384   load_elf_binary+0x804/0x1b80
+ 12)      912     208   bprm_execve+0x2d8/0x7e0
+ 13)      704      64   do_execveat_common+0x1d0/0x2f0
+ 14)      640     160   sys_execve+0x54/0x70
+ 15)      480      64   system_call_exception+0x138/0x350
+ 16)      416     416   system_call_common+0x160/0x2c4
+
+Fix this by having ftrace create a dummy stackframe for the function
+being traced. With this, backtraces now capture the function being
+traced:
+
+/sys/kernel/debug/tracing # cat stack_trace
+        Depth    Size   Location    (17 entries)
+        -----    ----   --------
+  0)     3888      32   _raw_spin_trylock+0x8/0x70
+  1)     3856     576   get_page_from_freelist+0x26c/0x1ad0
+  2)     3280      64   __alloc_pages+0x290/0x1280
+  3)     3216     336   __folio_alloc+0x34/0x90
+  4)     2880     176   vma_alloc_folio+0xd8/0x540
+  5)     2704     416   __handle_mm_fault+0x700/0x1cc0
+  6)     2288      96   handle_mm_fault+0xf0/0x3f0
+  7)     2192      48   ___do_page_fault+0x3e4/0xbe0
+  8)     2144     192   do_page_fault+0x30/0xc0
+  9)     1952     608   data_access_common_virt+0x210/0x220
+ 10)     1344      16   0xc0000000334bbb50
+ 11)     1328     416   load_elf_binary+0x804/0x1b80
+ 12)      912      64   bprm_execve+0x2d8/0x7e0
+ 13)      848     176   do_execveat_common+0x1d0/0x2f0
+ 14)      672     192   sys_execve+0x54/0x70
+ 15)      480      64   system_call_exception+0x138/0x350
+ 16)      416     416   system_call_common+0x160/0x2c4
+
+This results in two additional stores in the ftrace entry code, but
+produces reliable backtraces.
+
+Fixes: 153086644fd1 ("powerpc/ftrace: Add support for -mprofile-kernel ftrace ABI")
+Cc: stable@vger.kernel.org
+Signed-off-by: Naveen N Rao <naveen@kernel.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://msgid.link/20230621051349.759567-1-naveen@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/powerpc/kernel/trace/ftrace_mprofile.S |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/arch/powerpc/kernel/trace/ftrace_mprofile.S
++++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S
+@@ -33,6 +33,9 @@
+  * and then arrange for the ftrace function to be called.
+  */
+ .macro        ftrace_regs_entry allregs
++      /* Create a minimal stack frame for representing B */
++      PPC_STLU        r1, -STACK_FRAME_MIN_SIZE(r1)
++
+       /* Create our stack frame + pt_regs */
+       PPC_STLU        r1,-SWITCH_FRAME_SIZE(r1)
+@@ -42,7 +45,7 @@
+ #ifdef CONFIG_PPC64
+       /* Save the original return address in A's stack frame */
+-      std     r0, LRSAVE+SWITCH_FRAME_SIZE(r1)
++      std     r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1)
+       /* Ok to continue? */
+       lbz     r3, PACA_FTRACE_ENABLED(r13)
+       cmpdi   r3, 0
+@@ -77,6 +80,8 @@
+       mflr    r7
+       /* Save it as pt_regs->nip */
+       PPC_STL r7, _NIP(r1)
++      /* Also save it in B's stackframe header for proper unwind */
++      PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1)
+       /* Save the read LR in pt_regs->link */
+       PPC_STL r0, _LINK(r1)
+@@ -142,7 +147,7 @@
+ #endif
+       /* Pop our stack frame */
+-      addi r1, r1, SWITCH_FRAME_SIZE
++      addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE
+ #ifdef CONFIG_LIVEPATCH_64
+         /* Based on the cmpd above, if the NIP was altered handle livepatch */
diff --git a/queue-6.1/rbd-prevent-busy-loop-when-requesting-exclusive-lock.patch b/queue-6.1/rbd-prevent-busy-loop-when-requesting-exclusive-lock.patch
new file mode 100644
index 0000000..f1a7ba0
--- /dev/null
@@ -0,0 +1,109 @@
+From 9d01e07fd1bfb4daae156ab528aa196f5ac2b2bc Mon Sep 17 00:00:00 2001
+From: Ilya Dryomov <idryomov@gmail.com>
+Date: Tue, 1 Aug 2023 19:14:24 +0200
+Subject: rbd: prevent busy loop when requesting exclusive lock
+
+From: Ilya Dryomov <idryomov@gmail.com>
+
+commit 9d01e07fd1bfb4daae156ab528aa196f5ac2b2bc upstream.
+
+Because rbd_try_acquire_lock() effectively swallows every error from
+rbd_try_lock() except EBLOCKLISTED ("request lock anyway"), and
+rbd_request_lock() returns ETIMEDOUT not only for an actual notify
+timeout but also when the lock owner doesn't respond, a busy loop
+inside rbd_acquire_lock() between rbd_try_acquire_lock() and
+rbd_request_lock() is possible.
+
+Requesting the lock on an EBUSY error (returned by get_lock_owner_info()
+if an incompatible lock or an invalid lock owner is detected) makes very
+little sense.  The same goes for an ETIMEDOUT error (which might pop up
+pretty much anywhere if the osd_request_timeout option is set) and many
+others.
+
+Just fail I/O requests on rbd_dev->acquiring_list immediately on any
+error from rbd_try_lock().
+
+Cc: stable@vger.kernel.org # 588159009d5b: rbd: retrieve and check lock owner twice before blocklisting
+Cc: stable@vger.kernel.org
+Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
+Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/block/rbd.c |   28 +++++++++++++++-------------
+ 1 file changed, 15 insertions(+), 13 deletions(-)
+
+--- a/drivers/block/rbd.c
++++ b/drivers/block/rbd.c
+@@ -3676,7 +3676,7 @@ static int rbd_lock(struct rbd_device *r
+       ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
+                           RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
+                           RBD_LOCK_TAG, "", 0);
+-      if (ret)
++      if (ret && ret != -EEXIST)
+               return ret;
+       __rbd_lock(rbd_dev, cookie);
+@@ -3879,7 +3879,7 @@ static struct ceph_locker *get_lock_owne
+                                &rbd_dev->header_oloc, RBD_LOCK_NAME,
+                                &lock_type, &lock_tag, &lockers, &num_lockers);
+       if (ret) {
+-              rbd_warn(rbd_dev, "failed to retrieve lockers: %d", ret);
++              rbd_warn(rbd_dev, "failed to get header lockers: %d", ret);
+               return ERR_PTR(ret);
+       }
+@@ -3941,8 +3941,10 @@ static int find_watcher(struct rbd_devic
+       ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
+                                     &rbd_dev->header_oloc, &watchers,
+                                     &num_watchers);
+-      if (ret)
++      if (ret) {
++              rbd_warn(rbd_dev, "failed to get watchers: %d", ret);
+               return ret;
++      }
+       sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
+       for (i = 0; i < num_watchers; i++) {
+@@ -3986,8 +3988,12 @@ static int rbd_try_lock(struct rbd_devic
+               locker = refreshed_locker = NULL;
+               ret = rbd_lock(rbd_dev);
+-              if (ret != -EBUSY)
++              if (!ret)
++                      goto out;
++              if (ret != -EBUSY) {
++                      rbd_warn(rbd_dev, "failed to lock header: %d", ret);
+                       goto out;
++              }
+               /* determine if the current lock holder is still alive */
+               locker = get_lock_owner_info(rbd_dev);
+@@ -4090,11 +4096,8 @@ static int rbd_try_acquire_lock(struct r
+       ret = rbd_try_lock(rbd_dev);
+       if (ret < 0) {
+-              rbd_warn(rbd_dev, "failed to lock header: %d", ret);
+-              if (ret == -EBLOCKLISTED)
+-                      goto out;
+-
+-              ret = 1; /* request lock anyway */
++              rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
++              goto out;
+       }
+       if (ret > 0) {
+               up_write(&rbd_dev->lock_rwsem);
+@@ -6628,12 +6631,11 @@ static int rbd_add_acquire_lock(struct r
+               cancel_delayed_work_sync(&rbd_dev->lock_dwork);
+               if (!ret)
+                       ret = -ETIMEDOUT;
+-      }
+-      if (ret) {
+-              rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
+-              return ret;
++              rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret);
+       }
++      if (ret)
++              return ret;
+       /*
+        * The lock may have been released by now, unless automatic lock
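
Condensed, the loop the patch breaks looked roughly like this (heavily
simplified control flow, not the driver's code):

    static void acquire_sketch(struct rbd_device *rbd_dev)
    {
        int ret = rbd_try_lock(rbd_dev);

        if (ret < 0) {
            /* before: all errors but -EBLOCKLISTED became "request the
             * lock from the owner", and an unresponsive owner turned
             * into -ETIMEDOUT, rescheduling the acquire: a busy loop.
             * after: fail I/O queued on acquiring_list immediately. */
            rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
            return;
        }
        if (ret > 0)
            rbd_request_lock(rbd_dev);  /* lock held elsewhere: ask owner */
    }
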
diff --git a/queue-6.1/rust-allocator-prevent-mis-aligned-allocation.patch b/queue-6.1/rust-allocator-prevent-mis-aligned-allocation.patch
new file mode 100644
index 0000000..b260f17
--- /dev/null
@@ -0,0 +1,149 @@
+From b3d8aa84bbfe9b58ccc5332cacf8ea17200af310 Mon Sep 17 00:00:00 2001
+From: Boqun Feng <boqun.feng@gmail.com>
+Date: Sat, 29 Jul 2023 18:29:02 -0700
+Subject: rust: allocator: Prevent mis-aligned allocation
+
+From: Boqun Feng <boqun.feng@gmail.com>
+
+commit b3d8aa84bbfe9b58ccc5332cacf8ea17200af310 upstream.
+
+Currently the Rust allocator simply passes the size of the type Layout
+to krealloc(), and in theory the alignment requirement from the type
+Layout may be larger than the guarantee provided by SLAB, which means
+the allocated object is mis-aligned.
+
+Fix this by rounding the allocation size up to the next power of two,
+for which SLAB always guarantees a size-aligned allocation. And because
+Rust guarantees that the original size is a multiple of the alignment
+and the alignment is a power of two, the alignment requirement is
+satisfied.
+
+Suggested-by: Vlastimil Babka <vbabka@suse.cz>
+Co-developed-by: "Andreas Hindborg (Samsung)" <nmi@metaspace.dk>
+Signed-off-by: "Andreas Hindborg (Samsung)" <nmi@metaspace.dk>
+Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
+Cc: stable@vger.kernel.org # v6.1+
+Acked-by: Vlastimil Babka <vbabka@suse.cz>
+Fixes: 247b365dc8dc ("rust: add `kernel` crate")
+Link: https://github.com/Rust-for-Linux/linux/issues/974
+Link: https://lore.kernel.org/r/20230730012905.643822-2-boqun.feng@gmail.com
+[ Applied rewording of comment as discussed in the mailing list. ]
+Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ rust/bindings/bindings_helper.h |    1 
+ rust/kernel/allocator.rs        |   74 +++++++++++++++++++++++++++++++---------
+ 2 files changed, 60 insertions(+), 15 deletions(-)
+
+--- a/rust/bindings/bindings_helper.h
++++ b/rust/bindings/bindings_helper.h
+@@ -9,5 +9,6 @@
+ #include <linux/slab.h>
+ /* `bindgen` gets confused at certain things. */
++const size_t BINDINGS_ARCH_SLAB_MINALIGN = ARCH_SLAB_MINALIGN;
+ const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL;
+ const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO;
+--- a/rust/kernel/allocator.rs
++++ b/rust/kernel/allocator.rs
+@@ -9,6 +9,36 @@ use crate::bindings;
+ struct KernelAllocator;
++/// Calls `krealloc` with a proper size to alloc a new object aligned to `new_layout`'s alignment.
++///
++/// # Safety
++///
++/// - `ptr` can be either null or a pointer which has been allocated by this allocator.
++/// - `new_layout` must have a non-zero size.
++unsafe fn krealloc_aligned(ptr: *mut u8, new_layout: Layout, flags: bindings::gfp_t) -> *mut u8 {
++    // Customized layouts from `Layout::from_size_align()` can have size < align, so pad first.
++    let layout = new_layout.pad_to_align();
++
++    let mut size = layout.size();
++
++    if layout.align() > bindings::BINDINGS_ARCH_SLAB_MINALIGN {
++        // The alignment requirement exceeds the slab guarantee, thus try to enlarge the size
++        // to use the "power-of-two" size/alignment guarantee (see comments in `kmalloc()` for
++        // more information).
++        //
++        // Note that `layout.size()` (after padding) is guaranteed to be a multiple of
++        // `layout.align()`, so `next_power_of_two` gives enough alignment guarantee.
++        size = size.next_power_of_two();
++    }
++
++    // SAFETY:
++    // - `ptr` is either null or a pointer returned from a previous `k{re}alloc()` by the
++    //   function safety requirement.
++    // - `size` is greater than 0 since it's either a `layout.size()` (which cannot be zero
++    //    according to the function safety requirement) or a result from `next_power_of_two()`.
++    unsafe { bindings::krealloc(ptr as *const core::ffi::c_void, size, flags) as *mut u8 }
++}
++
+ unsafe impl GlobalAlloc for KernelAllocator {
+     unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
+         // `krealloc()` is used instead of `kmalloc()` because the latter is
+@@ -30,10 +60,20 @@ static ALLOCATOR: KernelAllocator = Kern
+ // to extract the object file that has them from the archive. For the moment,
+ // let's generate them ourselves instead.
+ //
++// Note: Although these are *safe* functions, they are called by the compiler
++// with parameters that obey the same `GlobalAlloc` function safety
++// requirements: size and align should form a valid layout, and size is
++// greater than 0.
++//
+ // Note that `#[no_mangle]` implies exported too, nowadays.
+ #[no_mangle]
+-fn __rust_alloc(size: usize, _align: usize) -> *mut u8 {
+-    unsafe { bindings::krealloc(core::ptr::null(), size, bindings::GFP_KERNEL) as *mut u8 }
++fn __rust_alloc(size: usize, align: usize) -> *mut u8 {
++    // SAFETY: See assumption above.
++    let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
++
++    // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater
++    // than 0.
++    unsafe { krealloc_aligned(ptr::null_mut(), layout, bindings::GFP_KERNEL) }
+ }
+ #[no_mangle]
+@@ -42,23 +82,27 @@ fn __rust_dealloc(ptr: *mut u8, _size: u
+ }
+ #[no_mangle]
+-fn __rust_realloc(ptr: *mut u8, _old_size: usize, _align: usize, new_size: usize) -> *mut u8 {
+-    unsafe {
+-        bindings::krealloc(
+-            ptr as *const core::ffi::c_void,
+-            new_size,
+-            bindings::GFP_KERNEL,
+-        ) as *mut u8
+-    }
++fn __rust_realloc(ptr: *mut u8, _old_size: usize, align: usize, new_size: usize) -> *mut u8 {
++    // SAFETY: See assumption above.
++    let new_layout = unsafe { Layout::from_size_align_unchecked(new_size, align) };
++
++    // SAFETY: Per assumption above, `ptr` is allocated by `__rust_*` before, and the size of
++    // `new_layout` is greater than 0.
++    unsafe { krealloc_aligned(ptr, new_layout, bindings::GFP_KERNEL) }
+ }
+ #[no_mangle]
+-fn __rust_alloc_zeroed(size: usize, _align: usize) -> *mut u8 {
++fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8 {
++    // SAFETY: See assumption above.
++    let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
++
++    // SAFETY: `ptr::null_mut()` is null, per assumption above the size of `layout` is greater
++    // than 0.
+     unsafe {
+-        bindings::krealloc(
+-            core::ptr::null(),
+-            size,
++        krealloc_aligned(
++            ptr::null_mut(),
++            layout,
+             bindings::GFP_KERNEL | bindings::__GFP_ZERO,
+-        ) as *mut u8
++        )
+     }
+ }
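
The size adjustment leans on a kmalloc property: a power-of-two
allocation size is served from a size-aligned slab cache, while in
general only ARCH_SLAB_MINALIGN alignment is guaranteed. A standalone C
rendering of the idea (illustrative only; the real code is the Rust
krealloc_aligned() above, and the MINALIGN value is arch-specific):

    #include <stddef.h>

    #define ARCH_SLAB_MINALIGN 8    /* assumed value for illustration */

    static size_t kmalloc_size_for(size_t size, size_t align)
    {
        /* preconditions per the Rust Layout rules: size is a non-zero
         * multiple of align, and align is a power of two */
        if (align > ARCH_SLAB_MINALIGN) {
            size_t p = 1;
            while (p < size)        /* round up to a power of two */
                p <<= 1;
            size = p;               /* size-aligned slab => aligned ptr */
        }
        return size;
    }
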
diff --git a/queue-6.1/scsi-storvsc-limit-max_sectors-for-virtual-fibre-channel-devices.patch b/queue-6.1/scsi-storvsc-limit-max_sectors-for-virtual-fibre-channel-devices.patch
new file mode 100644
index 0000000..fd0a3f7
--- /dev/null
@@ -0,0 +1,51 @@
+From 010c1e1c5741365dbbf44a5a5bb9f30192875c4c Mon Sep 17 00:00:00 2001
+From: Michael Kelley <mikelley@microsoft.com>
+Date: Thu, 20 Jul 2023 14:05:02 -0700
+Subject: scsi: storvsc: Limit max_sectors for virtual Fibre Channel devices
+
+From: Michael Kelley <mikelley@microsoft.com>
+
+commit 010c1e1c5741365dbbf44a5a5bb9f30192875c4c upstream.
+
+The Hyper-V host is queried to get the max transfer size that it supports,
+and this value is used to set max_sectors for the synthetic SCSI
+controller.  However, this max transfer size may be too large for virtual
+Fibre Channel devices, which are limited to 512 Kbytes.  If a larger
+transfer size is used with a vFC device, Hyper-V always returns an error,
+and storvsc logs a message like this where the SRB status and SCSI status
+are both zero:
+
+hv_storvsc <GUID>: tag#197 cmd 0x8a status: scsi 0x0 srb 0x0 hv 0xc0000001
+
+Add logic to limit the max transfer size to 512 Kbytes for vFC devices.
+
+Fixes: 1d3e0980782f ("scsi: storvsc: Correct reporting of Hyper-V I/O size limits")
+Cc: stable@vger.kernel.org
+Signed-off-by: Michael Kelley <mikelley@microsoft.com>
+Link: https://lore.kernel.org/r/1689887102-32806-1-git-send-email-mikelley@microsoft.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/scsi/storvsc_drv.c |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/drivers/scsi/storvsc_drv.c
++++ b/drivers/scsi/storvsc_drv.c
+@@ -365,6 +365,7 @@ static void storvsc_on_channel_callback(
+ #define STORVSC_FC_MAX_LUNS_PER_TARGET                        255
+ #define STORVSC_FC_MAX_TARGETS                                128
+ #define STORVSC_FC_MAX_CHANNELS                               8
++#define STORVSC_FC_MAX_XFER_SIZE                      ((u32)(512 * 1024))
+ #define STORVSC_IDE_MAX_LUNS_PER_TARGET                       64
+ #define STORVSC_IDE_MAX_TARGETS                               1
+@@ -2002,6 +2003,9 @@ static int storvsc_probe(struct hv_devic
+        * protecting it from any weird value.
+        */
+       max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE);
++      if (is_fc)
++              max_xfer_bytes = min(max_xfer_bytes, STORVSC_FC_MAX_XFER_SIZE);
++
+       /* max_hw_sectors_kb */
+       host->max_sectors = max_xfer_bytes >> 9;
+       /*
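
Since max_sectors counts 512-byte units, the new cap works out to 1024
sectors (the shift by 9 divides by 512); a one-line check:

    #include <stdio.h>

    int main(void)
    {
        unsigned int max_xfer_bytes = 512 * 1024;       /* vFC limit */

        /* mirrors host->max_sectors = max_xfer_bytes >> 9 */
        printf("%u sectors\n", max_xfer_bytes >> 9);    /* 1024 */
        return 0;
    }
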
diff --git a/queue-6.1/scsi-zfcp-defer-fc_rport-blocking-until-after-adisc-response.patch b/queue-6.1/scsi-zfcp-defer-fc_rport-blocking-until-after-adisc-response.patch
new file mode 100644
index 0000000..ef46422
--- /dev/null
@@ -0,0 +1,60 @@
+From e65851989001c0c9ba9177564b13b38201c0854c Mon Sep 17 00:00:00 2001
+From: Steffen Maier <maier@linux.ibm.com>
+Date: Mon, 24 Jul 2023 16:51:56 +0200
+Subject: scsi: zfcp: Defer fc_rport blocking until after ADISC response
+
+From: Steffen Maier <maier@linux.ibm.com>
+
+commit e65851989001c0c9ba9177564b13b38201c0854c upstream.
+
+Storage devices are free to send RSCNs, e.g. for internal state changes. If
+this happens on all connected paths, zfcp risks temporarily losing all
+paths at the same time. This has strong requirements on multipath
+configuration such as "no_path_retry queue".
+
+Avoid such situations by deferring fc_rport blocking until after the ADISC
+response, when any actual state change of the remote port became clear.
+The already existing port recovery triggers explicitly block the fc_rport.
+The triggers are: on ADISC reject or timeout (typical cable pull case), and
+on ADISC indicating that the remote port has changed its WWPN or
+the port is meanwhile no longer open.
+
+As a side effect, this also removes a confusing direct call to another
+work item function, zfcp_scsi_rport_work(), made instead of scheduling
+that other work item. It was probably done that way to make the rport
+blocking side effect immediate and synchronous for the caller.
+
+Fixes: a2fa0aede07c ("[SCSI] zfcp: Block FC transport rports early on errors")
+Cc: stable@vger.kernel.org #v2.6.30+
+Reviewed-by: Benjamin Block <bblock@linux.ibm.com>
+Reviewed-by: Fedor Loshakov <loshakov@linux.ibm.com>
+Signed-off-by: Steffen Maier <maier@linux.ibm.com>
+Link: https://lore.kernel.org/r/20230724145156.3920244-1-maier@linux.ibm.com
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/s390/scsi/zfcp_fc.c |    6 +-----
+ 1 file changed, 1 insertion(+), 5 deletions(-)
+
+--- a/drivers/s390/scsi/zfcp_fc.c
++++ b/drivers/s390/scsi/zfcp_fc.c
+@@ -534,8 +534,7 @@ static void zfcp_fc_adisc_handler(void *
+       /* re-init to undo drop from zfcp_fc_adisc() */
+       port->d_id = ntoh24(adisc_resp->adisc_port_id);
+-      /* port is good, unblock rport without going through erp */
+-      zfcp_scsi_schedule_rport_register(port);
++      /* port is still good, nothing to do */
+  out:
+       atomic_andnot(ZFCP_STATUS_PORT_LINK_TEST, &port->status);
+       put_device(&port->dev);
+@@ -595,9 +594,6 @@ void zfcp_fc_link_test_work(struct work_
+       int retval;
+       set_worker_desc("zadisc%16llx", port->wwpn); /* < WORKER_DESC_LEN=24 */
+-      get_device(&port->dev);
+-      port->rport_task = RPORT_DEL;
+-      zfcp_scsi_rport_work(&port->rport_work);
+       /* only issue one test command at one time per port */
+       if (atomic_read(&port->status) & ZFCP_STATUS_PORT_LINK_TEST)
diff --git a/queue-6.1/series b/queue-6.1/series
index 39a2f438ae69dcf3aea1d9191758f331b4fe7653..9acc4f2469684d1ce4cbdfc7befcfb540f0158db 100644
@@ -67,3 +67,30 @@ tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch
 tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch
 tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch
 tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch
+rust-allocator-prevent-mis-aligned-allocation.patch
+scsi-zfcp-defer-fc_rport-blocking-until-after-adisc-response.patch
+scsi-storvsc-limit-max_sectors-for-virtual-fibre-channel-devices.patch
+libceph-fix-potential-hang-in-ceph_osdc_notify.patch
+usb-zaurus-add-id-for-a-300-b-500-c-700.patch
+ceph-defer-stopping-mdsc-delayed_work.patch
+firmware-arm_scmi-drop-of-node-reference-in-the-transport-channel-setup.patch
+exfat-use-kvmalloc_array-kvfree-instead-of-kmalloc_array-kfree.patch
+exfat-release-s_lock-before-calling-dir_emit.patch
+mtd-spinand-toshiba-fix-ecc_get_status.patch
+mtd-rawnand-meson-fix-oob-available-bytes-for-ecc.patch
+bpf-disable-preemption-in-bpf_perf_event_output.patch
+arm64-dts-stratix10-fix-incorrect-i2c-property-for-scl-signal.patch
+net-tun_chr_open-set-sk_uid-from-current_fsuid.patch
+net-tap_open-set-sk_uid-from-current_fsuid.patch
+wifi-mt76-mt7615-do-not-advertise-5-ghz-on-first-phy-of-mt7615d-dbdc.patch
+x86-hyperv-disable-ibt-when-hypercall-page-lacks-endbr-instruction.patch
+rbd-prevent-busy-loop-when-requesting-exclusive-lock.patch
+bpf-disable-preemption-in-bpf_event_output.patch
+powerpc-ftrace-create-a-dummy-stackframe-to-fix-stack-unwind.patch
+arm64-fpsimd-sync-and-zero-pad-fpsimd-state-for-streaming-sve.patch
+arm64-fpsimd-clear-sme-state-in-the-target-task-when-setting-the-vl.patch
+arm64-fpsimd-sync-fpsimd-state-with-sve-for-sme-only-systems.patch
+open-make-resolve_cached-correctly-test-for-o_tmpfile.patch
+drm-ttm-check-null-pointer-before-accessing-when-swapping.patch
+drm-i915-fix-premature-release-of-request-s-reusable-memory.patch
+drm-i915-gt-cleanup-aux-invalidation-registers.patch
diff --git a/queue-6.1/usb-zaurus-add-id-for-a-300-b-500-c-700.patch b/queue-6.1/usb-zaurus-add-id-for-a-300-b-500-c-700.patch
new file mode 100644
index 0000000..7e2c585
--- /dev/null
@@ -0,0 +1,111 @@
+From b99225b4fe297d07400f9e2332ecd7347b224f8d Mon Sep 17 00:00:00 2001
+From: Ross Maynard <bids.7405@bigpond.com>
+Date: Mon, 31 Jul 2023 15:42:04 +1000
+Subject: USB: zaurus: Add ID for A-300/B-500/C-700
+
+From: Ross Maynard <bids.7405@bigpond.com>
+
+commit b99225b4fe297d07400f9e2332ecd7347b224f8d upstream.
+
+The SL-A300, B500/5600, and C700 devices no longer auto-load because of
+commit 16adf5d07987 ("usbnet: Remove over-broad module alias from zaurus").
+This patch adds IDs for those three devices.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=217632
+Fixes: 16adf5d07987 ("usbnet: Remove over-broad module alias from zaurus.")
+Signed-off-by: Ross Maynard <bids.7405@bigpond.com>
+Cc: stable@vger.kernel.org
+Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/69b5423b-2013-9fc9-9569-58e707d9bafb@bigpond.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/usb/cdc_ether.c |   21 +++++++++++++++++++++
+ drivers/net/usb/zaurus.c    |   21 +++++++++++++++++++++
+ 2 files changed, 42 insertions(+)
+
+--- a/drivers/net/usb/cdc_ether.c
++++ b/drivers/net/usb/cdc_ether.c
+@@ -618,6 +618,13 @@ static const struct usb_device_id produc
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                         | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
++      .idProduct              = 0x8005,   /* A-300 */
++      ZAURUS_FAKE_INTERFACE,
++      .driver_info        = 0,
++}, {
++      .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
++                        | USB_DEVICE_ID_MATCH_DEVICE,
++      .idVendor               = 0x04DD,
+       .idProduct              = 0x8006,       /* B-500/SL-5600 */
+       ZAURUS_MASTER_INTERFACE,
+       .driver_info            = 0,
+@@ -625,11 +632,25 @@ static const struct usb_device_id        produc
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                         | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
++      .idProduct              = 0x8006,   /* B-500/SL-5600 */
++      ZAURUS_FAKE_INTERFACE,
++      .driver_info        = 0,
++}, {
++      .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
++                        | USB_DEVICE_ID_MATCH_DEVICE,
++      .idVendor               = 0x04DD,
+       .idProduct              = 0x8007,       /* C-700 */
+       ZAURUS_MASTER_INTERFACE,
+       .driver_info            = 0,
+ }, {
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
++                        | USB_DEVICE_ID_MATCH_DEVICE,
++      .idVendor               = 0x04DD,
++      .idProduct              = 0x8007,   /* C-700 */
++      ZAURUS_FAKE_INTERFACE,
++      .driver_info        = 0,
++}, {
++      .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
+       .idProduct              = 0x9031,       /* C-750 C-760 */
+--- a/drivers/net/usb/zaurus.c
++++ b/drivers/net/usb/zaurus.c
+@@ -289,11 +289,25 @@ static const struct usb_device_id        produc
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                         | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
++      .idProduct              = 0x8005,       /* A-300 */
++      ZAURUS_FAKE_INTERFACE,
++      .driver_info = (unsigned long)&bogus_mdlm_info,
++}, {
++      .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
++                        | USB_DEVICE_ID_MATCH_DEVICE,
++      .idVendor               = 0x04DD,
+       .idProduct              = 0x8006,       /* B-500/SL-5600 */
+       ZAURUS_MASTER_INTERFACE,
+       .driver_info = ZAURUS_PXA_INFO,
+ }, {
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
++                        | USB_DEVICE_ID_MATCH_DEVICE,
++      .idVendor               = 0x04DD,
++      .idProduct              = 0x8006,       /* B-500/SL-5600 */
++      ZAURUS_FAKE_INTERFACE,
++      .driver_info = (unsigned long)&bogus_mdlm_info,
++}, {
++      .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                 | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
+       .idProduct              = 0x8007,       /* C-700 */
+@@ -301,6 +315,13 @@ static const struct usb_device_id produc
+       .driver_info = ZAURUS_PXA_INFO,
+ }, {
+       .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
++                        | USB_DEVICE_ID_MATCH_DEVICE,
++      .idVendor               = 0x04DD,
++      .idProduct              = 0x8007,       /* C-700 */
++      ZAURUS_FAKE_INTERFACE,
++      .driver_info = (unsigned long)&bogus_mdlm_info,
++}, {
++      .match_flags    =   USB_DEVICE_ID_MATCH_INT_INFO
+                | USB_DEVICE_ID_MATCH_DEVICE,
+       .idVendor               = 0x04DD,
+       .idProduct              = 0x9031,       /* C-750 C-760 */
diff --git a/queue-6.1/wifi-mt76-mt7615-do-not-advertise-5-ghz-on-first-phy-of-mt7615d-dbdc.patch b/queue-6.1/wifi-mt76-mt7615-do-not-advertise-5-ghz-on-first-phy-of-mt7615d-dbdc.patch
new file mode 100644
index 0000000..aa83c18
--- /dev/null
@@ -0,0 +1,45 @@
+From 421033deb91521aa6a9255e495cb106741a52275 Mon Sep 17 00:00:00 2001
+From: Paul Fertser <fercerpav@gmail.com>
+Date: Mon, 5 Jun 2023 10:34:07 +0300
+Subject: wifi: mt76: mt7615: do not advertise 5 GHz on first phy of MT7615D (DBDC)
+
+From: Paul Fertser <fercerpav@gmail.com>
+
+commit 421033deb91521aa6a9255e495cb106741a52275 upstream.
+
+On DBDC devices the first (internal) phy is only capable of using the
+2.4 GHz band, and the 5 GHz band is exposed via a separate phy object,
+so avoid the false advertising.
+
+Reported-by: Rani Hod <rani.hod@gmail.com>
+Closes: https://github.com/openwrt/openwrt/pull/12361
+Fixes: 7660a1bd0c22 ("mt76: mt7615: register ext_phy if DBDC is detected")
+Cc: stable@vger.kernel.org
+Signed-off-by: Paul Fertser <fercerpav@gmail.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Acked-by: Felix Fietkau <nbd@nbd.name>
+Signed-off-by: Kalle Valo <kvalo@kernel.org>
+Link: https://lore.kernel.org/r/20230605073408.8699-1-fercerpav@gmail.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
++++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+@@ -123,12 +123,12 @@ mt7615_eeprom_parse_hw_band_cap(struct m
+       case MT_EE_5GHZ:
+               dev->mphy.cap.has_5ghz = true;
+               break;
+-      case MT_EE_2GHZ:
+-              dev->mphy.cap.has_2ghz = true;
+-              break;
+       case MT_EE_DBDC:
+               dev->dbdc_support = true;
+               fallthrough;
++      case MT_EE_2GHZ:
++              dev->mphy.cap.has_2ghz = true;
++              break;
+       default:
+               dev->mphy.cap.has_2ghz = true;
+               dev->mphy.cap.has_5ghz = true;
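
The mechanics are in the case ordering: before the change, MT_EE_DBDC
fell through into the default arm, which advertises both bands on the
first phy; after the reorder it falls through into the 2 GHz arm. A
standalone sketch of the corrected switch (enum values assumed for
illustration):

    #include <stdbool.h>

    enum { MT_EE_5GHZ = 1, MT_EE_2GHZ = 2, MT_EE_DBDC = 3 }; /* assumed */

    static void parse_band_cap(int cap, bool *has_2ghz, bool *has_5ghz,
                               bool *dbdc)
    {
        switch (cap) {
        case MT_EE_5GHZ:
            *has_5ghz = true;
            break;
        case MT_EE_DBDC:
            *dbdc = true;
            /* fall through: the main phy advertises 2.4 GHz only */
        case MT_EE_2GHZ:
            *has_2ghz = true;
            break;
        default:        /* unknown: assume a single dual-band phy */
            *has_2ghz = true;
            *has_5ghz = true;
        }
    }
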
diff --git a/queue-6.1/x86-hyperv-disable-ibt-when-hypercall-page-lacks-endbr-instruction.patch b/queue-6.1/x86-hyperv-disable-ibt-when-hypercall-page-lacks-endbr-instruction.patch
new file mode 100644
index 0000000..a65ea28
--- /dev/null
@@ -0,0 +1,70 @@
+From d5ace2a776442d80674eff9ed42e737f7dd95056 Mon Sep 17 00:00:00 2001
+From: Michael Kelley <mikelley@microsoft.com>
+Date: Fri, 21 Jul 2023 21:51:16 -0700
+Subject: x86/hyperv: Disable IBT when hypercall page lacks ENDBR instruction
+
+From: Michael Kelley <mikelley@microsoft.com>
+
+commit d5ace2a776442d80674eff9ed42e737f7dd95056 upstream.
+
+On hardware that supports Indirect Branch Tracking (IBT), Hyper-V VMs
+with ConfigVersion 9.3 or later support IBT in the guest. However,
+current versions of Hyper-V have a bug: there is no ENDBR64
+instruction at the beginning of the hypercall page. Since hypercalls are
+made with an indirect call to the hypercall page, all hypercall attempts
+fail with an exception and Linux panics.
+
+A Hyper-V fix is in progress to add ENDBR64. But guard against the Linux
+panic by clearing X86_FEATURE_IBT if the hypercall page doesn't start
+with ENDBR. The VM will boot and run without IBT.
+
+If future Linux 32-bit kernels were to support IBT, additional hypercall
+page hackery would be needed to make IBT work for such kernels in a
+Hyper-V VM.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Michael Kelley <mikelley@microsoft.com>
+Link: https://lore.kernel.org/r/1690001476-98594-1-git-send-email-mikelley@microsoft.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/hyperv/hv_init.c |   21 +++++++++++++++++++++
+ 1 file changed, 21 insertions(+)
+
+--- a/arch/x86/hyperv/hv_init.c
++++ b/arch/x86/hyperv/hv_init.c
+@@ -14,6 +14,7 @@
+ #include <asm/apic.h>
+ #include <asm/desc.h>
+ #include <asm/sev.h>
++#include <asm/ibt.h>
+ #include <asm/hypervisor.h>
+ #include <asm/hyperv-tlfs.h>
+ #include <asm/mshyperv.h>
+@@ -468,6 +469,26 @@ void __init hyperv_init(void)
+       }
+       /*
++       * Some versions of Hyper-V that provide IBT in guest VMs have a bug
++       * in that there's no ENDBR64 instruction at the entry to the
++       * hypercall page. Because hypercalls are invoked via an indirect call
++       * to the hypercall page, all hypercall attempts fail when IBT is
++       * enabled, and Linux panics. For such buggy versions, disable IBT.
++       *
++       * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall
++       * page, so if future Linux kernel versions enable IBT for 32-bit
++       * builds, additional hypercall page hackery will be required here
++       * to provide an ENDBR32.
++       */
++#ifdef CONFIG_X86_KERNEL_IBT
++      if (cpu_feature_enabled(X86_FEATURE_IBT) &&
++          *(u32 *)hv_hypercall_pg != gen_endbr()) {
++              setup_clear_cpu_cap(X86_FEATURE_IBT);
++              pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n");
++      }
++#endif
++
++      /*
+        * hyperv_init() is called before LAPIC is initialized: see
+        * apic_intr_mode_init() -> x86_platform.apic_post_init() and
+        * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER