Fixes for 6.3
author Sasha Levin <sashal@kernel.org>
Mon, 29 May 2023 02:43:50 +0000 (22:43 -0400)
committer Sasha Levin <sashal@kernel.org>
Mon, 29 May 2023 02:43:50 +0000 (22:43 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
42 files changed:
queue-6.3/arm-dts-imx6ull-dhcor-set-and-limit-the-mode-for-pmi.patch [new file with mode: 0644]
queue-6.3/asoc-intel-avs-fix-module-lookup.patch [new file with mode: 0644]
queue-6.3/blk-mq-fix-race-condition-in-active-queue-accounting.patch [new file with mode: 0644]
queue-6.3/blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch [new file with mode: 0644]
queue-6.3/bpf-netdev-init-the-offload-table-earlier.patch [new file with mode: 0644]
queue-6.3/bpf-sockmap-convert-schedule_work-into-delayed_work.patch [new file with mode: 0644]
queue-6.3/bpf-sockmap-handle-fin-correctly.patch [new file with mode: 0644]
queue-6.3/bpf-sockmap-improved-check-for-empty-queue.patch [new file with mode: 0644]
queue-6.3/bpf-sockmap-incorrectly-handling-copied_seq.patch [new file with mode: 0644]
queue-6.3/bpf-sockmap-pass-skb-ownership-through-read_skb.patch [new file with mode: 0644]
queue-6.3/bpf-sockmap-reschedule-is-now-done-through-backlog.patch [new file with mode: 0644]
queue-6.3/bpf-sockmap-tcp-data-stall-on-recv-before-accept.patch [new file with mode: 0644]
queue-6.3/bpf-sockmap-wake-up-polling-after-data-copy.patch [new file with mode: 0644]
queue-6.3/coresight-perf-release-coresight-path-when-alloc-tra.patch [new file with mode: 0644]
queue-6.3/cpufreq-amd-pstate-remove-fast_switch_possible-flag-.patch [new file with mode: 0644]
queue-6.3/cxl-port-fix-null-pointer-access-in-devm_cxl_add_por.patch [new file with mode: 0644]
queue-6.3/drm-i915-disable-dplls-before-disconnecting-the-tc-p.patch [new file with mode: 0644]
queue-6.3/drm-i915-fix-pipedmc-disabling-for-a-bigjoiner-confi.patch [new file with mode: 0644]
queue-6.3/drm-i915-move-shared-dpll-disabling-into-crtc-disabl.patch [new file with mode: 0644]
queue-6.3/firmware-arm_ffa-fix-usage-of-partition-info-get-cou.patch [new file with mode: 0644]
queue-6.3/firmware-arm_scmi-fix-incorrect-alloc_workqueue-invo.patch [new file with mode: 0644]
queue-6.3/gpio-f7188x-fix-chip-name-and-pin-count-on-nuvoton-c.patch [new file with mode: 0644]
queue-6.3/gpiolib-fix-allocation-of-mixed-dynamic-static-gpios.patch [new file with mode: 0644]
queue-6.3/net-fec-add-dma_wmb-to-ensure-correct-descriptor-val.patch [new file with mode: 0644]
queue-6.3/net-mlx5-e-switch-devcom-sync-devcom-events-and-devc.patch [new file with mode: 0644]
queue-6.3/net-mlx5e-tc-fix-using-eswitch-mapping-in-nic-mode.patch [new file with mode: 0644]
queue-6.3/net-phy-mscc-enable-vsc8501-2-rgmii-rx-clock.patch [new file with mode: 0644]
queue-6.3/platform-x86-amd-pmf-fix-cnqf-and-auto-mode-after-re.patch [new file with mode: 0644]
queue-6.3/power-supply-rt9467-fix-passing-zero-to-dev_err_prob.patch [new file with mode: 0644]
queue-6.3/revert-net-mlx5-expose-steering-dropped-packets-coun.patch [new file with mode: 0644]
queue-6.3/revert-net-mlx5-expose-vnic-diagnostic-counters-for-.patch [new file with mode: 0644]
queue-6.3/selftests-bpf-fix-pkg-config-call-building-sign-file.patch [new file with mode: 0644]
queue-6.3/series
queue-6.3/spi-spi-geni-qcom-select-fifo-mode-for-chip-select.patch [new file with mode: 0644]
queue-6.3/tls-rx-device-fix-checking-decryption-status.patch [new file with mode: 0644]
queue-6.3/tls-rx-strp-don-t-use-gfp_kernel-in-softirq-context.patch [new file with mode: 0644]
queue-6.3/tls-rx-strp-factor-out-copying-skb-data.patch [new file with mode: 0644]
queue-6.3/tls-rx-strp-fix-determining-record-length-in-copy-mo.patch [new file with mode: 0644]
queue-6.3/tls-rx-strp-force-mixed-decrypted-records-into-copy-.patch [new file with mode: 0644]
queue-6.3/tls-rx-strp-preserve-decryption-status-of-skbs-when-.patch [new file with mode: 0644]
queue-6.3/tls-rx-strp-set-the-skb-len-of-detached-cow-ed-skbs.patch [new file with mode: 0644]
queue-6.3/vfio-type1-check-pfn-valid-before-converting-to-stru.patch [new file with mode: 0644]

diff --git a/queue-6.3/arm-dts-imx6ull-dhcor-set-and-limit-the-mode-for-pmi.patch b/queue-6.3/arm-dts-imx6ull-dhcor-set-and-limit-the-mode-for-pmi.patch
new file mode 100644 (file)
index 0000000..04b8566
--- /dev/null
@@ -0,0 +1,72 @@
+From 13ccf736fe79f6fda3afce943dc64e202c10015e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 May 2023 13:14:24 +0200
+Subject: ARM: dts: imx6ull-dhcor: Set and limit the mode for PMIC buck 1, 2
+ and 3
+
+From: Christoph Niedermaier <cniedermaier@dh-electronics.com>
+
+[ Upstream commit 892943d7729bbfb2edeed9e323eba9a5cec21c49 ]
+
+According to Renesas Electronics (formerly Dialog Semiconductor), the
+standard AUTO mode of the PMIC DA9061 can lead to stability problems
+depending on the hardware revision. It is recommended to set a defined
+mode such as PFM or PWM permanently. So set and limit the mode for
+buck 1, 2 and 3 to a fixed one.
+
+Fixes: 611b6c891e40 ("ARM: dts: imx6ull-dhcom: Add DH electronics DHCOM i.MX6ULL SoM and PDK2 board")
+Signed-off-by: Christoph Niedermaier <cniedermaier@dh-electronics.com>
+Reviewed-by: Marek Vasut <marex@denx.de>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/imx6ull-dhcor-som.dtsi | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi b/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi
+index 5882c7565f649..32a6022625d97 100644
+--- a/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi
++++ b/arch/arm/boot/dts/imx6ull-dhcor-som.dtsi
+@@ -8,6 +8,7 @@
+ #include <dt-bindings/input/input.h>
+ #include <dt-bindings/leds/common.h>
+ #include <dt-bindings/pwm/pwm.h>
++#include <dt-bindings/regulator/dlg,da9063-regulator.h>
+ #include "imx6ull.dtsi"
+ / {
+@@ -84,16 +85,20 @@ onkey {
+               regulators {
+                       vdd_soc_in_1v4: buck1 {
++                              regulator-allowed-modes = <DA9063_BUCK_MODE_SLEEP>; /* PFM */
+                               regulator-always-on;
+                               regulator-boot-on;
++                              regulator-initial-mode = <DA9063_BUCK_MODE_SLEEP>;
+                               regulator-max-microvolt = <1400000>;
+                               regulator-min-microvolt = <1400000>;
+                               regulator-name = "vdd_soc_in_1v4";
+                       };
+                       vcc_3v3: buck2 {
++                              regulator-allowed-modes = <DA9063_BUCK_MODE_SYNC>; /* PWM */
+                               regulator-always-on;
+                               regulator-boot-on;
++                              regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>;
+                               regulator-max-microvolt = <3300000>;
+                               regulator-min-microvolt = <3300000>;
+                               regulator-name = "vcc_3v3";
+@@ -106,8 +111,10 @@ vcc_3v3: buck2 {
+                        * the voltage is set to 1.5V.
+                        */
+                       vcc_ddr_1v35: buck3 {
++                              regulator-allowed-modes = <DA9063_BUCK_MODE_SYNC>; /* PWM */
+                               regulator-always-on;
+                               regulator-boot-on;
++                              regulator-initial-mode = <DA9063_BUCK_MODE_SYNC>;
+                               regulator-max-microvolt = <1500000>;
+                               regulator-min-microvolt = <1500000>;
+                               regulator-name = "vcc_ddr_1v35";
+-- 
+2.39.2
+
diff --git a/queue-6.3/asoc-intel-avs-fix-module-lookup.patch b/queue-6.3/asoc-intel-avs-fix-module-lookup.patch
new file mode 100644 (file)
index 0000000..1a12cde
--- /dev/null
@@ -0,0 +1,85 @@
+From 543841576cb1487593cbae54f301c42b99a5e8bc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 May 2023 22:17:05 +0200
+Subject: ASoC: Intel: avs: Fix module lookup
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+
+[ Upstream commit ff04437f6dcd138b50483afc7b313f016020ce8f ]
+
+When changing the value of a kcontrol, the FW module to which data should
+be sent needs to be found. Currently this is done in an improper way; fix
+it. Change the function name to indicate that it looks only for the
+volume module.
+
+This allows changing the volume at runtime, instead of only changing the
+init value.
+
+Fixes: be2b81b519d7 ("ASoC: Intel: avs: Parse control tuples")
+Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
+Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
+Link: https://lore.kernel.org/r/20230519201711.4073845-2-amadeuszx.slawinski@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/intel/avs/control.c | 22 +++++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+diff --git a/sound/soc/intel/avs/control.c b/sound/soc/intel/avs/control.c
+index a8b14b784f8a5..3dfa2e9816db0 100644
+--- a/sound/soc/intel/avs/control.c
++++ b/sound/soc/intel/avs/control.c
+@@ -21,17 +21,25 @@ static struct avs_dev *avs_get_kcontrol_adev(struct snd_kcontrol *kcontrol)
+       return to_avs_dev(w->dapm->component->dev);
+ }
+-static struct avs_path_module *avs_get_kcontrol_module(struct avs_dev *adev, u32 id)
++static struct avs_path_module *avs_get_volume_module(struct avs_dev *adev, u32 id)
+ {
+       struct avs_path *path;
+       struct avs_path_pipeline *ppl;
+       struct avs_path_module *mod;
+-      list_for_each_entry(path, &adev->path_list, node)
+-              list_for_each_entry(ppl, &path->ppl_list, node)
+-                      list_for_each_entry(mod, &ppl->mod_list, node)
+-                              if (mod->template->ctl_id && mod->template->ctl_id == id)
++      spin_lock(&adev->path_list_lock);
++      list_for_each_entry(path, &adev->path_list, node) {
++              list_for_each_entry(ppl, &path->ppl_list, node) {
++                      list_for_each_entry(mod, &ppl->mod_list, node) {
++                              if (guid_equal(&mod->template->cfg_ext->type, &AVS_PEAKVOL_MOD_UUID)
++                                  && mod->template->ctl_id == id) {
++                                      spin_unlock(&adev->path_list_lock);
+                                       return mod;
++                              }
++                      }
++              }
++      }
++      spin_unlock(&adev->path_list_lock);
+       return NULL;
+ }
+@@ -49,7 +57,7 @@ int avs_control_volume_get(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_va
+       /* prevent access to modules while path is being constructed */
+       mutex_lock(&adev->path_mutex);
+-      active_module = avs_get_kcontrol_module(adev, ctl_data->id);
++      active_module = avs_get_volume_module(adev, ctl_data->id);
+       if (active_module) {
+               ret = avs_ipc_peakvol_get_volume(adev, active_module->module_id,
+                                                active_module->instance_id, &dspvols,
+@@ -89,7 +97,7 @@ int avs_control_volume_put(struct snd_kcontrol *kcontrol, struct snd_ctl_elem_va
+               changed = 1;
+       }
+-      active_module = avs_get_kcontrol_module(adev, ctl_data->id);
++      active_module = avs_get_volume_module(adev, ctl_data->id);
+       if (active_module) {
+               dspvol.channel_id = AVS_ALL_CHANNELS_MASK;
+               dspvol.target_volume = *volume;
+-- 
+2.39.2
+
diff --git a/queue-6.3/blk-mq-fix-race-condition-in-active-queue-accounting.patch b/queue-6.3/blk-mq-fix-race-condition-in-active-queue-accounting.patch
new file mode 100644 (file)
index 0000000..68b3739
--- /dev/null
@@ -0,0 +1,57 @@
+From 38bc239fae9e93ec20b3cc5f2d754923419afdb7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 17:05:55 -0400
+Subject: blk-mq: fix race condition in active queue accounting
+
+From: Tian Lan <tian.lan@twosigma.com>
+
+[ Upstream commit 3e94d54e83cafd2b562bb6d15bb2f72d76200fb5 ]
+
+If multiple CPUs are sharing the same hardware queue, it can
+cause a leak in the active queue counter tracking when __blk_mq_tag_busy()
+is executed simultaneously.
+
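+As a rough illustration (an editor's sketch, not part of the patch), the
+leak is the classic check-then-set race, closed by test_and_set_bit():
+
+ /* Racy: CPU A and CPU B can both see the bit clear, both set it,
+  * and both fall through to atomic_inc_return(&active_queues),
+  * leaving the counter one too high forever.
+  */
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
+         return;
+ set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags);
+
+ /* Atomic: exactly one CPU wins test_and_set_bit(); the plain
+  * test_bit() first avoids dirtying the cacheline when the queue
+  * is already marked active.
+  */
+ if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+     test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
+         return;
+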
+Fixes: ee78ec1077d3 ("blk-mq: blk_mq_tag_busy is no need to return a value")
+Signed-off-by: Tian Lan <tian.lan@twosigma.com>
+Reviewed-by: Ming Lei <ming.lei@redhat.com>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Reviewed-by: John Garry <john.g.garry@oracle.com>
+Link: https://lore.kernel.org/r/20230522210555.794134-1-tilan7663@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-mq-tag.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
+index 9eb968e14d31f..a80d7c62bdfe6 100644
+--- a/block/blk-mq-tag.c
++++ b/block/blk-mq-tag.c
+@@ -41,16 +41,20 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
+ {
+       unsigned int users;
++      /*
++       * calling test_bit() prior to test_and_set_bit() is intentional,
++       * it avoids dirtying the cacheline if the queue is already active.
++       */
+       if (blk_mq_is_shared_tags(hctx->flags)) {
+               struct request_queue *q = hctx->queue;
+-              if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
++              if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
++                  test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
+                       return;
+-              set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags);
+       } else {
+-              if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
++              if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
++                  test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
+                       return;
+-              set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state);
+       }
+       users = atomic_inc_return(&hctx->tags->active_queues);
+-- 
+2.39.2
+
diff --git a/queue-6.3/blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch b/queue-6.3/blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch
new file mode 100644 (file)
index 0000000..8fe95f1
--- /dev/null
@@ -0,0 +1,65 @@
+From 1b9e778d5b9f2dde8af99adc6ceeee1dd53e883c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 20:18:54 +0800
+Subject: blk-wbt: fix that wbt can't be disabled by default
+
+From: Yu Kuai <yukuai3@huawei.com>
+
+[ Upstream commit 8a2b20a997a3779ae9fcae268f2959eb82ec05a1 ]
+
+commit b11d31ae01e6 ("blk-wbt: remove unnecessary check in
+wbt_enable_default()") removes the check of CONFIG_BLK_WBT_MQ by
+mistake; this option is used to control whether wbt is enabled by default.
+
+Fix the problem by adding the check back. This patch also does a little
+cleanup to make the related code more readable.
+
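+(Editor's aside, not patch text: IS_ENABLED(CONFIG_FOO) expands to 1 when
+the option is built-in or modular and 0 otherwise, so the compile-time
+default is restored with a single line:)
+
+ bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ); /* 1 for =y/=m, else 0 */
+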
+Fixes: b11d31ae01e6 ("blk-wbt: remove unnecessary check in wbt_enable_default()")
+Reported-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Link: https://lore.kernel.org/lkml/CAKXUXMzfKq_J9nKHGyr5P5rvUETY4B-fxoQD4sO+NYjFOfVtZA@mail.gmail.com/t/
+Signed-off-by: Yu Kuai <yukuai3@huawei.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Link: https://lore.kernel.org/r/20230522121854.2928880-1-yukuai1@huaweicloud.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-wbt.c | 12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index e49a486845327..9ec2a2f1eda38 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -730,14 +730,16 @@ void wbt_enable_default(struct gendisk *disk)
+ {
+       struct request_queue *q = disk->queue;
+       struct rq_qos *rqos;
+-      bool disable_flag = q->elevator &&
+-                  test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags);
++      bool enable = IS_ENABLED(CONFIG_BLK_WBT_MQ);
++
++      if (q->elevator &&
++          test_bit(ELEVATOR_FLAG_DISABLE_WBT, &q->elevator->flags))
++              enable = false;
+       /* Throttling already enabled? */
+       rqos = wbt_rq_qos(q);
+       if (rqos) {
+-              if (!disable_flag &&
+-                  RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
++              if (enable && RQWB(rqos)->enable_state == WBT_STATE_OFF_DEFAULT)
+                       RQWB(rqos)->enable_state = WBT_STATE_ON_DEFAULT;
+               return;
+       }
+@@ -746,7 +748,7 @@ void wbt_enable_default(struct gendisk *disk)
+       if (!blk_queue_registered(q))
+               return;
+-      if (queue_is_mq(q) && !disable_flag)
++      if (queue_is_mq(q) && enable)
+               wbt_init(disk);
+ }
+ EXPORT_SYMBOL_GPL(wbt_enable_default);
+-- 
+2.39.2
+
diff --git a/queue-6.3/bpf-netdev-init-the-offload-table-earlier.patch b/queue-6.3/bpf-netdev-init-the-offload-table-earlier.patch
new file mode 100644 (file)
index 0000000..ab540d9
--- /dev/null
@@ -0,0 +1,36 @@
+From 55b6db3767214a76dad9dca2976abc810e5bfce7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 5 May 2023 14:58:36 -0700
+Subject: bpf: netdev: init the offload table earlier
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit e1505c1cc8d527fcc5bcaf9c1ad82eed817e3e10 ]
+
+Some netdevices may get unregistered before late_initcall(), so
+we have to move the hashtable init earlier.
+
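+As a brief aside (editor's sketch, not patch text), initcall levels run in
+a fixed boot order, so moving the registration earlier is enough:
+
+ /* core_initcall -> ... -> subsys_initcall -> fs_initcall ->
+  * device_initcall (most drivers) -> late_initcall.
+  * At core_initcall the offload hashtable already exists before
+  * any netdev driver can register or unregister devices.
+  */
+ core_initcall(bpf_offload_init);
+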
+Fixes: f1fc43d03946 ("bpf: Move offload initialization into late_initcall")
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217399
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Link: https://lore.kernel.org/r/20230505215836.491485-1-kuba@kernel.org
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/offload.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c
+index 0c85e06f7ea7f..ee146430d9984 100644
+--- a/kernel/bpf/offload.c
++++ b/kernel/bpf/offload.c
+@@ -853,4 +853,4 @@ static int __init bpf_offload_init(void)
+       return rhashtable_init(&offdevs, &offdevs_params);
+ }
+-late_initcall(bpf_offload_init);
++core_initcall(bpf_offload_init);
+-- 
+2.39.2
+
diff --git a/queue-6.3/bpf-sockmap-convert-schedule_work-into-delayed_work.patch b/queue-6.3/bpf-sockmap-convert-schedule_work-into-delayed_work.patch
new file mode 100644 (file)
index 0000000..9d192e6
--- /dev/null
@@ -0,0 +1,190 @@
+From 440c2755c6057e5c33d3174fde5021b5dd591246 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 19:56:06 -0700
+Subject: bpf, sockmap: Convert schedule_work into delayed_work
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit 29173d07f79883ac94f5570294f98af3d4287382 ]
+
+Sk_buffs are fed into sockmap verdict programs either from a strparser
+(when the user might want to decide how framing of skb is done by attaching
+another parser program) or directly through tcp_read_sock. tcp_read_sock
+is the preferred method for performance when the BPF logic is a stream
+parser.
+
+The flow for Cilium's common use case with a stream parser is,
+
+ tcp_read_sock()
+  sk_psock_verdict_recv
+    ret = bpf_prog_run_pin_on_cpu()
+    sk_psock_verdict_apply(sock, skb, ret)
+     // if system is under memory pressure or app is slow we may
+     // need to queue skb. Do this queuing through ingress_skb and
+     // then kick timer to wake up handler
+     skb_queue_tail(ingress_skb, skb)
+     schedule_work(work);
+
+The work queue is wired up to sk_psock_backlog(). This will then walk the
+ingress_skb skb list that holds our sk_buffs that could not be handled,
+but should be OK to run at some later point. However, it's possible that
+the workqueue doing this work still hits an error when sending the skb.
+When this happens the skbuff is requeued on a temporary 'state' struct
+kept with the workqueue. This is necessary because it's possible to
+partially send an skbuff before hitting an error and we need to know how
+and where to restart when the workqueue runs next.
+
+Now for the trouble, we don't rekick the workqueue. This can cause a
+stall where the skbuff we just cached on the state variable might never
+be sent. This happens when it's the last packet in a flow and no further
+packets come along that would cause the system to kick the workqueue from
+that side.
+
+To fix this we could do a simple schedule_work(), but while under memory
+pressure it makes sense to back off somewhat instead of continuing to
+retry repeatedly. So instead convert schedule_work() to
+schedule_delayed_work() and add backoff logic to reschedule from the
+backlog queue on errors. It's not obvious what a good backoff is, so
+use '1'.
+
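+As a minimal sketch of the resulting pattern (editor's illustration, not
+patch text), the delay argument lets one callsite express both cases:
+
+ INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
+
+ schedule_delayed_work(&psock->work, 0); /* normal kick: run ASAP */
+ schedule_delayed_work(&psock->work, 1); /* -EAGAIN: back off one jiffy */
+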
+While testing we observed some flakes when running the NGINX compliance
+test with sockmap; we attributed these failed tests to this bug and the
+subsequent issues.
+
+From the on-list discussion: this commit
+
+ bec217197b41("skmsg: Schedule psock work if the cached skb exists on the psock")
+
+was intended to address a similar race, but had a couple of cases it
+missed. Most obviously, it only accounted for receiving traffic on the
+local socket, so if redirecting into another socket we could still get an
+sk_buff stuck here. Next, it missed the case where copied=0 in the recv()
+handler and then we wouldn't kick the scheduler. Also, it's sub-optimal to
+require userspace to kick the internal mechanisms of sockmap to wake it up
+and copy data to the user. It results in an extra syscall and requires the
+app to actually handle the EAGAIN correctly.
+
+Fixes: 04919bed948dc ("tcp: Introduce tcp_read_skb()")
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: William Findlay <will@isovalent.com>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20230523025618.113937-3-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/skmsg.h |  2 +-
+ net/core/skmsg.c      | 21 ++++++++++++++-------
+ net/core/sock_map.c   |  3 ++-
+ 3 files changed, 17 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
+index 84f787416a54d..904ff9a32ad61 100644
+--- a/include/linux/skmsg.h
++++ b/include/linux/skmsg.h
+@@ -105,7 +105,7 @@ struct sk_psock {
+       struct proto                    *sk_proto;
+       struct mutex                    work_mutex;
+       struct sk_psock_work_state      work_state;
+-      struct work_struct              work;
++      struct delayed_work             work;
+       struct rcu_work                 rwork;
+ };
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index 4a3dc8d272957..0a9ee2acac0bb 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -482,7 +482,7 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+       }
+ out:
+       if (psock->work_state.skb && copied > 0)
+-              schedule_work(&psock->work);
++              schedule_delayed_work(&psock->work, 0);
+       return copied;
+ }
+ EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+@@ -640,7 +640,8 @@ static void sk_psock_skb_state(struct sk_psock *psock,
+ static void sk_psock_backlog(struct work_struct *work)
+ {
+-      struct sk_psock *psock = container_of(work, struct sk_psock, work);
++      struct delayed_work *dwork = to_delayed_work(work);
++      struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
+       struct sk_psock_work_state *state = &psock->work_state;
+       struct sk_buff *skb = NULL;
+       bool ingress;
+@@ -680,6 +681,12 @@ static void sk_psock_backlog(struct work_struct *work)
+                               if (ret == -EAGAIN) {
+                                       sk_psock_skb_state(psock, state, skb,
+                                                          len, off);
++
++                                      /* Delay slightly to prioritize any
++                                       * other work that might be here.
++                                       */
++                                      if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
++                                              schedule_delayed_work(&psock->work, 1);
+                                       goto end;
+                               }
+                               /* Hard errors break pipe and stop xmit. */
+@@ -734,7 +741,7 @@ struct sk_psock *sk_psock_init(struct sock *sk, int node)
+       INIT_LIST_HEAD(&psock->link);
+       spin_lock_init(&psock->link_lock);
+-      INIT_WORK(&psock->work, sk_psock_backlog);
++      INIT_DELAYED_WORK(&psock->work, sk_psock_backlog);
+       mutex_init(&psock->work_mutex);
+       INIT_LIST_HEAD(&psock->ingress_msg);
+       spin_lock_init(&psock->ingress_lock);
+@@ -823,7 +830,7 @@ static void sk_psock_destroy(struct work_struct *work)
+       sk_psock_done_strp(psock);
+-      cancel_work_sync(&psock->work);
++      cancel_delayed_work_sync(&psock->work);
+       mutex_destroy(&psock->work_mutex);
+       psock_progs_drop(&psock->progs);
+@@ -938,7 +945,7 @@ static int sk_psock_skb_redirect(struct sk_psock *from, struct sk_buff *skb)
+       }
+       skb_queue_tail(&psock_other->ingress_skb, skb);
+-      schedule_work(&psock_other->work);
++      schedule_delayed_work(&psock_other->work, 0);
+       spin_unlock_bh(&psock_other->ingress_lock);
+       return 0;
+ }
+@@ -1018,7 +1025,7 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
+                       spin_lock_bh(&psock->ingress_lock);
+                       if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+                               skb_queue_tail(&psock->ingress_skb, skb);
+-                              schedule_work(&psock->work);
++                              schedule_delayed_work(&psock->work, 0);
+                               err = 0;
+                       }
+                       spin_unlock_bh(&psock->ingress_lock);
+@@ -1049,7 +1056,7 @@ static void sk_psock_write_space(struct sock *sk)
+       psock = sk_psock(sk);
+       if (likely(psock)) {
+               if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+-                      schedule_work(&psock->work);
++                      schedule_delayed_work(&psock->work, 0);
+               write_space = psock->saved_write_space;
+       }
+       rcu_read_unlock();
+diff --git a/net/core/sock_map.c b/net/core/sock_map.c
+index a055139f410e2..08851511294c0 100644
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -1624,9 +1624,10 @@ void sock_map_close(struct sock *sk, long timeout)
+               rcu_read_unlock();
+               sk_psock_stop(psock);
+               release_sock(sk);
+-              cancel_work_sync(&psock->work);
++              cancel_delayed_work_sync(&psock->work);
+               sk_psock_put(sk, psock);
+       }
++
+       /* Make sure we do not recurse. This is a bug.
+        * Leak the socket instead of crashing on a stack overflow.
+        */
+-- 
+2.39.2
+
diff --git a/queue-6.3/bpf-sockmap-handle-fin-correctly.patch b/queue-6.3/bpf-sockmap-handle-fin-correctly.patch
new file mode 100644 (file)
index 0000000..37d041f
--- /dev/null
@@ -0,0 +1,83 @@
+From 1152bdef0eca7b4d7fc20fcea41014f0163028c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 19:56:09 -0700
+Subject: bpf, sockmap: Handle fin correctly
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit 901546fd8f9ca4b5c481ce00928ab425ce9aacc0 ]
+
+The sockmap code is returning EAGAIN after a FIN packet is received and no
+more data is on the receive queue. The correct behavior is to return 0 to
+the user, who can then close the socket. The EAGAIN causes many apps
+to retry, which masks the problem. Eventually the socket is evicted from
+the sockmap because it's released from the sockmap sock free handling. The
+issue creates a delay and can cause some errors on the application side.
+
+To fix this, check on the sk_msg_recvmsg side whether the length is zero
+and the FIN flag is set, and in that case return zero. A selftest will be
+added to check this condition.
+
+Fixes: 04919bed948dc ("tcp: Introduce tcp_read_skb()")
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: William Findlay <will@isovalent.com>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20230523025618.113937-6-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_bpf.c | 31 +++++++++++++++++++++++++++++++
+ 1 file changed, 31 insertions(+)
+
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index 2e9547467edbe..73c13642d47f6 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -174,6 +174,24 @@ static int tcp_msg_wait_data(struct sock *sk, struct sk_psock *psock,
+       return ret;
+ }
++static bool is_next_msg_fin(struct sk_psock *psock)
++{
++      struct scatterlist *sge;
++      struct sk_msg *msg_rx;
++      int i;
++
++      msg_rx = sk_psock_peek_msg(psock);
++      i = msg_rx->sg.start;
++      sge = sk_msg_elem(msg_rx, i);
++      if (!sge->length) {
++              struct sk_buff *skb = msg_rx->skb;
++
++              if (skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
++                      return true;
++      }
++      return false;
++}
++
+ static int tcp_bpf_recvmsg_parser(struct sock *sk,
+                                 struct msghdr *msg,
+                                 size_t len,
+@@ -196,6 +214,19 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
+       lock_sock(sk);
+ msg_bytes_ready:
+       copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
++      /* The typical case for EFAULT is the socket was gracefully
++       * shutdown with a FIN pkt. So check here the other case is
++       * some error on copy_page_to_iter which would be unexpected.
++       * On fin return correct return code to zero.
++       */
++      if (copied == -EFAULT) {
++              bool is_fin = is_next_msg_fin(psock);
++
++              if (is_fin) {
++                      copied = 0;
++                      goto out;
++              }
++      }
+       if (!copied) {
+               long timeo;
+               int data;
+-- 
+2.39.2
+
diff --git a/queue-6.3/bpf-sockmap-improved-check-for-empty-queue.patch b/queue-6.3/bpf-sockmap-improved-check-for-empty-queue.patch
new file mode 100644 (file)
index 0000000..7a302fb
--- /dev/null
@@ -0,0 +1,178 @@
+From bd2062c777cc50a0c6651abfe0e338faed217c9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 19:56:08 -0700
+Subject: bpf, sockmap: Improved check for empty queue
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit 405df89dd52cbcd69a3cd7d9a10d64de38f854b2 ]
+
+We noticed some rare sk_buffs were stepping past the queue when the system
+was under memory pressure. The general theory is to skip enqueueing
+sk_buffs when it's not necessary, which is the normal case with a system
+that is properly provisioned for the task: no memory pressure and enough
+cpu assigned.
+
+But, if we can't allocate memory due to an ENOMEM error when enqueueing
+the sk_buff into the sockmap receive queue we push it onto a delayed
+workqueue to retry later. When a new sk_buff is received we then check
+if that queue is empty. However, there is a problem with simply checking
+the queue length. When an sk_buff is being processed from the ingress
+queue but not yet on the sockmap msg receive queue, it's possible to also
+receive an sk_buff through the normal path. It will check the ingress
+queue, which is zero, and then skip ahead of the pkt being processed.
+
+Previously we used the sock lock from both contexts, which made the
+problem harder to hit, but not impossible.
+
+To fix this, instead of popping the skb from the queue entirely, we peek
+the skb on the queue and do the copy there. This ensures checks of the
+queue length see it as non-zero while the skb is being processed. Then,
+finally, when the entire skb has been copied to the user space queue or
+another socket, we pop it off the queue. This way the queue length check
+allows bypassing the queue only after the list has been completely
+processed.
+
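+Schematically (an editor's sketch, not the patch itself, with send_skb()
+standing in for the real ingress/egress handling), the loop becomes
+peek -> process -> dequeue, so the queue length stays non-zero until the
+skb is fully consumed:
+
+ while ((skb = skb_peek(&psock->ingress_skb))) {
+         if (send_skb(psock, skb) == -EAGAIN)
+                 break;          /* leave skb queued, retry later */
+         /* only now does the queue length drop */
+         skb = skb_dequeue(&psock->ingress_skb);
+         if (!ingress)
+                 kfree_skb(skb);
+ }
+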
+To reproduce the issue we run the NGINX compliance test with sockmap
+running and observe some flakes in our testing that we attributed to this
+issue.
+
+Fixes: 04919bed948dc ("tcp: Introduce tcp_read_skb()")
+Suggested-by: Jakub Sitnicki <jakub@cloudflare.com>
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: William Findlay <will@isovalent.com>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20230523025618.113937-5-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/skmsg.h |  1 -
+ net/core/skmsg.c      | 32 ++++++++------------------------
+ 2 files changed, 8 insertions(+), 25 deletions(-)
+
+diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
+index 904ff9a32ad61..054d7911bfc9f 100644
+--- a/include/linux/skmsg.h
++++ b/include/linux/skmsg.h
+@@ -71,7 +71,6 @@ struct sk_psock_link {
+ };
+ struct sk_psock_work_state {
+-      struct sk_buff                  *skb;
+       u32                             len;
+       u32                             off;
+ };
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index 76ff15f8bb06e..bcd45a99a3db3 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -622,16 +622,12 @@ static int sk_psock_handle_skb(struct sk_psock *psock, struct sk_buff *skb,
+ static void sk_psock_skb_state(struct sk_psock *psock,
+                              struct sk_psock_work_state *state,
+-                             struct sk_buff *skb,
+                              int len, int off)
+ {
+       spin_lock_bh(&psock->ingress_lock);
+       if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+-              state->skb = skb;
+               state->len = len;
+               state->off = off;
+-      } else {
+-              sock_drop(psock->sk, skb);
+       }
+       spin_unlock_bh(&psock->ingress_lock);
+ }
+@@ -642,23 +638,17 @@ static void sk_psock_backlog(struct work_struct *work)
+       struct sk_psock *psock = container_of(dwork, struct sk_psock, work);
+       struct sk_psock_work_state *state = &psock->work_state;
+       struct sk_buff *skb = NULL;
++      u32 len = 0, off = 0;
+       bool ingress;
+-      u32 len, off;
+       int ret;
+       mutex_lock(&psock->work_mutex);
+-      if (unlikely(state->skb)) {
+-              spin_lock_bh(&psock->ingress_lock);
+-              skb = state->skb;
++      if (unlikely(state->len)) {
+               len = state->len;
+               off = state->off;
+-              state->skb = NULL;
+-              spin_unlock_bh(&psock->ingress_lock);
+       }
+-      if (skb)
+-              goto start;
+-      while ((skb = skb_dequeue(&psock->ingress_skb))) {
++      while ((skb = skb_peek(&psock->ingress_skb))) {
+               len = skb->len;
+               off = 0;
+               if (skb_bpf_strparser(skb)) {
+@@ -667,7 +657,6 @@ static void sk_psock_backlog(struct work_struct *work)
+                       off = stm->offset;
+                       len = stm->full_len;
+               }
+-start:
+               ingress = skb_bpf_ingress(skb);
+               skb_bpf_redirect_clear(skb);
+               do {
+@@ -677,8 +666,7 @@ static void sk_psock_backlog(struct work_struct *work)
+                                                         len, ingress);
+                       if (ret <= 0) {
+                               if (ret == -EAGAIN) {
+-                                      sk_psock_skb_state(psock, state, skb,
+-                                                         len, off);
++                                      sk_psock_skb_state(psock, state, len, off);
+                                       /* Delay slightly to prioritize any
+                                        * other work that might be here.
+@@ -690,15 +678,16 @@ static void sk_psock_backlog(struct work_struct *work)
+                               /* Hard errors break pipe and stop xmit. */
+                               sk_psock_report_error(psock, ret ? -ret : EPIPE);
+                               sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+-                              sock_drop(psock->sk, skb);
+                               goto end;
+                       }
+                       off += ret;
+                       len -= ret;
+               } while (len);
+-              if (!ingress)
++              skb = skb_dequeue(&psock->ingress_skb);
++              if (!ingress) {
+                       kfree_skb(skb);
++              }
+       }
+ end:
+       mutex_unlock(&psock->work_mutex);
+@@ -791,11 +780,6 @@ static void __sk_psock_zap_ingress(struct sk_psock *psock)
+               skb_bpf_redirect_clear(skb);
+               sock_drop(psock->sk, skb);
+       }
+-      kfree_skb(psock->work_state.skb);
+-      /* We null the skb here to ensure that calls to sk_psock_backlog
+-       * do not pick up the free'd skb.
+-       */
+-      psock->work_state.skb = NULL;
+       __sk_psock_purge_ingress_msg(psock);
+ }
+@@ -814,7 +798,6 @@ void sk_psock_stop(struct sk_psock *psock)
+       spin_lock_bh(&psock->ingress_lock);
+       sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
+       sk_psock_cork_free(psock);
+-      __sk_psock_zap_ingress(psock);
+       spin_unlock_bh(&psock->ingress_lock);
+ }
+@@ -829,6 +812,7 @@ static void sk_psock_destroy(struct work_struct *work)
+       sk_psock_done_strp(psock);
+       cancel_delayed_work_sync(&psock->work);
++      __sk_psock_zap_ingress(psock);
+       mutex_destroy(&psock->work_mutex);
+       psock_progs_drop(&psock->progs);
+-- 
+2.39.2
+
diff --git a/queue-6.3/bpf-sockmap-incorrectly-handling-copied_seq.patch b/queue-6.3/bpf-sockmap-incorrectly-handling-copied_seq.patch
new file mode 100644 (file)
index 0000000..7f02cfe
--- /dev/null
@@ -0,0 +1,235 @@
+From 5d3c6baaa7d74ad7275661585ebd6c0e49d4a8f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 19:56:12 -0700
+Subject: bpf, sockmap: Incorrectly handling copied_seq
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit e5c6de5fa025882babf89cecbed80acf49b987fa ]
+
+The read_skb() logic is incrementing tcp->copied_seq, which is used for,
+among other things, calculating how many outstanding bytes can be read by
+the application. This results in application errors: if the application
+does an ioctl(FIONREAD) we return zero, because this is calculated from
+the copied_seq value.
+
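+(For context, an editor's sketch of the affected application pattern;
+FIONREAD on TCP reports roughly rcv_nxt - copied_seq, so a prematurely
+advanced copied_seq makes pending data invisible:)
+
+ int avail = 0;
+
+ if (ioctl(fd, FIONREAD, &avail) == 0 && avail == 0)
+         return; /* app wrongly concludes there is nothing to read */
+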
+To fix this we move tcp->copied_seq accounting into the recv handler so
+that we update these when the recvmsg() hook is called and data is in
+fact copied into user buffers. This gives an accurate FIONREAD value
+as expected and improves ACK handling. Before, we were calling
+tcp_rcv_space_adjust(), which would update 'number of bytes copied to
+user in last RTT'; this is wrong for programs returning SK_PASS. The
+bytes are only copied to the user when recvmsg is handled.
+
+Doing the fix for recvmsg is straightforward, but fixing redirect and
+SK_DROP pkts is a bit trickier. Build a tcp_eat_skb() helper and then
+call this from the skmsg handlers. This fixes another issue where a broken
+socket with a BPF program doing a resubmit could hang the receiver. This
+happened because although read_skb() consumed the skb through sock_drop()
+it did not update the copied_seq. Now if a single recv socket is
+redirecting to many sockets (for example for lb) the receiver sk will be
+hung even though we might expect it to continue. The hang comes from
+not updating the copied_seq numbers and the memory pressure resulting
+from that.
+
+We have a slight layering problem of calling tcp_eat_skb even if it's not
+a TCP socket. To fix this we could refactor and create per-type receiver
+handlers. I decided this is more work than we want in the fix, and we
+already have some small tweaks depending on the caller that use the
+helper skb_bpf_strparser(). So we extend that a bit, always set
+the strparser bit when it is in use, and then we can gate the
+copied_seq updates on this.
+
+Fixes: 04919bed948dc ("tcp: Introduce tcp_read_skb()")
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20230523025618.113937-9-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h  | 10 ++++++++++
+ net/core/skmsg.c   | 15 +++++++--------
+ net/ipv4/tcp.c     | 10 +---------
+ net/ipv4/tcp_bpf.c | 28 +++++++++++++++++++++++++++-
+ 4 files changed, 45 insertions(+), 18 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index db9f828e9d1ee..76bf0a11bdc77 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1467,6 +1467,8 @@ static inline void tcp_adjust_rcv_ssthresh(struct sock *sk)
+ }
+ void tcp_cleanup_rbuf(struct sock *sk, int copied);
++void __tcp_cleanup_rbuf(struct sock *sk, int copied);
++
+ /* We provision sk_rcvbuf around 200% of sk_rcvlowat.
+  * If 87.5 % (7/8) of the space has been consumed, we want to override
+@@ -2323,6 +2325,14 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
+ void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
+ #endif /* CONFIG_BPF_SYSCALL */
++#ifdef CONFIG_INET
++void tcp_eat_skb(struct sock *sk, struct sk_buff *skb);
++#else
++static inline void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
++{
++}
++#endif
++
+ int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
+                         struct sk_msg *msg, u32 bytes, int flags);
+ #endif /* CONFIG_NET_SOCK_MSG */
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index 08be5f409fb89..a9060e1f0e437 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -979,10 +979,8 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
+               err = -EIO;
+               sk_other = psock->sk;
+               if (sock_flag(sk_other, SOCK_DEAD) ||
+-                  !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)) {
+-                      skb_bpf_redirect_clear(skb);
++                  !sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+                       goto out_free;
+-              }
+               skb_bpf_set_ingress(skb);
+@@ -1011,18 +1009,19 @@ static int sk_psock_verdict_apply(struct sk_psock *psock, struct sk_buff *skb,
+                               err = 0;
+                       }
+                       spin_unlock_bh(&psock->ingress_lock);
+-                      if (err < 0) {
+-                              skb_bpf_redirect_clear(skb);
++                      if (err < 0)
+                               goto out_free;
+-                      }
+               }
+               break;
+       case __SK_REDIRECT:
++              tcp_eat_skb(psock->sk, skb);
+               err = sk_psock_skb_redirect(psock, skb);
+               break;
+       case __SK_DROP:
+       default:
+ out_free:
++              skb_bpf_redirect_clear(skb);
++              tcp_eat_skb(psock->sk, skb);
+               sock_drop(psock->sk, skb);
+       }
+@@ -1067,8 +1066,7 @@ static void sk_psock_strp_read(struct strparser *strp, struct sk_buff *skb)
+               skb_dst_drop(skb);
+               skb_bpf_redirect_clear(skb);
+               ret = bpf_prog_run_pin_on_cpu(prog, skb);
+-              if (ret == SK_PASS)
+-                      skb_bpf_set_strparser(skb);
++              skb_bpf_set_strparser(skb);
+               ret = sk_psock_map_verd(ret, skb_bpf_redirect_fetch(skb));
+               skb->sk = NULL;
+       }
+@@ -1176,6 +1174,7 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
+       psock = sk_psock(sk);
+       if (unlikely(!psock)) {
+               len = 0;
++              tcp_eat_skb(sk, skb);
+               sock_drop(sk, skb);
+               goto out;
+       }
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 01ea2705deea9..ed63ee8f0d7e3 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1570,7 +1570,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
+  * calculation of whether or not we must ACK for the sake of
+  * a window update.
+  */
+-static void __tcp_cleanup_rbuf(struct sock *sk, int copied)
++void __tcp_cleanup_rbuf(struct sock *sk, int copied)
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+       bool time_to_ack = false;
+@@ -1785,14 +1785,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+                       break;
+               }
+       }
+-      WRITE_ONCE(tp->copied_seq, seq);
+-
+-      tcp_rcv_space_adjust(sk);
+-
+-      /* Clean up data we have read: This will do ACK frames. */
+-      if (copied > 0)
+-              __tcp_cleanup_rbuf(sk, copied);
+-
+       return copied;
+ }
+ EXPORT_SYMBOL(tcp_read_skb);
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index 01dd76be1a584..5f93918c063c7 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -11,6 +11,24 @@
+ #include <net/inet_common.h>
+ #include <net/tls.h>
++void tcp_eat_skb(struct sock *sk, struct sk_buff *skb)
++{
++      struct tcp_sock *tcp;
++      int copied;
++
++      if (!skb || !skb->len || !sk_is_tcp(sk))
++              return;
++
++      if (skb_bpf_strparser(skb))
++              return;
++
++      tcp = tcp_sk(sk);
++      copied = tcp->copied_seq + skb->len;
++      WRITE_ONCE(tcp->copied_seq, copied);
++      tcp_rcv_space_adjust(sk);
++      __tcp_cleanup_rbuf(sk, skb->len);
++}
++
+ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
+                          struct sk_msg *msg, u32 apply_bytes, int flags)
+ {
+@@ -198,8 +216,10 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
+                                 int flags,
+                                 int *addr_len)
+ {
++      struct tcp_sock *tcp = tcp_sk(sk);
++      u32 seq = tcp->copied_seq;
+       struct sk_psock *psock;
+-      int copied;
++      int copied = 0;
+       if (unlikely(flags & MSG_ERRQUEUE))
+               return inet_recv_error(sk, msg, len, addr_len);
+@@ -244,9 +264,11 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
+               if (is_fin) {
+                       copied = 0;
++                      seq++;
+                       goto out;
+               }
+       }
++      seq += copied;
+       if (!copied) {
+               long timeo;
+               int data;
+@@ -284,6 +306,10 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
+               copied = -EAGAIN;
+       }
+ out:
++      WRITE_ONCE(tcp->copied_seq, seq);
++      tcp_rcv_space_adjust(sk);
++      if (copied > 0)
++              __tcp_cleanup_rbuf(sk, copied);
+       release_sock(sk);
+       sk_psock_put(sk, psock);
+       return copied;
+-- 
+2.39.2
+
diff --git a/queue-6.3/bpf-sockmap-pass-skb-ownership-through-read_skb.patch b/queue-6.3/bpf-sockmap-pass-skb-ownership-through-read_skb.patch
new file mode 100644 (file)
index 0000000..05d063e
--- /dev/null
@@ -0,0 +1,159 @@
+From 82e068d34b3f4b48e6802390c884f79373769ad1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 19:56:05 -0700
+Subject: bpf, sockmap: Pass skb ownership through read_skb
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit 78fa0d61d97a728d306b0c23d353c0e340756437 ]
+
+The read_skb hook calls consume_skb() now, but this means that if the
+recv_actor program wants to use the skb it needs to inc the ref cnt
+so that the consume_skb() doesn't kfree the sk_buff.
+
+This is problematic because in some error cases under memory pressure
+we may need to linearize the sk_buff from sk_psock_skb_ingress_enqueue().
+Then we get this,
+
+ skb_linearize()
+   __pskb_pull_tail()
+     pskb_expand_head()
+       BUG_ON(skb_shared(skb))
+
+Because we incremented the users refcnt from sk_psock_verdict_recv() we
+hit the BUG_ON with refcnt > 1 and trip it.
+
+To fix this, let's simply pass ownership of the sk_buff through the
+read_skb call. Then we can drop the consume from the read_skb handlers
+and assume the verdict recv does any required kfree.
+
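+In other words (an editor's sketch of the new contract, not patch text),
+the read_skb callers stop freeing the skb themselves:
+
+ /* Before: caller kept ownership and freed the skb. */
+ copied = recv_actor(sk, skb);
+ kfree_skb(skb);
+ return copied;
+
+ /* After: recv_actor takes ownership and frees or queues it. */
+ return recv_actor(sk, skb);
+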
+Bug found while testing in our CI which runs in VMs that hit memory
+constraints rather regularly. William tested TCP read_skb handlers.
+
+[  106.536188] ------------[ cut here ]------------
+[  106.536197] kernel BUG at net/core/skbuff.c:1693!
+[  106.536479] invalid opcode: 0000 [#1] PREEMPT SMP PTI
+[  106.536726] CPU: 3 PID: 1495 Comm: curl Not tainted 5.19.0-rc5 #1
+[  106.537023] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ArchLinux 1.16.0-1 04/01/2014
+[  106.537467] RIP: 0010:pskb_expand_head+0x269/0x330
+[  106.538585] RSP: 0018:ffffc90000138b68 EFLAGS: 00010202
+[  106.538839] RAX: 000000000000003f RBX: ffff8881048940e8 RCX: 0000000000000a20
+[  106.539186] RDX: 0000000000000002 RSI: 0000000000000000 RDI: ffff8881048940e8
+[  106.539529] RBP: ffffc90000138be8 R08: 00000000e161fd1a R09: 0000000000000000
+[  106.539877] R10: 0000000000000018 R11: 0000000000000000 R12: ffff8881048940e8
+[  106.540222] R13: 0000000000000003 R14: 0000000000000000 R15: ffff8881048940e8
+[  106.540568] FS:  00007f277dde9f00(0000) GS:ffff88813bd80000(0000) knlGS:0000000000000000
+[  106.540954] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  106.541227] CR2: 00007f277eeede64 CR3: 000000000ad3e000 CR4: 00000000000006e0
+[  106.541569] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[  106.541915] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[  106.542255] Call Trace:
+[  106.542383]  <IRQ>
+[  106.542487]  __pskb_pull_tail+0x4b/0x3e0
+[  106.542681]  skb_ensure_writable+0x85/0xa0
+[  106.542882]  sk_skb_pull_data+0x18/0x20
+[  106.543084]  bpf_prog_b517a65a242018b0_bpf_skskb_http_verdict+0x3a9/0x4aa9
+[  106.543536]  ? migrate_disable+0x66/0x80
+[  106.543871]  sk_psock_verdict_recv+0xe2/0x310
+[  106.544258]  ? sk_psock_write_space+0x1f0/0x1f0
+[  106.544561]  tcp_read_skb+0x7b/0x120
+[  106.544740]  tcp_data_queue+0x904/0xee0
+[  106.544931]  tcp_rcv_established+0x212/0x7c0
+[  106.545142]  tcp_v4_do_rcv+0x174/0x2a0
+[  106.545326]  tcp_v4_rcv+0xe70/0xf60
+[  106.545500]  ip_protocol_deliver_rcu+0x48/0x290
+[  106.545744]  ip_local_deliver_finish+0xa7/0x150
+
+Fixes: 04919bed948dc ("tcp: Introduce tcp_read_skb()")
+Reported-by: William Findlay <will@isovalent.com>
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Tested-by: William Findlay <will@isovalent.com>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20230523025618.113937-2-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skmsg.c   | 2 --
+ net/ipv4/tcp.c     | 1 -
+ net/ipv4/udp.c     | 7 ++-----
+ net/unix/af_unix.c | 7 ++-----
+ 4 files changed, 4 insertions(+), 13 deletions(-)
+
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index f81883759d381..4a3dc8d272957 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -1183,8 +1183,6 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
+       int ret = __SK_DROP;
+       int len = skb->len;
+-      skb_get(skb);
+-
+       rcu_read_lock();
+       psock = sk_psock(sk);
+       if (unlikely(!psock)) {
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 6c7c666554ced..01ea2705deea9 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -1772,7 +1772,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+               WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
+               tcp_flags = TCP_SKB_CB(skb)->tcp_flags;
+               used = recv_actor(sk, skb);
+-              consume_skb(skb);
+               if (used < 0) {
+                       if (!copied)
+                               copied = used;
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index c605d171eb2d9..8aaae82e78aeb 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -1813,7 +1813,7 @@ EXPORT_SYMBOL(__skb_recv_udp);
+ int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+ {
+       struct sk_buff *skb;
+-      int err, copied;
++      int err;
+ try_again:
+       skb = skb_recv_udp(sk, MSG_DONTWAIT, &err);
+@@ -1832,10 +1832,7 @@ int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+       }
+       WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk));
+-      copied = recv_actor(sk, skb);
+-      kfree_skb(skb);
+-
+-      return copied;
++      return recv_actor(sk, skb);
+ }
+ EXPORT_SYMBOL(udp_read_skb);
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index 29c6083a37daf..9383afe3e570b 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -2553,7 +2553,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+ {
+       struct unix_sock *u = unix_sk(sk);
+       struct sk_buff *skb;
+-      int err, copied;
++      int err;
+       mutex_lock(&u->iolock);
+       skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err);
+@@ -2561,10 +2561,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
+       if (!skb)
+               return err;
+-      copied = recv_actor(sk, skb);
+-      kfree_skb(skb);
+-
+-      return copied;
++      return recv_actor(sk, skb);
+ }
+ /*
+-- 
+2.39.2
+
diff --git a/queue-6.3/bpf-sockmap-reschedule-is-now-done-through-backlog.patch b/queue-6.3/bpf-sockmap-reschedule-is-now-done-through-backlog.patch
new file mode 100644 (file)
index 0000000..e28bdca
--- /dev/null
@@ -0,0 +1,48 @@
+From 34b36eb5cb79ae033bd3e9ac181ad618073323d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 19:56:07 -0700
+Subject: bpf, sockmap: Reschedule is now done through backlog
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit bce22552f92ea7c577f49839b8e8f7d29afaf880 ]
+
+Now that the backlog manages the reschedule() logic correctly we can drop
+the partial fix to reschedule from the recvmsg hook.
+
+Rescheduling on the recvmsg hook was added to address a corner case where
+we still had data in the backlog state but had nothing to kick it and
+reschedule the backlog worker to run and finish copying data out of the
+state. This had a couple of limitations: first, it required user space to
+kick it, introducing an unnecessary EBUSY and retry. Second, it only
+handled the ingress case, and egress redirects would still be hung.
+
+With the correct fix, pushing the reschedule logic down to where the
+ENOMEM error occurs, we can drop this fix.
+
+Fixes: bec217197b412 ("skmsg: Schedule psock work if the cached skb exists on the psock")
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20230523025618.113937-4-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skmsg.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index 0a9ee2acac0bb..76ff15f8bb06e 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -481,8 +481,6 @@ int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg,
+               msg_rx = sk_psock_peek_msg(psock);
+       }
+ out:
+-      if (psock->work_state.skb && copied > 0)
+-              schedule_delayed_work(&psock->work, 0);
+       return copied;
+ }
+ EXPORT_SYMBOL_GPL(sk_msg_recvmsg);
+-- 
+2.39.2
+
diff --git a/queue-6.3/bpf-sockmap-tcp-data-stall-on-recv-before-accept.patch b/queue-6.3/bpf-sockmap-tcp-data-stall-on-recv-before-accept.patch
new file mode 100644 (file)
index 0000000..1e82869
--- /dev/null
@@ -0,0 +1,96 @@
+From 5bea649c2c4e36e62762e787031533c0e17b5faf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 19:56:10 -0700
+Subject: bpf, sockmap: TCP data stall on recv before accept
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit ea444185a6bf7da4dd0df1598ee953e4f7174858 ]
+
+A common mechanism to put a TCP socket into the sockmap is to hook the
+BPF_SOCK_OPS_{ACTIVE_PASSIVE}_ESTABLISHED_CB event with a BPF program
+that can map the socket info to the correct BPF verdict parser. When
+the user adds the socket to the map the psock is created and the new
+ops are assigned to ensure the verdict program will 'see' the sk_buffs
+as they arrive.
+
+Part of this process hooks the sk_data_ready op with a BPF-specific handler
+that wakes up the BPF verdict program when data is ready to read. The logic
+is simple enough (posted here for easy reading):
+
+ static void sk_psock_verdict_data_ready(struct sock *sk)
+ {
+       struct socket *sock = sk->sk_socket;
+
+       if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
+               return;
+       sock->ops->read_skb(sk, sk_psock_verdict_recv);
+ }
+
+The oversight here is that sk->sk_socket is not assigned until the
+application accepts() the new socket. However, it's entirely OK for the
+peer application to do a connect() followed immediately by sends. The
+socket on the receiver side sits on the backlog queue of the listening
+socket until it's accepted and the data is queued up. If the socket is
+never accepted, or acceptance is slow, the connection will eventually hit
+data limits and the session will be rate limited. But, importantly for the
+BPF sockmap hooks, when this data is received the TCP stack does call
+sk_data_ready(), yet read_skb() is never invoked for it because sk_socket
+is missing. The data sits on the sk_receive_queue.
+
+Then, once the socket is accepted, if no more data arrives from the peer
+there will be no further sk_data_ready() calls and all the data is still on
+the sk_receive_queue. When the user calls recvmsg() after accept(), TCP
+sockets in sockmap use the tcp_bpf_recvmsg_parser() handler. The handler
+checks for data in the sk_msg ingress queue, expecting that the BPF program
+has already run from the sk_data_ready hook and enqueued the data as
+needed. So we are stuck.
+
+To fix this, do an unlikely() check in the recvmsg handler for data on the
+sk_receive_queue and, if any exists, wake up data_ready. The sock is locked
+in both read_skb and recvmsg, so we avoid having multiple runners.
+
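+A condensed timeline of the stall (schematic):
+
+ peer:  connect() + send()  -> data lands on sk_receive_queue
+ tcp:   sk_data_ready()     -> read_skb() skipped, sk_socket is NULL
+ local: accept()            -> sk_socket assigned, no new data arrives
+ local: recvmsg()           -> sk_msg ingress queue empty -> stall
+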
+Fixes: 04919bed948dc ("tcp: Introduce tcp_read_skb()")
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20230523025618.113937-7-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_bpf.c | 20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
+index 73c13642d47f6..01dd76be1a584 100644
+--- a/net/ipv4/tcp_bpf.c
++++ b/net/ipv4/tcp_bpf.c
+@@ -212,6 +212,26 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
+               return tcp_recvmsg(sk, msg, len, flags, addr_len);
+       lock_sock(sk);
++
++      /* We may have received data on the sk_receive_queue pre-accept and
++       * then we can not use read_skb in this context because we haven't
++       * assigned a sk_socket yet so have no link to the ops. The work-around
++       * is to check the sk_receive_queue and in these cases read skbs off
++       * queue again. The read_skb hook is not running at this point because
++       * of lock_sock so we avoid having multiple runners in read_skb.
++       */
++      if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
++              tcp_data_ready(sk);
++              /* This handles the ENOMEM errors if we both receive data
++               * pre accept and are already under memory pressure. At least
++               * let user know to retry.
++               */
++              if (unlikely(!skb_queue_empty(&sk->sk_receive_queue))) {
++                      copied = -EAGAIN;
++                      goto out;
++              }
++      }
++
+ msg_bytes_ready:
+       copied = sk_msg_recvmsg(sk, psock, msg, len, flags);
+       /* The typical case for EFAULT is the socket was gracefully
+-- 
+2.39.2
+
diff --git a/queue-6.3/bpf-sockmap-wake-up-polling-after-data-copy.patch b/queue-6.3/bpf-sockmap-wake-up-polling-after-data-copy.patch
new file mode 100644 (file)
index 0000000..26819a9
--- /dev/null
@@ -0,0 +1,62 @@
+From 0ce8249143e66bbb4f5a5efb78917ab4ba90cae6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 22 May 2023 19:56:11 -0700
+Subject: bpf, sockmap: Wake up polling after data copy
+
+From: John Fastabend <john.fastabend@gmail.com>
+
+[ Upstream commit 6df7f764cd3cf5a03a4a47b23be47e57e41fcd85 ]
+
+When the TCP stack has data ready to read, sk_data_ready() is called.
+Sockmap overwrites this with its own handler that calls into the BPF
+verdict program. But the original TCP socket had sock_def_readable(), which
+would additionally wake up any user space waiters with sk_wake_async().
+
+Sockmap saved that callback when the socket was created, so call the saved
+data ready callback; then we can wake up any epoll() logic waiting on the
+read.
+
+Note we gate the call on 'copied >= 0' to account for read_skb() returning
+0 when a FIN is received: the user needs to be woken up for this case as
+well, so they can do the recvmsg() -> 0 and detect the shutdown.
+
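+The save/restore pattern this relies on looks roughly like the following
+(schematic, not verbatim kernel code):
+
+ /* At psock creation: stash the original callback, install our own. */
+ psock->saved_data_ready = sk->sk_data_ready;
+ sk->sk_data_ready = sk_psock_verdict_data_ready;
+
+ /* After the verdict program consumed data: chain to the original,
+  * which performs the sk_wake_async()/epoll wakeup.
+  */
+ psock->saved_data_ready(sk);
+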
+Fixes: 04919bed948dc ("tcp: Introduce tcp_read_skb()")
+Signed-off-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/bpf/20230523025618.113937-8-john.fastabend@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/skmsg.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/skmsg.c b/net/core/skmsg.c
+index bcd45a99a3db3..08be5f409fb89 100644
+--- a/net/core/skmsg.c
++++ b/net/core/skmsg.c
+@@ -1199,12 +1199,21 @@ static int sk_psock_verdict_recv(struct sock *sk, struct sk_buff *skb)
+ static void sk_psock_verdict_data_ready(struct sock *sk)
+ {
+       struct socket *sock = sk->sk_socket;
++      int copied;
+       trace_sk_data_ready(sk);
+       if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
+               return;
+-      sock->ops->read_skb(sk, sk_psock_verdict_recv);
++      copied = sock->ops->read_skb(sk, sk_psock_verdict_recv);
++      if (copied >= 0) {
++              struct sk_psock *psock;
++
++              rcu_read_lock();
++              psock = sk_psock(sk);
++              psock->saved_data_ready(sk);
++              rcu_read_unlock();
++      }
+ }
+ void sk_psock_start_verdict(struct sock *sk, struct sk_psock *psock)
+-- 
+2.39.2
+
diff --git a/queue-6.3/coresight-perf-release-coresight-path-when-alloc-tra.patch b/queue-6.3/coresight-perf-release-coresight-path-when-alloc-tra.patch
new file mode 100644 (file)
index 0000000..555c40b
--- /dev/null
@@ -0,0 +1,38 @@
+From 41079fc7fc10d6ca32049d4f9a5dba7a1d707329 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Apr 2023 11:24:16 +0800
+Subject: coresight: perf: Release Coresight path when alloc trace id failed
+
+From: Ruidong Tian <tianruidong@linux.alibaba.com>
+
+[ Upstream commit 04ac7f98b92181179ea84439642493f3826d04a2 ]
+
+The error handler for etm_setup_aux() cannot release the coresight path
+because the CPU mask was already cleared when
+coresight_trace_id_get_cpu_id() failed.
+
+Call coresight_release_path() explicitly when allocating the trace ID
+fails.
+
+Fixes: 4ff1fdb4125c4 ("coresight: perf: traceid: Add perf ID allocation and notifiers")
+Signed-off-by: Ruidong Tian <tianruidong@linux.alibaba.com>
+Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+Link: https://lore.kernel.org/r/20230425032416.125542-1-tianruidong@linux.alibaba.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/coresight/coresight-etm-perf.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
+index 711f451b69469..89e8ed214ea49 100644
+--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
++++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
+@@ -402,6 +402,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
+               trace_id = coresight_trace_id_get_cpu_id(cpu);
+               if (!IS_VALID_CS_TRACE_ID(trace_id)) {
+                       cpumask_clear_cpu(cpu, mask);
++                      coresight_release_path(path);
+                       continue;
+               }
+-- 
+2.39.2
+
diff --git a/queue-6.3/cpufreq-amd-pstate-remove-fast_switch_possible-flag-.patch b/queue-6.3/cpufreq-amd-pstate-remove-fast_switch_possible-flag-.patch
new file mode 100644 (file)
index 0000000..19ce953
--- /dev/null
@@ -0,0 +1,45 @@
+From 81f366c503753c3b73e7b0f2823e23382fab96b3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 May 2023 16:28:16 +0000
+Subject: cpufreq: amd-pstate: Remove fast_switch_possible flag from active
+ driver
+
+From: Wyes Karny <wyes.karny@amd.com>
+
+[ Upstream commit 249b62c448de7117c18531d626aed6e153cdfd75 ]
+
+The amd_pstate active mode driver is only compatible with static governors
+and therefore doesn't need the fast_switch functionality. Remove the
+fast_switch_possible flag from the active mode driver.
+
+Fixes: ffa5096a7c33 ("cpufreq: amd-pstate: implement Pstate EPP support for the AMD processors")
+Signed-off-by: Wyes Karny <wyes.karny@amd.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cpufreq/amd-pstate.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
+index 8dd46fad151eb..fda15b4a0770f 100644
+--- a/drivers/cpufreq/amd-pstate.c
++++ b/drivers/cpufreq/amd-pstate.c
+@@ -996,7 +996,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+       policy->policy = CPUFREQ_POLICY_POWERSAVE;
+       if (boot_cpu_has(X86_FEATURE_CPPC)) {
+-              policy->fast_switch_possible = true;
+               ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
+               if (ret)
+                       return ret;
+@@ -1019,7 +1018,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
+ static int amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
+ {
+       pr_debug("CPU %d exiting\n", policy->cpu);
+-      policy->fast_switch_possible = false;
+       return 0;
+ }
+-- 
+2.39.2
+
diff --git a/queue-6.3/cxl-port-fix-null-pointer-access-in-devm_cxl_add_por.patch b/queue-6.3/cxl-port-fix-null-pointer-access-in-devm_cxl_add_por.patch
new file mode 100644 (file)
index 0000000..401eaff
--- /dev/null
@@ -0,0 +1,46 @@
+From 4b939e415a1af152930873082426ac181ab8948e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 May 2023 23:54:35 +0200
+Subject: cxl/port: Fix NULL pointer access in devm_cxl_add_port()
+
+From: Robert Richter <rrichter@amd.com>
+
+[ Upstream commit a70fc4ed20a6118837b0aecbbf789074935f473b ]
+
+In devm_cxl_add_port() the port creation may fail, in which case the
+returned pointer does not contain a valid address. The error message
+generation nevertheless dereferences this invalid port pointer. Fix that
+wrong address access.
+
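+With the fix the message reads, for example (illustrative error value):
+
+ "Failed to add port to port1: -12"   /* parent port exists */
+ "Failed to add root port: -12"       /* no parent port     */
+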
+Fixes: f3cd264c4ec1 ("cxl: Unify debug messages when calling devm_cxl_add_port()")
+Signed-off-by: Robert Richter <rrichter@amd.com>
+Reviewed-by: Dave Jiang <dave.jiang@intel.com>
+Link: https://lore.kernel.org/r/20230519215436.3394532-1-rrichter@amd.com
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/core/port.c | 7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
+index 4d1f9c5b5029a..27cbf457416d2 100644
+--- a/drivers/cxl/core/port.c
++++ b/drivers/cxl/core/port.c
+@@ -751,11 +751,10 @@ struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
+       parent_port = parent_dport ? parent_dport->port : NULL;
+       if (IS_ERR(port)) {
+-              dev_dbg(uport, "Failed to add %s%s%s%s: %ld\n",
+-                      dev_name(&port->dev),
+-                      parent_port ? " to " : "",
++              dev_dbg(uport, "Failed to add%s%s%s: %ld\n",
++                      parent_port ? " port to " : "",
+                       parent_port ? dev_name(&parent_port->dev) : "",
+-                      parent_port ? "" : " (root port)",
++                      parent_port ? "" : " root port",
+                       PTR_ERR(port));
+       } else {
+               dev_dbg(uport, "%s added%s%s%s\n",
+-- 
+2.39.2
+
diff --git a/queue-6.3/drm-i915-disable-dplls-before-disconnecting-the-tc-p.patch b/queue-6.3/drm-i915-disable-dplls-before-disconnecting-the-tc-p.patch
new file mode 100644 (file)
index 0000000..a5ae58c
--- /dev/null
@@ -0,0 +1,113 @@
+From 30edde362be0d78450be77c54f6612eb96cbf106 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Mar 2023 16:20:33 +0200
+Subject: drm/i915: Disable DPLLs before disconnecting the TC PHY
+
+From: Imre Deak <imre.deak@intel.com>
+
+[ Upstream commit b108bdd0e22a402bd3e4a6391acbb6aefad31a9e ]
+
+Bspec requires disabling the DPLLs on TC ports before disconnecting the
+port's PHY. Add a post_pll_disable encoder hook and move the call to
+disconnect the port's PHY from the post_disable hook to the new hook.
+
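+The resulting teardown order for a TC port is then (schematic):
+
+ encoder->disable()
+ encoder->post_disable()        /* no longer disconnects the PHY */
+ intel_disable_shared_dpll()    /* DPLLs off first, per Bspec    */
+ encoder->post_pll_disable()    /* now disconnect the TC PHY     */
+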
+Reviewed-by: Mika Kahola <mika.kahola@intel.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230323142035.1432621-28-imre.deak@intel.com
+Stable-dep-of: 45dfbd992923 ("drm/i915: Fix PIPEDMC disabling for a bigjoiner configuration")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/display/intel_ddi.c     | 15 ++++++++++++---
+ drivers/gpu/drm/i915/display/intel_display.c |  2 ++
+ drivers/gpu/drm/i915/display/intel_dp_mst.c  | 15 +++++++++++++++
+ 3 files changed, 29 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
+index 254559abedfba..379050d228941 100644
+--- a/drivers/gpu/drm/i915/display/intel_ddi.c
++++ b/drivers/gpu/drm/i915/display/intel_ddi.c
+@@ -2731,9 +2731,6 @@ static void intel_ddi_post_disable(struct intel_atomic_state *state,
+                                  const struct drm_connector_state *old_conn_state)
+ {
+       struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+-      struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
+-      enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
+-      bool is_tc_port = intel_phy_is_tc(dev_priv, phy);
+       struct intel_crtc *slave_crtc;
+       if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST)) {
+@@ -2783,6 +2780,17 @@ static void intel_ddi_post_disable(struct intel_atomic_state *state,
+       else
+               intel_ddi_post_disable_dp(state, encoder, old_crtc_state,
+                                         old_conn_state);
++}
++
++static void intel_ddi_post_pll_disable(struct intel_atomic_state *state,
++                                     struct intel_encoder *encoder,
++                                     const struct intel_crtc_state *old_crtc_state,
++                                     const struct drm_connector_state *old_conn_state)
++{
++      struct drm_i915_private *i915 = to_i915(encoder->base.dev);
++      struct intel_digital_port *dig_port = enc_to_dig_port(encoder);
++      enum phy phy = intel_port_to_phy(i915, encoder->port);
++      bool is_tc_port = intel_phy_is_tc(i915, phy);
+       main_link_aux_power_domain_put(dig_port, old_crtc_state);
+@@ -4381,6 +4389,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
+       encoder->pre_pll_enable = intel_ddi_pre_pll_enable;
+       encoder->pre_enable = intel_ddi_pre_enable;
+       encoder->disable = intel_disable_ddi;
++      encoder->post_pll_disable = intel_ddi_post_pll_disable;
+       encoder->post_disable = intel_ddi_post_disable;
+       encoder->update_pipe = intel_ddi_update_pipe;
+       encoder->get_hw_state = intel_ddi_get_hw_state;
+diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
+index df4c6e000961c..963680ea6fedd 100644
+--- a/drivers/gpu/drm/i915/display/intel_display.c
++++ b/drivers/gpu/drm/i915/display/intel_display.c
+@@ -2022,6 +2022,8 @@ static void hsw_crtc_disable(struct intel_atomic_state *state,
+       intel_disable_shared_dpll(old_crtc_state);
++      intel_encoders_post_pll_disable(state, crtc);
++
+       intel_dmc_disable_pipe(i915, crtc->pipe);
+ }
+diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
+index 7c9b328bc2d73..a93018ce0e312 100644
+--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
++++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
+@@ -623,6 +623,20 @@ static void intel_mst_post_disable_dp(struct intel_atomic_state *state,
+                   intel_dp->active_mst_links);
+ }
++static void intel_mst_post_pll_disable_dp(struct intel_atomic_state *state,
++                                        struct intel_encoder *encoder,
++                                        const struct intel_crtc_state *old_crtc_state,
++                                        const struct drm_connector_state *old_conn_state)
++{
++      struct intel_dp_mst_encoder *intel_mst = enc_to_mst(encoder);
++      struct intel_digital_port *dig_port = intel_mst->primary;
++      struct intel_dp *intel_dp = &dig_port->dp;
++
++      if (intel_dp->active_mst_links == 0 &&
++          dig_port->base.post_pll_disable)
++              dig_port->base.post_pll_disable(state, encoder, old_crtc_state, old_conn_state);
++}
++
+ static void intel_mst_pre_pll_enable_dp(struct intel_atomic_state *state,
+                                       struct intel_encoder *encoder,
+                                       const struct intel_crtc_state *pipe_config,
+@@ -1146,6 +1160,7 @@ intel_dp_create_fake_mst_encoder(struct intel_digital_port *dig_port, enum pipe
+       intel_encoder->compute_config_late = intel_dp_mst_compute_config_late;
+       intel_encoder->disable = intel_mst_disable_dp;
+       intel_encoder->post_disable = intel_mst_post_disable_dp;
++      intel_encoder->post_pll_disable = intel_mst_post_pll_disable_dp;
+       intel_encoder->update_pipe = intel_ddi_update_pipe;
+       intel_encoder->pre_pll_enable = intel_mst_pre_pll_enable_dp;
+       intel_encoder->pre_enable = intel_mst_pre_enable_dp;
+-- 
+2.39.2
+
diff --git a/queue-6.3/drm-i915-fix-pipedmc-disabling-for-a-bigjoiner-confi.patch b/queue-6.3/drm-i915-fix-pipedmc-disabling-for-a-bigjoiner-confi.patch
new file mode 100644 (file)
index 0000000..4553ab9
--- /dev/null
@@ -0,0 +1,66 @@
+From 7caa7b678703376811823b322f92e62222339349 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 May 2023 13:31:18 +0300
+Subject: drm/i915: Fix PIPEDMC disabling for a bigjoiner configuration
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Imre Deak <imre.deak@intel.com>
+
+[ Upstream commit 45dfbd992923f4df174db4e23b96fca7e30d73e2 ]
+
+For a bigjoiner configuration display->crtc_disable() will be called first
+for the slave CRTCs and then for the master CRTC. However, the slave CRTCs
+will actually be disabled only after the master CRTC is disabled (from the
+encoder disable hooks called with the master CRTC state). Hence the slave
+PIPEDMCs can be disabled only after the master CRTC is disabled; make this
+so.
+
+intel_encoders_post_pll_disable() must be called only for the master CRTC,
+as is done for the other two encoder disable hooks; while at it, fix this
+up as well. This didn't cause a problem, since
+intel_encoders_post_pll_disable() calls the corresponding hook only for an
+encoder/connector connected to the given CRTC, and slave CRTCs have no
+associated encoder/connector.
+
+Fixes: 3af2ff0840be ("drm/i915: Enable a PIPEDMC whenever its corresponding pipe is enabled")
+Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230510103131.1618266-2-imre.deak@intel.com
+(cherry picked from commit 7eeef32719f6af935a1554813e6bc206446339cd)
+Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/display/intel_display.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
+index 963680ea6fedd..c84b581c61c6b 100644
+--- a/drivers/gpu/drm/i915/display/intel_display.c
++++ b/drivers/gpu/drm/i915/display/intel_display.c
+@@ -2022,9 +2022,17 @@ static void hsw_crtc_disable(struct intel_atomic_state *state,
+       intel_disable_shared_dpll(old_crtc_state);
+-      intel_encoders_post_pll_disable(state, crtc);
++      if (!intel_crtc_is_bigjoiner_slave(old_crtc_state)) {
++              struct intel_crtc *slave_crtc;
++
++              intel_encoders_post_pll_disable(state, crtc);
+-      intel_dmc_disable_pipe(i915, crtc->pipe);
++              intel_dmc_disable_pipe(i915, crtc->pipe);
++
++              for_each_intel_crtc_in_pipe_mask(&i915->drm, slave_crtc,
++                                               intel_crtc_bigjoiner_slave_pipes(old_crtc_state))
++                      intel_dmc_disable_pipe(i915, slave_crtc->pipe);
++      }
+ }
+ static void i9xx_pfit_enable(const struct intel_crtc_state *crtc_state)
+-- 
+2.39.2
+
diff --git a/queue-6.3/drm-i915-move-shared-dpll-disabling-into-crtc-disabl.patch b/queue-6.3/drm-i915-move-shared-dpll-disabling-into-crtc-disabl.patch
new file mode 100644 (file)
index 0000000..d630d8b
--- /dev/null
@@ -0,0 +1,71 @@
+From fb73b8c64779c2029a8c426668b4baede81a08d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Mar 2023 16:20:32 +0200
+Subject: drm/i915: Move shared DPLL disabling into CRTC disable hook
+
+From: Imre Deak <imre.deak@intel.com>
+
+[ Upstream commit 3acac2d06a7e0f0b182b86b25bb8a2e9b3300406 ]
+
+The spec requires disabling the PLL on TC ports before disconnecting the
+port's PHY. Prepare for that by moving the PLL disabling to the CRTC
+disable hook; the PHY disconnect will be moved to the post_pll_disable()
+encoder hook in the next patch.
+
+v2: Move the call from intel_crtc_disable_noatomic() as well.
+
+Reviewed-by: Mika Kahola <mika.kahola@intel.com> # v1
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20230323142035.1432621-27-imre.deak@intel.com
+Stable-dep-of: 45dfbd992923 ("drm/i915: Fix PIPEDMC disabling for a bigjoiner configuration")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/display/intel_display.c       | 5 ++++-
+ drivers/gpu/drm/i915/display/intel_modeset_setup.c | 1 -
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
+index 2bef50ab0ad19..df4c6e000961c 100644
+--- a/drivers/gpu/drm/i915/display/intel_display.c
++++ b/drivers/gpu/drm/i915/display/intel_display.c
+@@ -2000,6 +2000,8 @@ static void ilk_crtc_disable(struct intel_atomic_state *state,
+       intel_set_cpu_fifo_underrun_reporting(dev_priv, pipe, true);
+       intel_set_pch_fifo_underrun_reporting(dev_priv, pipe, true);
++
++      intel_disable_shared_dpll(old_crtc_state);
+ }
+ static void hsw_crtc_disable(struct intel_atomic_state *state,
+@@ -2018,6 +2020,8 @@ static void hsw_crtc_disable(struct intel_atomic_state *state,
+               intel_encoders_post_disable(state, crtc);
+       }
++      intel_disable_shared_dpll(old_crtc_state);
++
+       intel_dmc_disable_pipe(i915, crtc->pipe);
+ }
+@@ -7140,7 +7144,6 @@ static void intel_old_crtc_state_disables(struct intel_atomic_state *state,
+       dev_priv->display.funcs.display->crtc_disable(state, crtc);
+       crtc->active = false;
+       intel_fbc_disable(crtc);
+-      intel_disable_shared_dpll(old_crtc_state);
+       if (!new_crtc_state->hw.active)
+               intel_initial_watermarks(state, crtc);
+diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c
+index 52cdbd4fc2fa0..48b726e408057 100644
+--- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c
++++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c
+@@ -96,7 +96,6 @@ static void intel_crtc_disable_noatomic(struct intel_crtc *crtc,
+       intel_fbc_disable(crtc);
+       intel_update_watermarks(i915);
+-      intel_disable_shared_dpll(crtc_state);
+       intel_display_power_put_all_in_set(i915, &crtc->enabled_power_domains);
+-- 
+2.39.2
+
diff --git a/queue-6.3/firmware-arm_ffa-fix-usage-of-partition-info-get-cou.patch b/queue-6.3/firmware-arm_ffa-fix-usage-of-partition-info-get-cou.patch
new file mode 100644 (file)
index 0000000..aa967e0
--- /dev/null
@@ -0,0 +1,50 @@
+From d526a81fe1264f2bc7c1207ffae1455bbb0334af Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Apr 2023 16:06:02 +0100
+Subject: firmware: arm_ffa: Fix usage of partition info get count flag
+
+From: Sudeep Holla <sudeep.holla@arm.com>
+
+[ Upstream commit c6e045361a27ecd4fac6413164e0d091d80eee99 ]
+
+Commit bb1be7498500 ("firmware: arm_ffa: Add v1.1 get_partition_info support")
+adds support for discovering the UUIDs of the partitions, or for just
+fetching the partition count using the
+PARTITION_INFO_GET_RETURN_COUNT_ONLY flag.
+
+However, the commit doesn't handle the fact that the older version doesn't
+understand the flag and requires it to be MBZ, which results in the
+firmware returning an invalid parameter error. That in turn makes the
+driver probe fail, which is incorrect.
+
+Limit the usage of the PARTITION_INFO_GET_RETURN_COUNT_ONLY flag to
+versions above v1.0 (i.e. v1.1 and onwards), which fixes the issue.
+
+Fixes: bb1be7498500 ("firmware: arm_ffa: Add v1.1 get_partition_info support")
+Reported-by: Jens Wiklander <jens.wiklander@linaro.org>
+Reported-by: Marc Bonnici <marc.bonnici@arm.com>
+Tested-by: Jens Wiklander <jens.wiklander@linaro.org>
+Reviewed-by: Jens Wiklander <jens.wiklander@linaro.org>
+Link: https://lore.kernel.org/r/20230419-ffa_fixes_6-4-v2-2-d9108e43a176@arm.com
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_ffa/driver.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/arm_ffa/driver.c b/drivers/firmware/arm_ffa/driver.c
+index 02774baa90078..e234091386671 100644
+--- a/drivers/firmware/arm_ffa/driver.c
++++ b/drivers/firmware/arm_ffa/driver.c
+@@ -193,7 +193,8 @@ __ffa_partition_info_get(u32 uuid0, u32 uuid1, u32 uuid2, u32 uuid3,
+       int idx, count, flags = 0, sz, buf_sz;
+       ffa_value_t partition_info;
+-      if (!buffer || !num_partitions) /* Just get the count for now */
++      if (drv_info->version > FFA_VERSION_1_0 &&
++          (!buffer || !num_partitions)) /* Just get the count for now */
+               flags = PARTITION_INFO_GET_RETURN_COUNT_ONLY;
+       mutex_lock(&drv_info->rx_lock);
+-- 
+2.39.2
+
diff --git a/queue-6.3/firmware-arm_scmi-fix-incorrect-alloc_workqueue-invo.patch b/queue-6.3/firmware-arm_scmi-fix-incorrect-alloc_workqueue-invo.patch
new file mode 100644 (file)
index 0000000..f7772d7
--- /dev/null
@@ -0,0 +1,38 @@
+From 0cc6a7732b689ec06fb27189936e1c9d99147dd2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Apr 2023 09:33:49 -1000
+Subject: firmware: arm_scmi: Fix incorrect alloc_workqueue() invocation
+
+From: Tejun Heo <tj@kernel.org>
+
+[ Upstream commit 44e8d5ad2dc01529eb1316b1521f24ac4aac8eaf ]
+
+scmi_xfer_raw_worker_init() is passing a flag, WQ_SYSFS, as @max_active.
+Fix it by or'ing WQ_SYSFS into @flags, so that it actually enables the
+sysfs interface, and by using 0 for @max_active to get the default setting.
+
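+For reference, the relevant prototype (abbreviated):
+
+ struct workqueue_struct *alloc_workqueue(const char *fmt,
+                                          unsigned int flags,
+                                          int max_active, ...);
+
+@max_active == 0 selects the default limit, and WQ_SYSFS belongs in @flags.
+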
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Fixes: 3c3d818a9317 ("firmware: arm_scmi: Add core raw transmission support")
+Link: https://lore.kernel.org/r/ZEGTnajiQm7mkkZS@slm.duckdns.org
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_scmi/raw_mode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
+index d40df099fd515..6971dcf72fb99 100644
+--- a/drivers/firmware/arm_scmi/raw_mode.c
++++ b/drivers/firmware/arm_scmi/raw_mode.c
+@@ -1066,7 +1066,7 @@ static int scmi_xfer_raw_worker_init(struct scmi_raw_mode_info *raw)
+       raw->wait_wq = alloc_workqueue("scmi-raw-wait-wq-%d",
+                                      WQ_UNBOUND | WQ_FREEZABLE |
+-                                     WQ_HIGHPRI, WQ_SYSFS, raw->id);
++                                     WQ_HIGHPRI | WQ_SYSFS, 0, raw->id);
+       if (!raw->wait_wq)
+               return -ENOMEM;
+-- 
+2.39.2
+
diff --git a/queue-6.3/gpio-f7188x-fix-chip-name-and-pin-count-on-nuvoton-c.patch b/queue-6.3/gpio-f7188x-fix-chip-name-and-pin-count-on-nuvoton-c.patch
new file mode 100644 (file)
index 0000000..8ba4c26
--- /dev/null
@@ -0,0 +1,147 @@
+From 4562059a72853a4e8ae3081e722a26c23b729c9d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Apr 2023 17:20:55 +0200
+Subject: gpio-f7188x: fix chip name and pin count on Nuvoton chip
+
+From: Henning Schild <henning.schild@siemens.com>
+
+[ Upstream commit 3002b8642f016d7fe3ff56240dacea1075f6b877 ]
+
+In fact the device with chip ID 0xD283 is called NCT6126D, and that is the
+chip the Nuvoton code was written for. Correct the name to avoid confusion,
+because an NCT6116D exists as well but has another chip ID and is currently
+not supported.
+
+A look at the spec also revealed that GPIO group7 in fact has 8 pins, so
+correct the pin count in that group as well.
+
+Fixes: d0918a84aff0 ("gpio-f7188x: Add GPIO support for Nuvoton NCT6116")
+Reported-by: Xing Tong Wu <xingtong.wu@siemens.com>
+Signed-off-by: Henning Schild <henning.schild@siemens.com>
+Acked-by: Simon Guinot <simon.guinot@sequanux.org>
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/Kconfig       |  2 +-
+ drivers/gpio/gpio-f7188x.c | 28 ++++++++++++++--------------
+ 2 files changed, 15 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
+index badbe05823180..14b655411aa0a 100644
+--- a/drivers/gpio/Kconfig
++++ b/drivers/gpio/Kconfig
+@@ -879,7 +879,7 @@ config GPIO_F7188X
+       help
+         This option enables support for GPIOs found on Fintek Super-I/O
+         chips F71869, F71869A, F71882FG, F71889F and F81866.
+-        As well as Nuvoton Super-I/O chip NCT6116D.
++        As well as Nuvoton Super-I/O chip NCT6126D.
+         To compile this driver as a module, choose M here: the module will
+         be called f7188x-gpio.
+diff --git a/drivers/gpio/gpio-f7188x.c b/drivers/gpio/gpio-f7188x.c
+index 9effa7769bef5..f54ca5a1775ea 100644
+--- a/drivers/gpio/gpio-f7188x.c
++++ b/drivers/gpio/gpio-f7188x.c
+@@ -48,7 +48,7 @@
+ /*
+  * Nuvoton devices.
+  */
+-#define SIO_NCT6116D_ID               0xD283  /* NCT6116D chipset ID */
++#define SIO_NCT6126D_ID               0xD283  /* NCT6126D chipset ID */
+ #define SIO_LD_GPIO_NUVOTON   0x07    /* GPIO logical device */
+@@ -62,7 +62,7 @@ enum chips {
+       f81866,
+       f81804,
+       f81865,
+-      nct6116d,
++      nct6126d,
+ };
+ static const char * const f7188x_names[] = {
+@@ -74,7 +74,7 @@ static const char * const f7188x_names[] = {
+       "f81866",
+       "f81804",
+       "f81865",
+-      "nct6116d",
++      "nct6126d",
+ };
+ struct f7188x_sio {
+@@ -187,8 +187,8 @@ static int f7188x_gpio_set_config(struct gpio_chip *chip, unsigned offset,
+ /* Output mode register (0:open drain 1:push-pull). */
+ #define f7188x_gpio_out_mode(base) ((base) + 3)
+-#define f7188x_gpio_dir_invert(type)  ((type) == nct6116d)
+-#define f7188x_gpio_data_single(type) ((type) == nct6116d)
++#define f7188x_gpio_dir_invert(type)  ((type) == nct6126d)
++#define f7188x_gpio_data_single(type) ((type) == nct6126d)
+ static struct f7188x_gpio_bank f71869_gpio_bank[] = {
+       F7188X_GPIO_BANK(0, 6, 0xF0, DRVNAME "-0"),
+@@ -274,7 +274,7 @@ static struct f7188x_gpio_bank f81865_gpio_bank[] = {
+       F7188X_GPIO_BANK(60, 5, 0x90, DRVNAME "-6"),
+ };
+-static struct f7188x_gpio_bank nct6116d_gpio_bank[] = {
++static struct f7188x_gpio_bank nct6126d_gpio_bank[] = {
+       F7188X_GPIO_BANK(0, 8, 0xE0, DRVNAME "-0"),
+       F7188X_GPIO_BANK(10, 8, 0xE4, DRVNAME "-1"),
+       F7188X_GPIO_BANK(20, 8, 0xE8, DRVNAME "-2"),
+@@ -282,7 +282,7 @@ static struct f7188x_gpio_bank nct6116d_gpio_bank[] = {
+       F7188X_GPIO_BANK(40, 8, 0xF0, DRVNAME "-4"),
+       F7188X_GPIO_BANK(50, 8, 0xF4, DRVNAME "-5"),
+       F7188X_GPIO_BANK(60, 8, 0xF8, DRVNAME "-6"),
+-      F7188X_GPIO_BANK(70, 1, 0xFC, DRVNAME "-7"),
++      F7188X_GPIO_BANK(70, 8, 0xFC, DRVNAME "-7"),
+ };
+ static int f7188x_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+@@ -490,9 +490,9 @@ static int f7188x_gpio_probe(struct platform_device *pdev)
+               data->nr_bank = ARRAY_SIZE(f81865_gpio_bank);
+               data->bank = f81865_gpio_bank;
+               break;
+-      case nct6116d:
+-              data->nr_bank = ARRAY_SIZE(nct6116d_gpio_bank);
+-              data->bank = nct6116d_gpio_bank;
++      case nct6126d:
++              data->nr_bank = ARRAY_SIZE(nct6126d_gpio_bank);
++              data->bank = nct6126d_gpio_bank;
+               break;
+       default:
+               return -ENODEV;
+@@ -559,9 +559,9 @@ static int __init f7188x_find(int addr, struct f7188x_sio *sio)
+       case SIO_F81865_ID:
+               sio->type = f81865;
+               break;
+-      case SIO_NCT6116D_ID:
++      case SIO_NCT6126D_ID:
+               sio->device = SIO_LD_GPIO_NUVOTON;
+-              sio->type = nct6116d;
++              sio->type = nct6126d;
+               break;
+       default:
+               pr_info("Unsupported Fintek device 0x%04x\n", devid);
+@@ -569,7 +569,7 @@ static int __init f7188x_find(int addr, struct f7188x_sio *sio)
+       }
+       /* double check manufacturer where possible */
+-      if (sio->type != nct6116d) {
++      if (sio->type != nct6126d) {
+               manid = superio_inw(addr, SIO_FINTEK_MANID);
+               if (manid != SIO_FINTEK_ID) {
+                       pr_debug("Not a Fintek device at 0x%08x\n", addr);
+@@ -581,7 +581,7 @@ static int __init f7188x_find(int addr, struct f7188x_sio *sio)
+       err = 0;
+       pr_info("Found %s at %#x\n", f7188x_names[sio->type], (unsigned int)addr);
+-      if (sio->type != nct6116d)
++      if (sio->type != nct6126d)
+               pr_info("   revision %d\n", superio_inb(addr, SIO_FINTEK_DEVREV));
+ err:
+-- 
+2.39.2
+
diff --git a/queue-6.3/gpiolib-fix-allocation-of-mixed-dynamic-static-gpios.patch b/queue-6.3/gpiolib-fix-allocation-of-mixed-dynamic-static-gpios.patch
new file mode 100644 (file)
index 0000000..fe41c14
--- /dev/null
@@ -0,0 +1,65 @@
+From f8ef6ce933aeda80dfe57567e58245bc3eea1b8e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 4 May 2023 08:04:21 +0200
+Subject: gpiolib: fix allocation of mixed dynamic/static GPIOs
+
+From: Andreas Kemnade <andreas@kemnade.info>
+
+[ Upstream commit 7dd3d9bd873f138675cb727eaa51a498d99f0e89 ]
+
+If statically and dynamically allocated GPIOs are both present, dynamic
+allocation pollutes the number space used by static allocation, causing
+static allocation to fail.
+Enforce dynamic allocation above GPIO_DYNAMIC_BASE.
+
+Seen on a GTA04 when omap-gpio (static) and twl-gpio (dynamic)
+raced:
+[some successful registrations of omap_gpio instances]
+[    2.553833] twl4030_gpio twl4030-gpio: gpio (irq 145) chaining IRQs 161..178
+[    2.561401] gpiochip_find_base: found new base at 160
+[    2.564392] gpio gpiochip5: (twl4030): added GPIO chardev (254:5)
+[    2.564544] gpio gpiochip5: registered GPIOs 160 to 177 on twl4030
+[...]
+[    2.692169] omap-gpmc 6e000000.gpmc: GPMC revision 5.0
+[    2.697357] gpmc_mem_init: disabling cs 0 mapped at 0x0-0x1000000
+[    2.703643] gpiochip_find_base: found new base at 178
+[    2.704376] gpio gpiochip6: (omap-gpmc): added GPIO chardev (254:6)
+[    2.704589] gpio gpiochip6: registered GPIOs 178 to 181 on omap-gpmc
+[...]
+[    2.840393] gpio gpiochip7: Static allocation of GPIO base is deprecated, use dynamic allocation.
+[    2.849365] gpio gpiochip7: (gpio-160-191): GPIO integer space overlap, cannot add chip
+[    2.857513] gpiochip_add_data_with_key: GPIOs 160..191 (gpio-160-191) failed to register, -16
+[    2.866149] omap_gpio 48310000.gpio: error -EBUSY: Could not register gpio chip
+
+On that device it is fixed invasively by
+commit 92bf78b33b0b4 ("gpio: omap: use dynamic allocation of base"),
+but let's also fix it for devices where there is still a mixture of static
+and dynamic allocation.
+
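+As a worked example from the log above (assuming GPIO_DYNAMIC_BASE is 512,
+its value at the time of writing): twl4030 is placed dynamically at base
+160 and omap-gpmc right after it at 178, so the later static omap_gpio
+request for 160..191 collides and fails with -EBUSY. With the clamp below,
+dynamic chips are allocated from 512 upwards and the static 160..191 range
+stays free.
+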
+Fixes: 7b61212f2a07 ("gpiolib: Get rid of ARCH_NR_GPIOS")
+Signed-off-by: Andreas Kemnade <andreas@kemnade.info>
+Reviewed-by: <christophe.leroy@csgroup.eu>
+Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
+Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpiolib.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
+index 19bd23044b017..4472214fcd43a 100644
+--- a/drivers/gpio/gpiolib.c
++++ b/drivers/gpio/gpiolib.c
+@@ -193,6 +193,8 @@ static int gpiochip_find_base(int ngpio)
+                       break;
+               /* nope, check the space right after the chip */
+               base = gdev->base + gdev->ngpio;
++              if (base < GPIO_DYNAMIC_BASE)
++                      base = GPIO_DYNAMIC_BASE;
+       }
+       if (gpio_is_valid(base)) {
+-- 
+2.39.2
+
diff --git a/queue-6.3/net-fec-add-dma_wmb-to-ensure-correct-descriptor-val.patch b/queue-6.3/net-fec-add-dma_wmb-to-ensure-correct-descriptor-val.patch
new file mode 100644 (file)
index 0000000..8141259
--- /dev/null
@@ -0,0 +1,75 @@
+From 316556074d3ed59c3bc79747849abdfc81ab66c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 18 May 2023 10:02:02 -0500
+Subject: net: fec: add dma_wmb to ensure correct descriptor values
+
+From: Shenwei Wang <shenwei.wang@nxp.com>
+
+[ Upstream commit 9025944fddfed5966c8f102f1fe921ab3aee2c12 ]
+
+Two dma_wmb() are added in the XDP TX path to ensure proper ordering of
+descriptor and buffer updates:
+1. A dma_wmb() is added after updating the last BD to make sure
+   the updates to rest of the descriptor are visible before
+   transferring ownership to FEC.
+2. A dma_wmb() is also added after updating the bdp to ensure these
+   updates are visible before updating txq->bd.cur.
+3. Start the xmit of the frame immediately after configuring the tx
+   descriptor (see the ordering sketch below).
+
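+The combined ordering then looks like this (schematic; the descriptor-fill
+step and next_bdp are placeholders, not actual driver code):
+
+ /* fill descriptor fields: length, buffer address, ... */
+ dma_wmb();                          /* 1: fields before ownership  */
+ bdp->cbd_sc = cpu_to_fec16(status | BD_ENET_TX_READY);
+ dma_wmb();                          /* 2: bdp before bd.cur update */
+ txq->bd.cur = next_bdp;
+ writel(0, txq->bd.reg_desc_active); /* 3: kick TX immediately      */
+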
+Fixes: 6d6b39f180b8 ("net: fec: add initial XDP support")
+Signed-off-by: Shenwei Wang <shenwei.wang@nxp.com>
+Reviewed-by: Wei Fang <wei.fang@nxp.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/freescale/fec_main.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
+index 577d94821b3e7..38e5b5abe067c 100644
+--- a/drivers/net/ethernet/freescale/fec_main.c
++++ b/drivers/net/ethernet/freescale/fec_main.c
+@@ -3834,6 +3834,11 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
+       index = fec_enet_get_bd_index(last_bdp, &txq->bd);
+       txq->tx_skbuff[index] = NULL;
++      /* Make sure the updates to rest of the descriptor are performed before
++       * transferring ownership.
++       */
++      dma_wmb();
++
+       /* Send it on its way.  Tell FEC it's ready, interrupt when done,
+        * it's the last BD of the frame, and to put the CRC on the end.
+        */
+@@ -3843,8 +3848,14 @@ static int fec_enet_txq_xmit_frame(struct fec_enet_private *fep,
+       /* If this was the last BD in the ring, start at the beginning again. */
+       bdp = fec_enet_get_nextdesc(last_bdp, &txq->bd);
++      /* Make sure the update to bdp are performed before txq->bd.cur. */
++      dma_wmb();
++
+       txq->bd.cur = bdp;
++      /* Trigger transmission start */
++      writel(0, txq->bd.reg_desc_active);
++
+       return 0;
+ }
+@@ -3873,12 +3884,6 @@ static int fec_enet_xdp_xmit(struct net_device *dev,
+               sent_frames++;
+       }
+-      /* Make sure the update to bdp and tx_skbuff are performed. */
+-      wmb();
+-
+-      /* Trigger transmission start */
+-      writel(0, txq->bd.reg_desc_active);
+-
+       __netif_tx_unlock(nq);
+       return sent_frames;
+-- 
+2.39.2
+
diff --git a/queue-6.3/net-mlx5-e-switch-devcom-sync-devcom-events-and-devc.patch b/queue-6.3/net-mlx5-e-switch-devcom-sync-devcom-events-and-devc.patch
new file mode 100644 (file)
index 0000000..0d9216b
--- /dev/null
@@ -0,0 +1,91 @@
+From 7a52d6618afdf97173eab06a8c0e2616196468e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 6 Feb 2023 11:52:02 +0200
+Subject: net/mlx5: E-switch, Devcom, sync devcom events and devcom comp
+ register
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 8c253dfc89efde6b5faddf9e7400e5d17884e042 ]
+
+devcom events are sent to all registered components. Following the cited
+patch, it is possible for two components, e.g. two eswitches, to send
+devcom events while both components are registered. This means the eswitch
+layer will do double un/pairing, i.e. double allocation and freeing of
+resources, even though only one un/pairing is needed. Flow example:
+
+       cpu0                                    cpu1
+       ----                                    ----
+
+ mlx5_devlink_eswitch_mode_set(dev0)
+  esw_offloads_devcom_init()
+   mlx5_devcom_register_component(esw0)
+                                         mlx5_devlink_eswitch_mode_set(dev1)
+                                          esw_offloads_devcom_init()
+                                           mlx5_devcom_register_component(esw1)
+                                           mlx5_devcom_send_event()
+   mlx5_devcom_send_event()
+
+Hence, check whether the eswitches are already un/paired before freeing or
+allocating resources.
+
+Fixes: 09b278462f16 ("net: devlink: enable parallel ops on netlink interface")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Mark Bloch <mbloch@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch.h        | 1 +
+ .../net/ethernet/mellanox/mlx5/core/eswitch_offloads.c   | 9 ++++++++-
+ 2 files changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+index 31876db3c7641..5fd971cee6fdc 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -341,6 +341,7 @@ struct mlx5_eswitch {
+               u32             large_group_num;
+       }  params;
+       struct blocking_notifier_head n_head;
++      bool paired[MLX5_MAX_PORTS];
+ };
+ void esw_offloads_disable(struct mlx5_eswitch *esw);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index 727b30f3a229a..a60c9f292e10c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -2744,6 +2744,9 @@ static int mlx5_esw_offloads_devcom_event(int event,
+                   mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
+                       break;
++              if (esw->paired[mlx5_get_dev_index(peer_esw->dev)])
++                      break;
++
+               err = mlx5_esw_offloads_set_ns_peer(esw, peer_esw, true);
+               if (err)
+                       goto err_out;
+@@ -2755,14 +2758,18 @@ static int mlx5_esw_offloads_devcom_event(int event,
+               if (err)
+                       goto err_pair;
++              esw->paired[mlx5_get_dev_index(peer_esw->dev)] = true;
++              peer_esw->paired[mlx5_get_dev_index(esw->dev)] = true;
+               mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, true);
+               break;
+       case ESW_OFFLOADS_DEVCOM_UNPAIR:
+-              if (!mlx5_devcom_is_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS))
++              if (!esw->paired[mlx5_get_dev_index(peer_esw->dev)])
+                       break;
+               mlx5_devcom_set_paired(devcom, MLX5_DEVCOM_ESW_OFFLOADS, false);
++              esw->paired[mlx5_get_dev_index(peer_esw->dev)] = false;
++              peer_esw->paired[mlx5_get_dev_index(esw->dev)] = false;
+               mlx5_esw_offloads_unpair(peer_esw);
+               mlx5_esw_offloads_unpair(esw);
+               mlx5_esw_offloads_set_ns_peer(esw, peer_esw, false);
+-- 
+2.39.2
+
diff --git a/queue-6.3/net-mlx5e-tc-fix-using-eswitch-mapping-in-nic-mode.patch b/queue-6.3/net-mlx5e-tc-fix-using-eswitch-mapping-in-nic-mode.patch
new file mode 100644 (file)
index 0000000..4084d7a
--- /dev/null
@@ -0,0 +1,307 @@
+From cfa5835a7818f0d12bf062b2136bc68cbe57e8fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Apr 2023 16:04:48 +0300
+Subject: net/mlx5e: TC, Fix using eswitch mapping in nic mode
+
+From: Paul Blakey <paulb@nvidia.com>
+
+[ Upstream commit dfa1e46d6093831b9d49f0f350227a1d13644a2f ]
+
+The cited patch uses the eswitch object mapping pool while in NIC mode,
+where it isn't initialized. This results in the trace below [0].
+
+Fix that by using either the NIC or the eswitch object mapping pool,
+depending on whether the eswitch is enabled or not.
+
+[0]:
+[  826.446057] ==================================================================
+[  826.446729] BUG: KASAN: slab-use-after-free in mlx5_add_flow_rules+0x30/0x490 [mlx5_core]
+[  826.447515] Read of size 8 at addr ffff888194485830 by task tc/6233
+
+[  826.448243] CPU: 16 PID: 6233 Comm: tc Tainted: G        W          6.3.0-rc6+ #1
+[  826.448890] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[  826.449785] Call Trace:
+[  826.450052]  <TASK>
+[  826.450302]  dump_stack_lvl+0x33/0x50
+[  826.450650]  print_report+0xc2/0x610
+[  826.450998]  ? __virt_addr_valid+0xb1/0x130
+[  826.451385]  ? mlx5_add_flow_rules+0x30/0x490 [mlx5_core]
+[  826.451935]  kasan_report+0xae/0xe0
+[  826.452276]  ? mlx5_add_flow_rules+0x30/0x490 [mlx5_core]
+[  826.452829]  mlx5_add_flow_rules+0x30/0x490 [mlx5_core]
+[  826.453368]  ? __kmalloc_node+0x5a/0x120
+[  826.453733]  esw_add_restore_rule+0x20f/0x270 [mlx5_core]
+[  826.454288]  ? mlx5_eswitch_add_send_to_vport_meta_rule+0x260/0x260 [mlx5_core]
+[  826.455011]  ? mutex_unlock+0x80/0xd0
+[  826.455361]  ? __mutex_unlock_slowpath.constprop.0+0x210/0x210
+[  826.455862]  ? mapping_add+0x2cb/0x440 [mlx5_core]
+[  826.456425]  mlx5e_tc_action_miss_mapping_get+0x139/0x180 [mlx5_core]
+[  826.457058]  ? mlx5e_tc_update_skb_nic+0xb0/0xb0 [mlx5_core]
+[  826.457636]  ? __kasan_kmalloc+0x77/0x90
+[  826.458000]  ? __kmalloc+0x57/0x120
+[  826.458336]  mlx5_tc_ct_flow_offload+0x325/0xe40 [mlx5_core]
+[  826.458916]  ? ct_kernel_enter.constprop.0+0x48/0xa0
+[  826.459360]  ? mlx5_tc_ct_parse_action+0xf0/0xf0 [mlx5_core]
+[  826.459933]  ? mlx5e_mod_hdr_attach+0x491/0x520 [mlx5_core]
+[  826.460507]  ? mlx5e_mod_hdr_get+0x12/0x20 [mlx5_core]
+[  826.461046]  ? mlx5e_tc_attach_mod_hdr+0x154/0x170 [mlx5_core]
+[  826.461635]  mlx5e_configure_flower+0x969/0x2110 [mlx5_core]
+[  826.462217]  ? _raw_spin_lock_bh+0x85/0xe0
+[  826.462597]  ? __mlx5e_add_fdb_flow+0x750/0x750 [mlx5_core]
+[  826.463163]  ? kasan_save_stack+0x2e/0x40
+[  826.463534]  ? down_read+0x115/0x1b0
+[  826.463878]  ? down_write_killable+0x110/0x110
+[  826.464288]  ? tc_setup_action.part.0+0x9f/0x3b0
+[  826.464701]  ? mlx5e_is_uplink_rep+0x4c/0x90 [mlx5_core]
+[  826.465253]  ? mlx5e_tc_reoffload_flows_work+0x130/0x130 [mlx5_core]
+[  826.465878]  tc_setup_cb_add+0x112/0x250
+[  826.466247]  fl_hw_replace_filter+0x230/0x310 [cls_flower]
+[  826.466724]  ? fl_hw_destroy_filter+0x1a0/0x1a0 [cls_flower]
+[  826.467212]  fl_change+0x14e1/0x2030 [cls_flower]
+[  826.467636]  ? sock_def_readable+0x89/0x120
+[  826.468019]  ? fl_tmplt_create+0x2d0/0x2d0 [cls_flower]
+[  826.468509]  ? kasan_unpoison+0x23/0x50
+[  826.468873]  ? get_random_u16+0x180/0x180
+[  826.469244]  ? __radix_tree_lookup+0x2b/0x130
+[  826.469640]  ? fl_get+0x7b/0x140 [cls_flower]
+[  826.470042]  ? fl_mask_put+0x200/0x200 [cls_flower]
+[  826.470478]  ? __mutex_unlock_slowpath.constprop.0+0x210/0x210
+[  826.470973]  ? fl_tmplt_create+0x2d0/0x2d0 [cls_flower]
+[  826.471427]  tc_new_tfilter+0x644/0x1050
+[  826.471795]  ? tc_get_tfilter+0x860/0x860
+[  826.472170]  ? __thaw_task+0x130/0x130
+[  826.472525]  ? arch_stack_walk+0x98/0xf0
+[  826.472892]  ? cap_capable+0x9f/0xd0
+[  826.473235]  ? security_capable+0x47/0x60
+[  826.473608]  rtnetlink_rcv_msg+0x1d5/0x550
+[  826.473985]  ? rtnl_calcit.isra.0+0x1f0/0x1f0
+[  826.474383]  ? __stack_depot_save+0x35/0x4c0
+[  826.474779]  ? kasan_save_stack+0x2e/0x40
+[  826.475149]  ? kasan_save_stack+0x1e/0x40
+[  826.475518]  ? __kasan_record_aux_stack+0x9f/0xb0
+[  826.475939]  ? task_work_add+0x77/0x1c0
+[  826.476305]  netlink_rcv_skb+0xe0/0x210
+[  826.476661]  ? rtnl_calcit.isra.0+0x1f0/0x1f0
+[  826.477057]  ? netlink_ack+0x7c0/0x7c0
+[  826.477412]  ? rhashtable_jhash2+0xef/0x150
+[  826.477796]  ? _copy_from_iter+0x105/0x770
+[  826.484386]  netlink_unicast+0x346/0x490
+[  826.484755]  ? netlink_attachskb+0x400/0x400
+[  826.485145]  ? kernel_text_address+0xc2/0xd0
+[  826.485535]  netlink_sendmsg+0x3b0/0x6c0
+[  826.485902]  ? kernel_text_address+0xc2/0xd0
+[  826.486296]  ? netlink_unicast+0x490/0x490
+[  826.486671]  ? iovec_from_user.part.0+0x7a/0x1a0
+[  826.487083]  ? netlink_unicast+0x490/0x490
+[  826.487461]  sock_sendmsg+0x73/0xc0
+[  826.487803]  ____sys_sendmsg+0x364/0x380
+[  826.488186]  ? import_iovec+0x7/0x10
+[  826.488531]  ? kernel_sendmsg+0x30/0x30
+[  826.488893]  ? __copy_msghdr+0x180/0x180
+[  826.489258]  ? kasan_save_stack+0x2e/0x40
+[  826.489629]  ? kasan_save_stack+0x1e/0x40
+[  826.490002]  ? __kasan_record_aux_stack+0x9f/0xb0
+[  826.490424]  ? __call_rcu_common.constprop.0+0x46/0x580
+[  826.490876]  ___sys_sendmsg+0xdf/0x140
+[  826.491231]  ? copy_msghdr_from_user+0x110/0x110
+[  826.491649]  ? fget_raw+0x120/0x120
+[  826.491988]  ? ___sys_recvmsg+0xd9/0x130
+[  826.492355]  ? folio_batch_add_and_move+0x80/0xa0
+[  826.492776]  ? _raw_spin_lock+0x7a/0xd0
+[  826.493137]  ? _raw_spin_lock+0x7a/0xd0
+[  826.493500]  ? _raw_read_lock_irq+0x30/0x30
+[  826.493880]  ? kasan_set_track+0x21/0x30
+[  826.494249]  ? kasan_save_free_info+0x2a/0x40
+[  826.494650]  ? do_sys_openat2+0xff/0x270
+[  826.495016]  ? __fget_light+0x1b5/0x200
+[  826.495377]  ? __virt_addr_valid+0xb1/0x130
+[  826.495763]  __sys_sendmsg+0xb2/0x130
+[  826.496118]  ? __sys_sendmsg_sock+0x20/0x20
+[  826.496501]  ? __x64_sys_rseq+0x2e0/0x2e0
+[  826.496874]  ? do_user_addr_fault+0x276/0x820
+[  826.497273]  ? fpregs_assert_state_consistent+0x52/0x60
+[  826.497727]  ? exit_to_user_mode_prepare+0x30/0x120
+[  826.498158]  do_syscall_64+0x3d/0x90
+[  826.498502]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
+[  826.498949] RIP: 0033:0x7f9b67f4f887
+[  826.499294] Code: 0a 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b9 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 2e 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 89 54 24 1c 48 89 74 24 10
+[  826.500742] RSP: 002b:00007fff5d1a5498 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+[  826.501395] RAX: ffffffffffffffda RBX: 0000000064413ce6 RCX: 00007f9b67f4f887
+[  826.501975] RDX: 0000000000000000 RSI: 00007fff5d1a5500 RDI: 0000000000000003
+[  826.502556] RBP: 0000000000000000 R08: 0000000000000001 R09: 0000000000000001
+[  826.503135] R10: 00007f9b67e08708 R11: 0000000000000246 R12: 0000000000000001
+[  826.503714] R13: 0000000000000001 R14: 00007fff5d1a9800 R15: 0000000000485400
+[  826.504304]  </TASK>
+
+[  826.504753] Allocated by task 3764:
+[  826.505090]  kasan_save_stack+0x1e/0x40
+[  826.505453]  kasan_set_track+0x21/0x30
+[  826.505810]  __kasan_kmalloc+0x77/0x90
+[  826.506164]  __mlx5_create_flow_table+0x16d/0xbb0 [mlx5_core]
+[  826.506742]  esw_offloads_enable+0x60d/0xfb0 [mlx5_core]
+[  826.507292]  mlx5_eswitch_enable_locked+0x4d3/0x680 [mlx5_core]
+[  826.507885]  mlx5_devlink_eswitch_mode_set+0x2a3/0x580 [mlx5_core]
+[  826.508513]  devlink_nl_cmd_eswitch_set_doit+0xdf/0x1f0
+[  826.508969]  genl_family_rcv_msg_doit.isra.0+0x146/0x1c0
+[  826.509427]  genl_rcv_msg+0x28d/0x3e0
+[  826.509772]  netlink_rcv_skb+0xe0/0x210
+[  826.510133]  genl_rcv+0x24/0x40
+[  826.510448]  netlink_unicast+0x346/0x490
+[  826.510810]  netlink_sendmsg+0x3b0/0x6c0
+[  826.511179]  sock_sendmsg+0x73/0xc0
+[  826.511519]  __sys_sendto+0x18d/0x220
+[  826.511867]  __x64_sys_sendto+0x72/0x80
+[  826.512232]  do_syscall_64+0x3d/0x90
+[  826.512576]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+[  826.513220] Freed by task 5674:
+[  826.513535]  kasan_save_stack+0x1e/0x40
+[  826.513893]  kasan_set_track+0x21/0x30
+[  826.514245]  kasan_save_free_info+0x2a/0x40
+[  826.514629]  ____kasan_slab_free+0x11a/0x1b0
+[  826.515021]  __kmem_cache_free+0x14d/0x280
+[  826.515399]  tree_put_node+0x109/0x1c0 [mlx5_core]
+[  826.515907]  mlx5_destroy_flow_table+0x119/0x630 [mlx5_core]
+[  826.516481]  esw_offloads_steering_cleanup+0xe7/0x150 [mlx5_core]
+[  826.517084]  esw_offloads_disable+0xe0/0x160 [mlx5_core]
+[  826.517632]  mlx5_eswitch_disable_locked+0x26c/0x290 [mlx5_core]
+[  826.518225]  mlx5_devlink_eswitch_mode_set+0x128/0x580 [mlx5_core]
+[  826.518834]  devlink_nl_cmd_eswitch_set_doit+0xdf/0x1f0
+[  826.519286]  genl_family_rcv_msg_doit.isra.0+0x146/0x1c0
+[  826.519748]  genl_rcv_msg+0x28d/0x3e0
+[  826.520101]  netlink_rcv_skb+0xe0/0x210
+[  826.520458]  genl_rcv+0x24/0x40
+[  826.520771]  netlink_unicast+0x346/0x490
+[  826.521137]  netlink_sendmsg+0x3b0/0x6c0
+[  826.521505]  sock_sendmsg+0x73/0xc0
+[  826.521842]  __sys_sendto+0x18d/0x220
+[  826.522191]  __x64_sys_sendto+0x72/0x80
+[  826.522554]  do_syscall_64+0x3d/0x90
+[  826.522894]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+[  826.523540] Last potentially related work creation:
+[  826.523969]  kasan_save_stack+0x1e/0x40
+[  826.524331]  __kasan_record_aux_stack+0x9f/0xb0
+[  826.524739]  insert_work+0x30/0x130
+[  826.525078]  __queue_work+0x34b/0x690
+[  826.525426]  queue_work_on+0x48/0x50
+[  826.525766]  __rhashtable_remove_fast_one+0x4af/0x4d0 [mlx5_core]
+[  826.526365]  del_sw_flow_group+0x1b5/0x270 [mlx5_core]
+[  826.526898]  tree_put_node+0x109/0x1c0 [mlx5_core]
+[  826.527407]  esw_offloads_steering_cleanup+0xd3/0x150 [mlx5_core]
+[  826.528009]  esw_offloads_disable+0xe0/0x160 [mlx5_core]
+[  826.528616]  mlx5_eswitch_disable_locked+0x26c/0x290 [mlx5_core]
+[  826.529218]  mlx5_devlink_eswitch_mode_set+0x128/0x580 [mlx5_core]
+[  826.529823]  devlink_nl_cmd_eswitch_set_doit+0xdf/0x1f0
+[  826.530276]  genl_family_rcv_msg_doit.isra.0+0x146/0x1c0
+[  826.530733]  genl_rcv_msg+0x28d/0x3e0
+[  826.531079]  netlink_rcv_skb+0xe0/0x210
+[  826.531439]  genl_rcv+0x24/0x40
+[  826.531755]  netlink_unicast+0x346/0x490
+[  826.532123]  netlink_sendmsg+0x3b0/0x6c0
+[  826.532487]  sock_sendmsg+0x73/0xc0
+[  826.532825]  __sys_sendto+0x18d/0x220
+[  826.533175]  __x64_sys_sendto+0x72/0x80
+[  826.533533]  do_syscall_64+0x3d/0x90
+[  826.533877]  entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+[  826.534521] The buggy address belongs to the object at ffff888194485800
+                which belongs to the cache kmalloc-512 of size 512
+[  826.535506] The buggy address is located 48 bytes inside of
+                freed 512-byte region [ffff888194485800, ffff888194485a00)
+
+[  826.536666] The buggy address belongs to the physical page:
+[  826.537138] page:00000000d75841dd refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x194480
+[  826.537915] head:00000000d75841dd order:3 entire_mapcount:0 nr_pages_mapped:0 pincount:0
+[  826.538595] flags: 0x200000000010200(slab|head|node=0|zone=2)
+[  826.539089] raw: 0200000000010200 ffff888100042c80 ffffea0004523800 dead000000000002
+[  826.539755] raw: 0000000000000000 0000000000200020 00000001ffffffff 0000000000000000
+[  826.540417] page dumped because: kasan: bad access detected
+
+[  826.541095] Memory state around the buggy address:
+[  826.541519]  ffff888194485700: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  826.542149]  ffff888194485780: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+[  826.542773] >ffff888194485800: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  826.543400]                                      ^
+[  826.543822]  ffff888194485880: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  826.544452]  ffff888194485900: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[  826.545079] ==================================================================
+
+Fixes: 6702782845a5 ("net/mlx5e: TC, Set CT miss to the specific ct action instance")
+Signed-off-by: Paul Blakey <paulb@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 34 +++++++++++++++----
+ 1 file changed, 27 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index 2b1094e5b0c9d..53acd9a8a4c35 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -5793,22 +5793,43 @@ bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
+                                  0, NULL);
+ }
++static struct mapping_ctx *
++mlx5e_get_priv_obj_mapping(struct mlx5e_priv *priv)
++{
++      struct mlx5e_tc_table *tc;
++      struct mlx5_eswitch *esw;
++      struct mapping_ctx *ctx;
++
++      if (is_mdev_switchdev_mode(priv->mdev)) {
++              esw = priv->mdev->priv.eswitch;
++              ctx = esw->offloads.reg_c0_obj_pool;
++      } else {
++              tc = mlx5e_fs_get_tc(priv->fs);
++              ctx = tc->mapping;
++      }
++
++      return ctx;
++}
++
+ int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
+                                    u64 act_miss_cookie, u32 *act_miss_mapping)
+ {
+-      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5_mapped_obj mapped_obj = {};
++      struct mlx5_eswitch *esw;
+       struct mapping_ctx *ctx;
+       int err;
+-      ctx = esw->offloads.reg_c0_obj_pool;
+-
++      ctx = mlx5e_get_priv_obj_mapping(priv);
+       mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
+       mapped_obj.act_miss_cookie = act_miss_cookie;
+       err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
+       if (err)
+               return err;
++      if (!is_mdev_switchdev_mode(priv->mdev))
++              return 0;
++
++      esw = priv->mdev->priv.eswitch;
+       attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
+       if (IS_ERR(attr->act_id_restore_rule))
+               goto err_rule;
+@@ -5823,10 +5844,9 @@ int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_a
+ void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
+                                     u32 act_miss_mapping)
+ {
+-      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+-      struct mapping_ctx *ctx;
++      struct mapping_ctx *ctx = mlx5e_get_priv_obj_mapping(priv);
+-      ctx = esw->offloads.reg_c0_obj_pool;
+-      mlx5_del_flow_rules(attr->act_id_restore_rule);
++      if (is_mdev_switchdev_mode(priv->mdev))
++              mlx5_del_flow_rules(attr->act_id_restore_rule);
+       mapping_remove(ctx, act_miss_mapping);
+ }
+-- 
+2.39.2
+
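The crux of the fix above is mlx5e_get_priv_obj_mapping(): in NIC mode the eswitch reg_c0 object pool does not exist, so the act-miss mapping must come from the TC table instead, and the eswitch restore rule may only be touched in switchdev mode. A minimal userspace sketch of that control flow follows; every name and type in it is an illustrative stand-in, not the driver's API:

    /* Sketch: select the mapping context by mode, and only create the
     * restore rule when running in switchdev mode. */
    #include <stdio.h>

    enum mode { MODE_NIC, MODE_SWITCHDEV };

    struct ctx { const char *name; };

    static struct ctx esw_pool   = { "eswitch reg_c0 pool" };
    static struct ctx nic_tc_map = { "nic tc mapping" };

    static struct ctx *get_obj_mapping(enum mode m)
    {
        /* NIC mode must not dereference eswitch state */
        return m == MODE_SWITCHDEV ? &esw_pool : &nic_tc_map;
    }

    static int miss_mapping_get(enum mode m, unsigned int cookie)
    {
        struct ctx *ctx = get_obj_mapping(m);

        printf("mapping cookie %u via %s\n", cookie, ctx->name);
        if (m != MODE_SWITCHDEV)
            return 0;   /* no restore rule outside switchdev mode */
        printf("adding eswitch restore rule\n");
        return 0;
    }

    int main(void)
    {
        miss_mapping_get(MODE_NIC, 7);
        miss_mapping_get(MODE_SWITCHDEV, 7);
        return 0;
    }
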
diff --git a/queue-6.3/net-phy-mscc-enable-vsc8501-2-rgmii-rx-clock.patch b/queue-6.3/net-phy-mscc-enable-vsc8501-2-rgmii-rx-clock.patch
new file mode 100644 (file)
index 0000000..4a50d78
--- /dev/null
@@ -0,0 +1,134 @@
+From d78b0043e3cb1fb44bdb58a53bc13c2e2e3f6453 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 May 2023 17:31:08 +0200
+Subject: net: phy: mscc: enable VSC8501/2 RGMII RX clock
+
+From: David Epping <david.epping@missinglinkelectronics.com>
+
+[ Upstream commit 71460c9ec5c743e9ffffca3c874d66267c36345e ]
+
+By default the VSC8501 and VSC8502 RGMII/GMII/MII RX_CLK output is
+disabled. To allow packet forwarding towards the MAC it needs to be
+enabled.
+
+For other PHYs supported by this driver the clock output is enabled
+by default.
+
+Fixes: d3169863310d ("net: phy: mscc: add support for VSC8502")
+Signed-off-by: David Epping <david.epping@missinglinkelectronics.com>
+Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/mscc/mscc.h      |  1 +
+ drivers/net/phy/mscc/mscc_main.c | 54 +++++++++++++++++---------------
+ 2 files changed, 29 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h
+index a50235fdf7d99..055e4ca5b3b5c 100644
+--- a/drivers/net/phy/mscc/mscc.h
++++ b/drivers/net/phy/mscc/mscc.h
+@@ -179,6 +179,7 @@ enum rgmii_clock_delay {
+ #define VSC8502_RGMII_CNTL              20
+ #define VSC8502_RGMII_RX_DELAY_MASK     0x0070
+ #define VSC8502_RGMII_TX_DELAY_MASK     0x0007
++#define VSC8502_RGMII_RX_CLK_DISABLE    0x0800
+ #define MSCC_PHY_WOL_LOWER_MAC_ADDR     21
+ #define MSCC_PHY_WOL_MID_MAC_ADDR       22
+diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
+index bd81a4b041e52..adc8cd6f2d95a 100644
+--- a/drivers/net/phy/mscc/mscc_main.c
++++ b/drivers/net/phy/mscc/mscc_main.c
+@@ -519,14 +519,27 @@ static int vsc85xx_mac_if_set(struct phy_device *phydev,
+  *  * 2.0 ns (which causes the data to be sampled at exactly half way between
+  *    clock transitions at 1000 Mbps) if delays should be enabled
+  */
+-static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
+-                                 u16 rgmii_rx_delay_mask,
+-                                 u16 rgmii_tx_delay_mask)
++static int vsc85xx_update_rgmii_cntl(struct phy_device *phydev, u32 rgmii_cntl,
++                                   u16 rgmii_rx_delay_mask,
++                                   u16 rgmii_tx_delay_mask)
+ {
+       u16 rgmii_rx_delay_pos = ffs(rgmii_rx_delay_mask) - 1;
+       u16 rgmii_tx_delay_pos = ffs(rgmii_tx_delay_mask) - 1;
+       u16 reg_val = 0;
+-      int rc;
++      u16 mask = 0;
++      int rc = 0;
++
++      /* For traffic to pass, the VSC8502 family needs the RX_CLK disable bit
++       * to be unset for all PHY modes, so do that as part of the paged
++       * register modification.
++       * For some family members (like VSC8530/31/40/41) this bit is reserved
++       * and read-only, and the RX clock is enabled by default.
++       */
++      if (rgmii_cntl == VSC8502_RGMII_CNTL)
++              mask |= VSC8502_RGMII_RX_CLK_DISABLE;
++
++      if (phy_interface_is_rgmii(phydev))
++              mask |= rgmii_rx_delay_mask | rgmii_tx_delay_mask;
+       mutex_lock(&phydev->lock);
+@@ -537,10 +550,9 @@ static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
+           phydev->interface == PHY_INTERFACE_MODE_RGMII_ID)
+               reg_val |= RGMII_CLK_DELAY_2_0_NS << rgmii_tx_delay_pos;
+-      rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
+-                            rgmii_cntl,
+-                            rgmii_rx_delay_mask | rgmii_tx_delay_mask,
+-                            reg_val);
++      if (mask)
++              rc = phy_modify_paged(phydev, MSCC_PHY_PAGE_EXTENDED_2,
++                                    rgmii_cntl, mask, reg_val);
+       mutex_unlock(&phydev->lock);
+@@ -549,19 +561,11 @@ static int vsc85xx_rgmii_set_skews(struct phy_device *phydev, u32 rgmii_cntl,
+ static int vsc85xx_default_config(struct phy_device *phydev)
+ {
+-      int rc;
+-
+       phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
+-      if (phy_interface_mode_is_rgmii(phydev->interface)) {
+-              rc = vsc85xx_rgmii_set_skews(phydev, VSC8502_RGMII_CNTL,
+-                                           VSC8502_RGMII_RX_DELAY_MASK,
+-                                           VSC8502_RGMII_TX_DELAY_MASK);
+-              if (rc)
+-                      return rc;
+-      }
+-
+-      return 0;
++      return vsc85xx_update_rgmii_cntl(phydev, VSC8502_RGMII_CNTL,
++                                       VSC8502_RGMII_RX_DELAY_MASK,
++                                       VSC8502_RGMII_TX_DELAY_MASK);
+ }
+ static int vsc85xx_get_tunable(struct phy_device *phydev,
+@@ -1758,13 +1762,11 @@ static int vsc8584_config_init(struct phy_device *phydev)
+       if (ret)
+               return ret;
+-      if (phy_interface_is_rgmii(phydev)) {
+-              ret = vsc85xx_rgmii_set_skews(phydev, VSC8572_RGMII_CNTL,
+-                                            VSC8572_RGMII_RX_DELAY_MASK,
+-                                            VSC8572_RGMII_TX_DELAY_MASK);
+-              if (ret)
+-                      return ret;
+-      }
++      ret = vsc85xx_update_rgmii_cntl(phydev, VSC8572_RGMII_CNTL,
++                                      VSC8572_RGMII_RX_DELAY_MASK,
++                                      VSC8572_RGMII_TX_DELAY_MASK);
++      if (ret)
++              return ret;
+       ret = genphy_soft_reset(phydev);
+       if (ret)
+-- 
+2.39.2
+
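The reworked helper in the VSC8501/2 patch builds a modify mask instead of unconditionally writing the delay fields: the RX_CLK disable bit is always cleared on the VSC8502 family, the delay fields are only included for RGMII interface modes, and the paged register access is skipped when the mask stays empty. A small userspace model of that read-modify-write logic (register layout per the patch's defines; delay values are left at zero for brevity):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RX_CLK_DISABLE 0x0800   /* per the patch's VSC8502 define */
    #define RX_DELAY_MASK  0x0070
    #define TX_DELAY_MASK  0x0007

    /* read-modify-write: only bits selected by mask are changed */
    static uint16_t modify_bits(uint16_t reg, uint16_t mask, uint16_t val)
    {
        return (reg & ~mask) | (val & mask);
    }

    static uint16_t update_rgmii_cntl(uint16_t reg, bool vsc8502, bool rgmii)
    {
        uint16_t mask = 0, val = 0;

        /* VSC8501/2 gate the RX clock by default: always clear the
         * disable bit (val leaves it at 0 = clock enabled). */
        if (vsc8502)
            mask |= RX_CLK_DISABLE;

        /* delay fields only apply to RGMII interface modes */
        if (rgmii)
            mask |= RX_DELAY_MASK | TX_DELAY_MASK;

        if (!mask)
            return reg;     /* nothing to change: skip the access */

        return modify_bits(reg, mask, val);
    }

    int main(void)
    {
        uint16_t reg = 0x0800;  /* reset default: RX clock disabled */

        printf("GMII on VSC8502:  0x%04x\n", update_rgmii_cntl(reg, true, false));
        printf("RGMII on VSC8502: 0x%04x\n", update_rgmii_cntl(reg, true, true));
        printf("GMII elsewhere:   0x%04x\n", update_rgmii_cntl(reg, false, false));
        return 0;
    }
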
diff --git a/queue-6.3/platform-x86-amd-pmf-fix-cnqf-and-auto-mode-after-re.patch b/queue-6.3/platform-x86-amd-pmf-fix-cnqf-and-auto-mode-after-re.patch
new file mode 100644 (file)
index 0000000..2976f61
--- /dev/null
@@ -0,0 +1,103 @@
+From b04a6c6a55ba9b6e3b96805cc81ff6c04b45b3ab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 20:14:08 -0500
+Subject: platform/x86/amd/pmf: Fix CnQF and auto-mode after resume
+
+From: Mario Limonciello <mario.limonciello@amd.com>
+
+[ Upstream commit b54147fa374dbeadcb01b1762db1a793e06e37de ]
+
+After a suspend/resume cycle there is an error message and auto-mode
+or CnQF stops working.
+
+[ 5741.447511] amd-pmf AMDI0100:00: SMU cmd failed. err: 0xff
+[ 5741.447523] amd-pmf AMDI0100:00: AMD_PMF_REGISTER_RESPONSE:ff
+[ 5741.447527] amd-pmf AMDI0100:00: AMD_PMF_REGISTER_ARGUMENT:7
+[ 5741.447531] amd-pmf AMDI0100:00: AMD_PMF_REGISTER_MESSAGE:16
+[ 5741.447540] amd-pmf AMDI0100:00: [AUTO_MODE] avg power: 0 mW mode: QUIET
+
+This is because the DRAM address used for accessing the metrics table
+needs to be refreshed after a suspend/resume cycle. Add a resume
+callback to set this address again.
+
+Fixes: 1a409b35c995 ("platform/x86/amd/pmf: Get performance metrics from PMFW")
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Link: https://lore.kernel.org/r/20230513011408.958-1-mario.limonciello@amd.com
+Reviewed-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/platform/x86/amd/pmf/core.c | 32 ++++++++++++++++++++++-------
+ 1 file changed, 25 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c
+index 0acc0b6221290..dc9803e1a4b9b 100644
+--- a/drivers/platform/x86/amd/pmf/core.c
++++ b/drivers/platform/x86/amd/pmf/core.c
+@@ -245,24 +245,29 @@ static const struct pci_device_id pmf_pci_ids[] = {
+       { }
+ };
+-int amd_pmf_init_metrics_table(struct amd_pmf_dev *dev)
++static void amd_pmf_set_dram_addr(struct amd_pmf_dev *dev)
+ {
+       u64 phys_addr;
+       u32 hi, low;
+-      INIT_DELAYED_WORK(&dev->work_buffer, amd_pmf_get_metrics);
++      phys_addr = virt_to_phys(dev->buf);
++      hi = phys_addr >> 32;
++      low = phys_addr & GENMASK(31, 0);
++
++      amd_pmf_send_cmd(dev, SET_DRAM_ADDR_HIGH, 0, hi, NULL);
++      amd_pmf_send_cmd(dev, SET_DRAM_ADDR_LOW, 0, low, NULL);
++}
++int amd_pmf_init_metrics_table(struct amd_pmf_dev *dev)
++{
+       /* Get Metrics Table Address */
+       dev->buf = kzalloc(sizeof(dev->m_table), GFP_KERNEL);
+       if (!dev->buf)
+               return -ENOMEM;
+-      phys_addr = virt_to_phys(dev->buf);
+-      hi = phys_addr >> 32;
+-      low = phys_addr & GENMASK(31, 0);
++      INIT_DELAYED_WORK(&dev->work_buffer, amd_pmf_get_metrics);
+-      amd_pmf_send_cmd(dev, SET_DRAM_ADDR_HIGH, 0, hi, NULL);
+-      amd_pmf_send_cmd(dev, SET_DRAM_ADDR_LOW, 0, low, NULL);
++      amd_pmf_set_dram_addr(dev);
+       /*
+        * Start collecting the metrics data after a small delay
+@@ -273,6 +278,18 @@ int amd_pmf_init_metrics_table(struct amd_pmf_dev *dev)
+       return 0;
+ }
++static int amd_pmf_resume_handler(struct device *dev)
++{
++      struct amd_pmf_dev *pdev = dev_get_drvdata(dev);
++
++      if (pdev->buf)
++              amd_pmf_set_dram_addr(pdev);
++
++      return 0;
++}
++
++static DEFINE_SIMPLE_DEV_PM_OPS(amd_pmf_pm, NULL, amd_pmf_resume_handler);
++
+ static void amd_pmf_init_features(struct amd_pmf_dev *dev)
+ {
+       int ret;
+@@ -414,6 +431,7 @@ static struct platform_driver amd_pmf_driver = {
+               .name = "amd-pmf",
+               .acpi_match_table = amd_pmf_acpi_ids,
+               .dev_groups = amd_pmf_driver_groups,
++              .pm = pm_sleep_ptr(&amd_pmf_pm),
+       },
+       .probe = amd_pmf_probe,
+       .remove = amd_pmf_remove,
+-- 
+2.39.2
+
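The shape of the fix above is to factor the address programming out of init so a resume handler can repeat it, since the firmware forgets the metrics-table DRAM address across suspend. A hedged userspace sketch of that structure (the pointer cast stands in for virt_to_phys() and the two mailbox writes; none of these names are the driver's real symbols):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    struct pmf_dev { void *buf; };

    /* Stand-in for virt_to_phys() plus the SET_DRAM_ADDR_HIGH/LOW
     * mailbox writes; the cast is purely illustrative. */
    static void set_dram_addr(struct pmf_dev *dev)
    {
        uint64_t phys = (uint64_t)(uintptr_t)dev->buf;

        printf("metrics table at hi=0x%08x lo=0x%08x\n",
               (unsigned int)(phys >> 32),
               (unsigned int)(phys & 0xffffffffu));
    }

    static int init_metrics_table(struct pmf_dev *dev)
    {
        dev->buf = calloc(1, 4096);
        if (!dev->buf)
            return -1;
        set_dram_addr(dev);             /* initial programming */
        return 0;
    }

    /* After resume the firmware has forgotten the address: program it
     * again, but only if probe ever allocated the buffer. */
    static int resume_handler(struct pmf_dev *dev)
    {
        if (dev->buf)
            set_dram_addr(dev);
        return 0;
    }

    int main(void)
    {
        struct pmf_dev dev = { 0 };

        if (init_metrics_table(&dev))
            return 1;
        resume_handler(&dev);           /* simulated suspend/resume */
        free(dev.buf);
        return 0;
    }
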
diff --git a/queue-6.3/power-supply-rt9467-fix-passing-zero-to-dev_err_prob.patch b/queue-6.3/power-supply-rt9467-fix-passing-zero-to-dev_err_prob.patch
new file mode 100644 (file)
index 0000000..9fa8dad
--- /dev/null
@@ -0,0 +1,38 @@
+From de7add2cd70924f2eb3a81877c34170b7b64dcf6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 12 May 2023 13:44:23 +0800
+Subject: power: supply: rt9467: Fix passing zero to 'dev_err_probe'
+
+From: ChiaEn Wu <chiaen_wu@richtek.com>
+
+[ Upstream commit bc97139ff13598fa5becf6b582ef99ab428c03ef ]
+
+Fix passing zero to 'dev_err_probe()' in 'rt9467_request_interrupt()'
+
+Fixes: 6f7f70e3a8dd ("power: supply: rt9467: Add Richtek RT9467 charger driver")
+Reported-by: kernel test robot <lkp@intel.com>
+Reported-by: Dan Carpenter <error27@gmail.com>
+Link: https://lore.kernel.org/r/202305111228.bHLWU6bq-lkp@intel.com/
+Signed-off-by: ChiaEn Wu <chiaen_wu@richtek.com>
+Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/power/supply/rt9467-charger.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/power/supply/rt9467-charger.c b/drivers/power/supply/rt9467-charger.c
+index 73f744a3155d4..ea33693b69779 100644
+--- a/drivers/power/supply/rt9467-charger.c
++++ b/drivers/power/supply/rt9467-charger.c
+@@ -1023,7 +1023,7 @@ static int rt9467_request_interrupt(struct rt9467_chg_data *data)
+       for (i = 0; i < num_chg_irqs; i++) {
+               virq = regmap_irq_get_virq(data->irq_chip_data, chg_irqs[i].hwirq);
+               if (virq <= 0)
+-                      return dev_err_probe(dev, virq, "Failed to get (%s) irq\n",
++                      return dev_err_probe(dev, -EINVAL, "Failed to get (%s) irq\n",
+                                            chg_irqs[i].name);
+               ret = devm_request_threaded_irq(dev, virq, NULL, chg_irqs[i].handler,
+-- 
+2.39.2
+
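The point of the one-liner above: regmap_irq_get_virq() can return 0 as well as a negative errno, and dev_err_probe() returns whatever code it is handed, so forwarding a 0 would make probe look successful while the IRQ was never requested. A tiny model of that contract, assuming a simplified dev_err_probe() signature for illustration:

    #include <errno.h>
    #include <stdio.h>

    /* Minimal model of dev_err_probe(): it logs and returns the error
     * code it was handed, so the caller can "return dev_err_probe(...)".
     * The real kernel helper also takes a struct device pointer. */
    static int dev_err_probe(int err, const char *msg)
    {
        fprintf(stderr, "probe error %d: %s\n", err, msg);
        return err;
    }

    static int request_interrupt(int virq)
    {
        /* regmap_irq_get_virq() can return 0 as well as a negative
         * errno; passing that 0 through would make probe "succeed"
         * with no IRQ wired up, so map it to a real error code. */
        if (virq <= 0)
            return dev_err_probe(-EINVAL, "failed to get irq");
        return 0;
    }

    int main(void)
    {
        printf("virq=0  -> %d\n", request_interrupt(0));
        printf("virq=13 -> %d\n", request_interrupt(13));
        return 0;
    }
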
diff --git a/queue-6.3/revert-net-mlx5-expose-steering-dropped-packets-coun.patch b/queue-6.3/revert-net-mlx5-expose-steering-dropped-packets-coun.patch
new file mode 100644 (file)
index 0000000..6f7f4dc
--- /dev/null
@@ -0,0 +1,100 @@
+From 6ac7c3384af30177b2d2505415f61a11bc94fe73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Mar 2023 19:43:27 +0200
+Subject: Revert "net/mlx5: Expose steering dropped packets counter"
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+[ Upstream commit e267b8a52ca5d5e8434929a5e9f5574aed141024 ]
+
+This reverts commit 4fe1b3a5f8fe2fdcedcaba9561e5b0ae5cb1d15b, which
+exposes the steering dropped packets counter via debugfs. The upcoming
+series will expose the counter via devlink health reporter instead
+of debugfs.
+
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Stable-dep-of: 8c253dfc89ef ("net/mlx5: E-switch, Devcom, sync devcom events and devcom comp register")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/mellanox/mlx5/core/esw/debugfs.c | 22 +++----------------
+ 1 file changed, 3 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
+index 3d0bbcca1cb99..2db13c71e88cd 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
+@@ -12,11 +12,10 @@ enum vnic_diag_counter {
+       MLX5_VNIC_DIAG_CQ_OVERRUN,
+       MLX5_VNIC_DIAG_INVALID_COMMAND,
+       MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND,
+-      MLX5_VNIC_DIAG_RX_STEERING_DISCARD,
+ };
+ static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
+-                                  u64 *val)
++                                  u32 *val)
+ {
+       u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+@@ -58,10 +57,6 @@ static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_cou
+       case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND:
+               *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command);
+               break;
+-      case MLX5_VNIC_DIAG_RX_STEERING_DISCARD:
+-              *val = MLX5_GET64(vnic_diagnostic_statistics, vnic_diag_out,
+-                                nic_receive_steering_discard);
+-              break;
+       }
+       return 0;
+@@ -70,14 +65,14 @@ static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_cou
+ static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
+                           enum vnic_diag_counter type)
+ {
+-      u64 val = 0;
++      u32 val = 0;
+       int ret;
+       ret = mlx5_esw_query_vnic_diag(vport, type, &val);
+       if (ret)
+               return ret;
+-      seq_printf(file, "%llu\n", val);
++      seq_printf(file, "%d\n", val);
+       return 0;
+ }
+@@ -117,11 +112,6 @@ static int quota_exceeded_command_show(struct seq_file *file, void *priv)
+       return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND);
+ }
+-static int rx_steering_discard_show(struct seq_file *file, void *priv)
+-{
+-      return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_RX_STEERING_DISCARD);
+-}
+-
+ DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
+ DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
+ DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
+@@ -129,7 +119,6 @@ DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
+ DEFINE_SHOW_ATTRIBUTE(cq_overrun);
+ DEFINE_SHOW_ATTRIBUTE(invalid_command);
+ DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
+-DEFINE_SHOW_ATTRIBUTE(rx_steering_discard);
+ void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+@@ -190,9 +179,4 @@ void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool
+       if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
+               debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
+                                   &quota_exceeded_command_fops);
+-
+-      if (MLX5_CAP_GEN(esw->dev, nic_receive_steering_discard))
+-              debugfs_create_file("rx_steering_discard", 0444, vnic_diag, vport,
+-                                  &rx_steering_discard_fops);
+-
+ }
+-- 
+2.39.2
+
diff --git a/queue-6.3/revert-net-mlx5-expose-vnic-diagnostic-counters-for-.patch b/queue-6.3/revert-net-mlx5-expose-vnic-diagnostic-counters-for-.patch
new file mode 100644 (file)
index 0000000..8dad8e5
--- /dev/null
@@ -0,0 +1,340 @@
+From 9d99ede14fdd5d020dc37f9e12f106ce74f7f8de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 Mar 2023 19:43:47 +0200
+Subject: Revert "net/mlx5: Expose vnic diagnostic counters for eswitch managed
+ vports"
+
+From: Maher Sanalla <msanalla@nvidia.com>
+
+[ Upstream commit 0a431418f685e100c45ff150efaf4a5afa6f1982 ]
+
+This reverts commit 606e6a72e29dff9e3341c4cc9b554420e4793f401 which exposes
+the vnic diagnostic counters via debugfs. Instead, the upcoming series will
+expose the same counters through devlink health reporter.
+
+Signed-off-by: Maher Sanalla <msanalla@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Stable-dep-of: 8c253dfc89ef ("net/mlx5: E-switch, Devcom, sync devcom events and devcom comp register")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
+ .../ethernet/mellanox/mlx5/core/esw/debugfs.c | 182 ------------------
+ .../net/ethernet/mellanox/mlx5/core/eswitch.c |   6 -
+ .../net/ethernet/mellanox/mlx5/core/eswitch.h |   5 -
+ .../mellanox/mlx5/core/eswitch_offloads.c     |   3 -
+ 5 files changed, 1 insertion(+), 197 deletions(-)
+ delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+index 8d4e25cc54ea3..78755dfeaccea 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
++++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+@@ -69,7 +69,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE)   += en/tc/sample.o
+ #
+ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
+                                     ecpf.o rdma.o esw/legacy.o \
+-                                    esw/debugfs.o esw/devlink_port.o esw/vporttbl.o esw/qos.o
++                                    esw/devlink_port.o esw/vporttbl.o esw/qos.o
+ mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
+                                     esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
+deleted file mode 100644
+index 2db13c71e88cd..0000000000000
+--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/debugfs.c
++++ /dev/null
+@@ -1,182 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+-/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+-
+-#include <linux/debugfs.h>
+-#include "eswitch.h"
+-
+-enum vnic_diag_counter {
+-      MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE,
+-      MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW,
+-      MLX5_VNIC_DIAG_COMP_EQ_OVERRUN,
+-      MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN,
+-      MLX5_VNIC_DIAG_CQ_OVERRUN,
+-      MLX5_VNIC_DIAG_INVALID_COMMAND,
+-      MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND,
+-};
+-
+-static int mlx5_esw_query_vnic_diag(struct mlx5_vport *vport, enum vnic_diag_counter counter,
+-                                  u32 *val)
+-{
+-      u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+-      u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
+-      struct mlx5_core_dev *dev = vport->dev;
+-      u16 vport_num = vport->vport;
+-      void *vnic_diag_out;
+-      int err;
+-
+-      MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+-      MLX5_SET(query_vnic_env_in, in, vport_number, vport_num);
+-      if (!mlx5_esw_is_manager_vport(dev->priv.eswitch, vport_num))
+-              MLX5_SET(query_vnic_env_in, in, other_vport, 1);
+-
+-      err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+-      if (err)
+-              return err;
+-
+-      vnic_diag_out = MLX5_ADDR_OF(query_vnic_env_out, out, vport_env);
+-      switch (counter) {
+-      case MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE:
+-              *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, total_error_queues);
+-              break;
+-      case MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW:
+-              *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out,
+-                              send_queue_priority_update_flow);
+-              break;
+-      case MLX5_VNIC_DIAG_COMP_EQ_OVERRUN:
+-              *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, comp_eq_overrun);
+-              break;
+-      case MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN:
+-              *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, async_eq_overrun);
+-              break;
+-      case MLX5_VNIC_DIAG_CQ_OVERRUN:
+-              *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, cq_overrun);
+-              break;
+-      case MLX5_VNIC_DIAG_INVALID_COMMAND:
+-              *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, invalid_command);
+-              break;
+-      case MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND:
+-              *val = MLX5_GET(vnic_diagnostic_statistics, vnic_diag_out, quota_exceeded_command);
+-              break;
+-      }
+-
+-      return 0;
+-}
+-
+-static int __show_vnic_diag(struct seq_file *file, struct mlx5_vport *vport,
+-                          enum vnic_diag_counter type)
+-{
+-      u32 val = 0;
+-      int ret;
+-
+-      ret = mlx5_esw_query_vnic_diag(vport, type, &val);
+-      if (ret)
+-              return ret;
+-
+-      seq_printf(file, "%d\n", val);
+-      return 0;
+-}
+-
+-static int total_q_under_processor_handle_show(struct seq_file *file, void *priv)
+-{
+-      return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_TOTAL_Q_UNDER_PROCESSOR_HANDLE);
+-}
+-
+-static int send_queue_priority_update_flow_show(struct seq_file *file, void *priv)
+-{
+-      return __show_vnic_diag(file, file->private,
+-                              MLX5_VNIC_DIAG_SEND_QUEUE_PRIORITY_UPDATE_FLOW);
+-}
+-
+-static int comp_eq_overrun_show(struct seq_file *file, void *priv)
+-{
+-      return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_COMP_EQ_OVERRUN);
+-}
+-
+-static int async_eq_overrun_show(struct seq_file *file, void *priv)
+-{
+-      return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_ASYNC_EQ_OVERRUN);
+-}
+-
+-static int cq_overrun_show(struct seq_file *file, void *priv)
+-{
+-      return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_CQ_OVERRUN);
+-}
+-
+-static int invalid_command_show(struct seq_file *file, void *priv)
+-{
+-      return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_INVALID_COMMAND);
+-}
+-
+-static int quota_exceeded_command_show(struct seq_file *file, void *priv)
+-{
+-      return __show_vnic_diag(file, file->private, MLX5_VNIC_DIAG_QOUTA_EXCEEDED_COMMAND);
+-}
+-
+-DEFINE_SHOW_ATTRIBUTE(total_q_under_processor_handle);
+-DEFINE_SHOW_ATTRIBUTE(send_queue_priority_update_flow);
+-DEFINE_SHOW_ATTRIBUTE(comp_eq_overrun);
+-DEFINE_SHOW_ATTRIBUTE(async_eq_overrun);
+-DEFINE_SHOW_ATTRIBUTE(cq_overrun);
+-DEFINE_SHOW_ATTRIBUTE(invalid_command);
+-DEFINE_SHOW_ATTRIBUTE(quota_exceeded_command);
+-
+-void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num)
+-{
+-      struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+-
+-      debugfs_remove_recursive(vport->dbgfs);
+-      vport->dbgfs = NULL;
+-}
+-
+-/* vnic diag dir name is "pf", "ecpf" or "{vf/sf}_xxxx" */
+-#define VNIC_DIAG_DIR_NAME_MAX_LEN 8
+-
+-void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num)
+-{
+-      struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+-      struct dentry *vnic_diag;
+-      char dir_name[VNIC_DIAG_DIR_NAME_MAX_LEN];
+-      int err;
+-
+-      if (!MLX5_CAP_GEN(esw->dev, vport_group_manager))
+-              return;
+-
+-      if (vport_num == MLX5_VPORT_PF) {
+-              strcpy(dir_name, "pf");
+-      } else if (vport_num == MLX5_VPORT_ECPF) {
+-              strcpy(dir_name, "ecpf");
+-      } else {
+-              err = snprintf(dir_name, VNIC_DIAG_DIR_NAME_MAX_LEN, "%s_%d", is_sf ? "sf" : "vf",
+-                             is_sf ? sf_num : vport_num - MLX5_VPORT_FIRST_VF);
+-              if (WARN_ON(err < 0))
+-                      return;
+-      }
+-
+-      vport->dbgfs = debugfs_create_dir(dir_name, esw->dbgfs);
+-      vnic_diag = debugfs_create_dir("vnic_diag", vport->dbgfs);
+-
+-      if (MLX5_CAP_GEN(esw->dev, vnic_env_queue_counters)) {
+-              debugfs_create_file("total_q_under_processor_handle", 0444, vnic_diag, vport,
+-                                  &total_q_under_processor_handle_fops);
+-              debugfs_create_file("send_queue_priority_update_flow", 0444, vnic_diag, vport,
+-                                  &send_queue_priority_update_flow_fops);
+-      }
+-
+-      if (MLX5_CAP_GEN(esw->dev, eq_overrun_count)) {
+-              debugfs_create_file("comp_eq_overrun", 0444, vnic_diag, vport,
+-                                  &comp_eq_overrun_fops);
+-              debugfs_create_file("async_eq_overrun", 0444, vnic_diag, vport,
+-                                  &async_eq_overrun_fops);
+-      }
+-
+-      if (MLX5_CAP_GEN(esw->dev, vnic_env_cq_overrun))
+-              debugfs_create_file("cq_overrun", 0444, vnic_diag, vport, &cq_overrun_fops);
+-
+-      if (MLX5_CAP_GEN(esw->dev, invalid_command_count))
+-              debugfs_create_file("invalid_command", 0444, vnic_diag, vport,
+-                                  &invalid_command_fops);
+-
+-      if (MLX5_CAP_GEN(esw->dev, quota_exceeded_count))
+-              debugfs_create_file("quota_exceeded_command", 0444, vnic_diag, vport,
+-                                  &quota_exceeded_command_fops);
+-}
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+index 19fed514fc173..bb2720a23a501 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+@@ -36,7 +36,6 @@
+ #include <linux/mlx5/vport.h>
+ #include <linux/mlx5/fs.h>
+ #include <linux/mlx5/mpfs.h>
+-#include <linux/debugfs.h>
+ #include "esw/acl/lgcy.h"
+ #include "esw/legacy.h"
+ #include "esw/qos.h"
+@@ -1056,7 +1055,6 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
+       if (err)
+               return err;
+-      mlx5_esw_vport_debugfs_create(esw, vport_num, false, 0);
+       err = esw_offloads_load_rep(esw, vport_num);
+       if (err)
+               goto err_rep;
+@@ -1064,7 +1062,6 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
+       return err;
+ err_rep:
+-      mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+       mlx5_esw_vport_disable(esw, vport_num);
+       return err;
+ }
+@@ -1072,7 +1069,6 @@ int mlx5_eswitch_load_vport(struct mlx5_eswitch *esw, u16 vport_num,
+ void mlx5_eswitch_unload_vport(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+       esw_offloads_unload_rep(esw, vport_num);
+-      mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+       mlx5_esw_vport_disable(esw, vport_num);
+ }
+@@ -1672,7 +1668,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
+       dev->priv.eswitch = esw;
+       BLOCKING_INIT_NOTIFIER_HEAD(&esw->n_head);
+-      esw->dbgfs = debugfs_create_dir("esw", mlx5_debugfs_get_dev_root(esw->dev));
+       esw_info(dev,
+                "Total vports %d, per vport: max uc(%d) max mc(%d)\n",
+                esw->total_vports,
+@@ -1696,7 +1691,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
+       esw_info(esw->dev, "cleanup\n");
+-      debugfs_remove_recursive(esw->dbgfs);
+       esw->dev->priv.eswitch = NULL;
+       destroy_workqueue(esw->work_queue);
+       WARN_ON(refcount_read(&esw->qos.refcnt));
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+index c8c12d1672f99..31876db3c7641 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+@@ -195,7 +195,6 @@ struct mlx5_vport {
+       enum mlx5_eswitch_vport_event enabled_events;
+       int index;
+       struct devlink_port *dl_port;
+-      struct dentry *dbgfs;
+ };
+ struct mlx5_esw_indir_table;
+@@ -342,7 +341,6 @@ struct mlx5_eswitch {
+               u32             large_group_num;
+       }  params;
+       struct blocking_notifier_head n_head;
+-      struct dentry *dbgfs;
+ };
+ void esw_offloads_disable(struct mlx5_eswitch *esw);
+@@ -705,9 +703,6 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_
+ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
+ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u16 vport_num);
+-void mlx5_esw_vport_debugfs_create(struct mlx5_eswitch *esw, u16 vport_num, bool is_sf, u16 sf_num);
+-void mlx5_esw_vport_debugfs_destroy(struct mlx5_eswitch *esw, u16 vport_num);
+-
+ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_port *dl_port,
+                                     u16 vport_num, u32 controller, u32 sfnum);
+ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num);
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index 590df9bf39a56..727b30f3a229a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -3777,14 +3777,12 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
+       if (err)
+               goto devlink_err;
+-      mlx5_esw_vport_debugfs_create(esw, vport_num, true, sfnum);
+       err = mlx5_esw_offloads_rep_load(esw, vport_num);
+       if (err)
+               goto rep_err;
+       return 0;
+ rep_err:
+-      mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+       mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
+ devlink_err:
+       mlx5_esw_vport_disable(esw, vport_num);
+@@ -3794,7 +3792,6 @@ int mlx5_esw_offloads_sf_vport_enable(struct mlx5_eswitch *esw, struct devlink_p
+ void mlx5_esw_offloads_sf_vport_disable(struct mlx5_eswitch *esw, u16 vport_num)
+ {
+       mlx5_esw_offloads_rep_unload(esw, vport_num);
+-      mlx5_esw_vport_debugfs_destroy(esw, vport_num);
+       mlx5_esw_devlink_sf_port_unregister(esw, vport_num);
+       mlx5_esw_vport_disable(esw, vport_num);
+ }
+-- 
+2.39.2
+
diff --git a/queue-6.3/selftests-bpf-fix-pkg-config-call-building-sign-file.patch b/queue-6.3/selftests-bpf-fix-pkg-config-call-building-sign-file.patch
new file mode 100644 (file)
index 0000000..324fc44
--- /dev/null
@@ -0,0 +1,51 @@
+From 7cbe1b7d780a677c8dbb77d7d8998b4f632118e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Apr 2023 22:50:32 +0100
+Subject: selftests/bpf: Fix pkg-config call building sign-file
+
+From: Jeremy Sowden <jeremy@azazel.net>
+
+[ Upstream commit 5f5486b620cd43b16a1787ef92b9bc21bd72ef2e ]
+
+When building sign-file, the call to get the CFLAGS for libcrypto is
+missing white-space between `pkg-config` and `--cflags`:
+
+  $(shell $(HOSTPKG_CONFIG)--cflags libcrypto 2> /dev/null)
+
+Removing the redirection of stderr, we see:
+
+  $ make -C tools/testing/selftests/bpf sign-file
+  make: Entering directory '[...]/tools/testing/selftests/bpf'
+  make: pkg-config--cflags: No such file or directory
+    SIGN-FILE sign-file
+  make: Leaving directory '[...]/tools/testing/selftests/bpf'
+
+Add the missing space.
+
+Fixes: fc97590668ae ("selftests/bpf: Add test for bpf_verify_pkcs7_signature() kfunc")
+Signed-off-by: Jeremy Sowden <jeremy@azazel.net>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Reviewed-by: Roberto Sassu <roberto.sassu@huawei.com>
+Link: https://lore.kernel.org/bpf/20230426215032.415792-1-jeremy@azazel.net
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/bpf/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
+index b677dcd0b77af..ad01c9e1ff12b 100644
+--- a/tools/testing/selftests/bpf/Makefile
++++ b/tools/testing/selftests/bpf/Makefile
+@@ -197,7 +197,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r
+ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
+       $(call msg,SIGN-FILE,,$@)
+-      $(Q)$(CC) $(shell $(HOSTPKG_CONFIG)--cflags libcrypto 2> /dev/null) \
++      $(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \
+                 $< -o $@ \
+                 $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
+-- 
+2.39.2
+
index 0810d4fcbb10ee5faab7c58d2a1a26b089f9f5cd..5b1da4f3e1936b9f725be50ba183037b3ec289c3 100644 (file)
@@ -125,3 +125,44 @@ page_pool-fix-inconsistency-for-page_pool_ring_lock.patch
 net-ethernet-mtk_eth_soc-fix-qos-on-dsa-mac-on-non-mtk_netsys_v2-socs.patch
 net-phy-mscc-add-vsc8502-to-module_device_table.patch
 revert-arm64-dts-imx8mp-drop-simple-bus-from-fsl-imx8mp-media-blk-ctrl.patch
+firmware-arm_scmi-fix-incorrect-alloc_workqueue-invo.patch
+firmware-arm_ffa-fix-usage-of-partition-info-get-cou.patch
+spi-spi-geni-qcom-select-fifo-mode-for-chip-select.patch
+coresight-perf-release-coresight-path-when-alloc-tra.patch
+arm-dts-imx6ull-dhcor-set-and-limit-the-mode-for-pmi.patch
+selftests-bpf-fix-pkg-config-call-building-sign-file.patch
+power-supply-rt9467-fix-passing-zero-to-dev_err_prob.patch
+platform-x86-amd-pmf-fix-cnqf-and-auto-mode-after-re.patch
+bpf-netdev-init-the-offload-table-earlier.patch
+gpiolib-fix-allocation-of-mixed-dynamic-static-gpios.patch
+tls-rx-device-fix-checking-decryption-status.patch
+tls-rx-strp-set-the-skb-len-of-detached-cow-ed-skbs.patch
+tls-rx-strp-fix-determining-record-length-in-copy-mo.patch
+tls-rx-strp-force-mixed-decrypted-records-into-copy-.patch
+tls-rx-strp-factor-out-copying-skb-data.patch
+tls-rx-strp-preserve-decryption-status-of-skbs-when-.patch
+tls-rx-strp-don-t-use-gfp_kernel-in-softirq-context.patch
+net-fec-add-dma_wmb-to-ensure-correct-descriptor-val.patch
+cxl-port-fix-null-pointer-access-in-devm_cxl_add_por.patch
+asoc-intel-avs-fix-module-lookup.patch
+drm-i915-move-shared-dpll-disabling-into-crtc-disabl.patch
+drm-i915-disable-dplls-before-disconnecting-the-tc-p.patch
+drm-i915-fix-pipedmc-disabling-for-a-bigjoiner-confi.patch
+net-mlx5e-tc-fix-using-eswitch-mapping-in-nic-mode.patch
+revert-net-mlx5-expose-steering-dropped-packets-coun.patch
+revert-net-mlx5-expose-vnic-diagnostic-counters-for-.patch
+net-mlx5-e-switch-devcom-sync-devcom-events-and-devc.patch
+gpio-f7188x-fix-chip-name-and-pin-count-on-nuvoton-c.patch
+bpf-sockmap-pass-skb-ownership-through-read_skb.patch
+bpf-sockmap-convert-schedule_work-into-delayed_work.patch
+bpf-sockmap-reschedule-is-now-done-through-backlog.patch
+bpf-sockmap-improved-check-for-empty-queue.patch
+bpf-sockmap-handle-fin-correctly.patch
+bpf-sockmap-tcp-data-stall-on-recv-before-accept.patch
+bpf-sockmap-wake-up-polling-after-data-copy.patch
+bpf-sockmap-incorrectly-handling-copied_seq.patch
+blk-wbt-fix-that-wbt-can-t-be-disabled-by-default.patch
+blk-mq-fix-race-condition-in-active-queue-accounting.patch
+vfio-type1-check-pfn-valid-before-converting-to-stru.patch
+cpufreq-amd-pstate-remove-fast_switch_possible-flag-.patch
+net-phy-mscc-enable-vsc8501-2-rgmii-rx-clock.patch
diff --git a/queue-6.3/spi-spi-geni-qcom-select-fifo-mode-for-chip-select.patch b/queue-6.3/spi-spi-geni-qcom-select-fifo-mode-for-chip-select.patch
new file mode 100644 (file)
index 0000000..2d82773
--- /dev/null
@@ -0,0 +1,49 @@
+From b24ab44566f304c8cfe3e277bb6d263b21fea5a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 May 2023 15:31:36 +0530
+Subject: spi: spi-geni-qcom: Select FIFO mode for chip select
+
+From: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
+
+[ Upstream commit 4c329f5da7cfa366bacfda1328a025dd38951317 ]
+
+The spi-geni driver switches between FIFO and DMA modes based on xfer
+length. FIFO mode relies on the M_CMD_DONE_EN interrupt for completion,
+while DMA mode relies on XX_DMA_DONE.
+During dynamic switching, if FIFO mode is chosen, the FIFO-related
+interrupts are enabled and the DMA-related interrupts are disabled, and
+vice versa.
+Chip select shares the M_CMD_DONE_EN interrupt with FIFO to check completion.
+Now, if a chip select operation is preceded by a DMA xfer, the
+M_CMD_DONE_EN interrupt would have been disabled, and hence the driver
+will never receive one, resulting in a timeout.
+
+For chip select, in addition to setting the xfer mode to FIFO, also set
+select_mode() to FIFO so that the required interrupts are enabled.
+
+Fixes: e5f0dfa78ac7 ("spi: spi-geni-qcom: Add support for SE DMA mode")
+Suggested-by: Praveen Talari <quic_ptalari@quicinc.com>
+Signed-off-by: Vijaya Krishna Nivarthi <quic_vnivarth@quicinc.com>
+Reviewed-by: Douglas Anderson <dianders@chromium.org>
+Link: https://lore.kernel.org/r/1683626496-9685-1-git-send-email-quic_vnivarth@quicinc.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-geni-qcom.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
+index babb039bcb431..b106faf21a723 100644
+--- a/drivers/spi/spi-geni-qcom.c
++++ b/drivers/spi/spi-geni-qcom.c
+@@ -294,6 +294,8 @@ static void spi_geni_set_cs(struct spi_device *slv, bool set_flag)
+       mas->cs_flag = set_flag;
+       /* set xfer_mode to FIFO to complete cs_done in isr */
+       mas->cur_xfer_mode = GENI_SE_FIFO;
++      geni_se_select_mode(se, mas->cur_xfer_mode);
++
+       reinit_completion(&mas->cs_done);
+       if (set_flag)
+               geni_se_setup_m_cmd(se, SPI_CS_ASSERT, 0);
+-- 
+2.39.2
+
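What the two added lines do: updating mas->cur_xfer_mode alone only changes the driver's bookkeeping, while geni_se_select_mode() is what reprograms the hardware so the FIFO completion interrupt is unmasked again after a DMA transfer. A rough userspace model of that interaction (mode names and structures are invented for the sketch):

    #include <stdio.h>

    enum xfer_mode { MODE_FIFO, MODE_DMA };

    struct se { enum xfer_mode hw_mode; };

    /* Stand-in for geni_se_select_mode(): unmasks the interrupt set
     * matching the mode (M_CMD_DONE for FIFO, DMA_DONE for DMA). */
    static void select_mode(struct se *se, enum xfer_mode mode)
    {
        se->hw_mode = mode;
        printf("hw now completes via %s irq\n",
               mode == MODE_FIFO ? "M_CMD_DONE" : "DMA_DONE");
    }

    static void set_cs(struct se *se, enum xfer_mode *cur_mode)
    {
        /* Updating the driver's mode variable is not enough: after a
         * DMA transfer the FIFO irq is still masked, so the CS-done
         * wait would time out. Reprogram the hardware as well. */
        *cur_mode = MODE_FIFO;
        select_mode(se, *cur_mode);
    }

    int main(void)
    {
        struct se se = { MODE_DMA };    /* previous xfer used DMA */
        enum xfer_mode cur = MODE_DMA;

        set_cs(&se, &cur);              /* CS now completes via FIFO irq */
        return 0;
    }
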
diff --git a/queue-6.3/tls-rx-device-fix-checking-decryption-status.patch b/queue-6.3/tls-rx-device-fix-checking-decryption-status.patch
new file mode 100644 (file)
index 0000000..bcf1bae
--- /dev/null
@@ -0,0 +1,44 @@
+From 6d1c925a2cf65e4946df56f5d47183ea9232fda1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 May 2023 18:50:36 -0700
+Subject: tls: rx: device: fix checking decryption status
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit b3a03b540e3cf62a255213d084d76d71c02793d5 ]
+
+skb->len covers the entire skb, including the frag_list.
+In fact we're guaranteed that rxm->full_len <= skb->len,
+so since the change under Fixes we were not checking the decrypt
+status of any skb but the first.
+
+Note that the skb_pagelen() added here may feel a bit costly,
+but it's removed by subsequent fixes, anyway.
+
+Reported-by: Tariq Toukan <tariqt@nvidia.com>
+Fixes: 86b259f6f888 ("tls: rx: device: bound the frag walk")
+Tested-by: Shai Amiram <samiram@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_device.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index a7cc4f9faac28..3b87c7b04ac87 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -1012,7 +1012,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
+       struct sk_buff *skb_iter;
+       int left;
+-      left = rxm->full_len - skb->len;
++      left = rxm->full_len + rxm->offset - skb_pagelen(skb);
+       /* Check if all the data is decrypted already */
+       skb_iter = skb_shinfo(skb)->frag_list;
+       while (skb_iter && left > 0) {
+-- 
+2.39.2
+
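The arithmetic behind the fix: skb->len already includes the frag_list bytes, so rxm->full_len - skb->len can never be positive and the decrypt-status walk was skipped; counting only the head's linear and page-frag bytes via skb_pagelen() restores a positive remainder. A small numeric sketch under assumed lengths:

    #include <stdbool.h>
    #include <stdio.h>

    /* Lengths for a TLS record laid out as a head skb (linear + page
     * frags) plus a frag_list of follow-up skbs. */
    struct rec {
        int full_len;   /* record payload length */
        int offset;     /* record start inside the head skb */
        int skb_len;    /* head skb->len, INCLUDING frag_list bytes */
        int pagelen;    /* head skb linear + page frags only */
    };

    static int frag_list_bytes_to_check(const struct rec *r, bool fixed)
    {
        if (fixed)  /* bytes of the record living on the frag_list */
            return r->full_len + r->offset - r->pagelen;
        /* buggy form: full_len <= skb_len always holds, so this is
         * never positive and the ->decrypted walk never runs */
        return r->full_len - r->skb_len;
    }

    int main(void)
    {
        /* 1000-byte record: 400 bytes in the head, 600 on the frag_list */
        struct rec r = { .full_len = 1000, .offset = 0,
                         .skb_len = 1000, .pagelen = 400 };

        printf("buggy left = %d\n", frag_list_bytes_to_check(&r, false));
        printf("fixed left = %d\n", frag_list_bytes_to_check(&r, true));
        return 0;
    }
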
diff --git a/queue-6.3/tls-rx-strp-don-t-use-gfp_kernel-in-softirq-context.patch b/queue-6.3/tls-rx-strp-don-t-use-gfp_kernel-in-softirq-context.patch
new file mode 100644 (file)
index 0000000..4945765
--- /dev/null
@@ -0,0 +1,52 @@
+From d55295246bad15f0ffc636c63cf2e7d3fe37e681 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 May 2023 18:50:42 -0700
+Subject: tls: rx: strp: don't use GFP_KERNEL in softirq context
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 74836ec828fe17b63f2006fdbf53311d691396bf ]
+
+When the receive buffer is small, or the TCP rx queue looks too
+complicated to bother using it directly, we allocate a new
+skb and copy the data into it.
+
+We already use sk->sk_allocation... but nothing actually
+sets it to GFP_ATOMIC on the ->sk_data_ready() path.
+
+Users of HW offload are far more likely to experience problems
+due to scheduling while atomic. "Copy mode" is very rarely
+triggered with SW crypto.
+
+Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser")
+Tested-by: Shai Amiram <samiram@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_sw.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
+index 635b8bf6b937c..6e6a7c37d685c 100644
+--- a/net/tls/tls_sw.c
++++ b/net/tls/tls_sw.c
+@@ -2304,10 +2304,14 @@ static void tls_data_ready(struct sock *sk)
+       struct tls_context *tls_ctx = tls_get_ctx(sk);
+       struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
+       struct sk_psock *psock;
++      gfp_t alloc_save;
+       trace_sk_data_ready(sk);
++      alloc_save = sk->sk_allocation;
++      sk->sk_allocation = GFP_ATOMIC;
+       tls_strp_data_ready(&ctx->strp);
++      sk->sk_allocation = alloc_save;
+       psock = sk_psock_get(sk);
+       if (psock) {
+-- 
+2.39.2
+
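The save/override/restore of sk->sk_allocation around the strparser call is the whole fix: ->sk_data_ready() runs in softirq context, where GFP_KERNEL allocations may sleep and are therefore illegal. A minimal userspace model of the pattern (the enum and socket struct are stand-ins):

    #include <stdio.h>

    enum gfp { GFP_KERNEL, GFP_ATOMIC };

    struct sock { enum gfp sk_allocation; };

    static void strp_data_ready(struct sock *sk)
    {
        /* any allocation on this path must honour sk->sk_allocation */
        printf("allocating with %s\n",
               sk->sk_allocation == GFP_ATOMIC ? "GFP_ATOMIC" : "GFP_KERNEL");
    }

    /* ->sk_data_ready() runs in softirq context where sleeping is
     * illegal: force atomic allocations for the duration of the
     * callback, then restore the previous value. */
    static void data_ready(struct sock *sk)
    {
        enum gfp alloc_save = sk->sk_allocation;

        sk->sk_allocation = GFP_ATOMIC;
        strp_data_ready(sk);
        sk->sk_allocation = alloc_save;
    }

    int main(void)
    {
        struct sock sk = { GFP_KERNEL };

        data_ready(&sk);
        printf("restored to %s\n",
               sk.sk_allocation == GFP_KERNEL ? "GFP_KERNEL" : "GFP_ATOMIC");
        return 0;
    }
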
diff --git a/queue-6.3/tls-rx-strp-factor-out-copying-skb-data.patch b/queue-6.3/tls-rx-strp-factor-out-copying-skb-data.patch
new file mode 100644 (file)
index 0000000..4058e43
--- /dev/null
@@ -0,0 +1,84 @@
+From 8683bf9c4abde1291d0f10e9a96d77f72f5ffc16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 May 2023 18:50:40 -0700
+Subject: tls: rx: strp: factor out copying skb data
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit c1c607b1e5d5477d82ca6a86a05a4f10907b33ee ]
+
+We'll need to copy input skbs individually in the next patch.
+Factor that code out (without assuming we're copying a full record).
+
+Tested-by: Shai Amiram <samiram@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: eca9bfafee3a ("tls: rx: strp: preserve decryption status of skbs when needed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_strp.c | 33 +++++++++++++++++++++++----------
+ 1 file changed, 23 insertions(+), 10 deletions(-)
+
+diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
+index e2e48217e7ac9..61fbf84baf9e0 100644
+--- a/net/tls/tls_strp.c
++++ b/net/tls/tls_strp.c
+@@ -34,31 +34,44 @@ static void tls_strp_anchor_free(struct tls_strparser *strp)
+       strp->anchor = NULL;
+ }
+-/* Create a new skb with the contents of input copied to its page frags */
+-static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
++static struct sk_buff *
++tls_strp_skb_copy(struct tls_strparser *strp, struct sk_buff *in_skb,
++                int offset, int len)
+ {
+-      struct strp_msg *rxm;
+       struct sk_buff *skb;
+-      int i, err, offset;
++      int i, err;
+-      skb = alloc_skb_with_frags(0, strp->stm.full_len, TLS_PAGE_ORDER,
++      skb = alloc_skb_with_frags(0, len, TLS_PAGE_ORDER,
+                                  &err, strp->sk->sk_allocation);
+       if (!skb)
+               return NULL;
+-      offset = strp->stm.offset;
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+-              WARN_ON_ONCE(skb_copy_bits(strp->anchor, offset,
++              WARN_ON_ONCE(skb_copy_bits(in_skb, offset,
+                                          skb_frag_address(frag),
+                                          skb_frag_size(frag)));
+               offset += skb_frag_size(frag);
+       }
+-      skb->len = strp->stm.full_len;
+-      skb->data_len = strp->stm.full_len;
+-      skb_copy_header(skb, strp->anchor);
++      skb->len = len;
++      skb->data_len = len;
++      skb_copy_header(skb, in_skb);
++      return skb;
++}
++
++/* Create a new skb with the contents of input copied to its page frags */
++static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
++{
++      struct strp_msg *rxm;
++      struct sk_buff *skb;
++
++      skb = tls_strp_skb_copy(strp, strp->anchor, strp->stm.offset,
++                              strp->stm.full_len);
++      if (!skb)
++              return NULL;
++
+       rxm = strp_msg(skb);
+       rxm->offset = 0;
+       return skb;
+-- 
+2.39.2
+
diff --git a/queue-6.3/tls-rx-strp-fix-determining-record-length-in-copy-mo.patch b/queue-6.3/tls-rx-strp-fix-determining-record-length-in-copy-mo.patch
new file mode 100644 (file)
index 0000000..4b2a9fc
--- /dev/null
@@ -0,0 +1,71 @@
+From 48f88156c26b585a26863621bcde49f2a320f845 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 May 2023 18:50:39 -0700
+Subject: tls: rx: strp: fix determining record length in copy mode
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 8b0c0dc9fbbd01e58a573a41c38885f9e4c17696 ]
+
+We call tls_rx_msg_size(skb) before doing skb->len += chunk.
+So the tls_rx_msg_size() code will see the old skb->len, most
+likely leading to an over-read.
+
+Worst case we will over-read an entire record; the next iteration
+will then try to trim the skb but may end up turning the frag len
+negative or discarding the subsequent record (since we already told
+TCP we've read it during the previous read, but now we'll trim it
+out of the skb).
+
+Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser")
+Tested-by: Shai Amiram <samiram@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_strp.c | 21 +++++++++++++++------
+ 1 file changed, 15 insertions(+), 6 deletions(-)
+
+diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
+index 24016c865e004..9889df5ce0660 100644
+--- a/net/tls/tls_strp.c
++++ b/net/tls/tls_strp.c
+@@ -210,19 +210,28 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+                                          skb_frag_size(frag),
+                                          chunk));
+-              sz = tls_rx_msg_size(strp, strp->anchor);
++              skb->len += chunk;
++              skb->data_len += chunk;
++              skb_frag_size_add(frag, chunk);
++
++              sz = tls_rx_msg_size(strp, skb);
+               if (sz < 0) {
+                       desc->error = sz;
+                       return 0;
+               }
+               /* We may have over-read, sz == 0 is guaranteed under-read */
+-              if (sz > 0)
+-                      chunk = min_t(size_t, chunk, sz - skb->len);
++              if (unlikely(sz && sz < skb->len)) {
++                      int over = skb->len - sz;
++
++                      WARN_ON_ONCE(over > chunk);
++                      skb->len -= over;
++                      skb->data_len -= over;
++                      skb_frag_size_add(frag, -over);
++
++                      chunk -= over;
++              }
+-              skb->len += chunk;
+-              skb->data_len += chunk;
+-              skb_frag_size_add(frag, chunk);
+               frag++;
+               len -= chunk;
+               offset += chunk;
+-- 
+2.39.2
+
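Reordered, the loop now accounts the copied chunk into the skb before asking for the record size, then trims any bytes past the record boundary back out; that way tls_rx_msg_size() always parses the length it can actually see. A simplified numeric sketch of the fixed ordering (plain integers in place of skb and frag accounting):

    #include <stdio.h>

    /* Fixed ordering: account the chunk first, parse, then trim the
     * over-read back out so TCP can deliver it again later. */
    static int add_and_trim(int skb_len, int chunk, int record_sz)
    {
        skb_len += chunk;               /* account before parsing */

        if (record_sz && record_sz < skb_len) {
            int over = skb_len - record_sz;

            skb_len -= over;            /* drop bytes past the record */
            printf("trimmed %d over-read bytes\n", over);
        }
        return skb_len;
    }

    int main(void)
    {
        /* 80 bytes queued, but the parsed record is only 64 bytes */
        printf("skb len = %d\n", add_and_trim(0, 80, 64));
        return 0;
    }
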
diff --git a/queue-6.3/tls-rx-strp-force-mixed-decrypted-records-into-copy-.patch b/queue-6.3/tls-rx-strp-force-mixed-decrypted-records-into-copy-.patch
new file mode 100644 (file)
index 0000000..6d5b303
--- /dev/null
@@ -0,0 +1,97 @@
+From 7181f1c1bd644d7d374d73a5543a0d06f07b5b1a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 May 2023 18:50:38 -0700
+Subject: tls: rx: strp: force mixed decrypted records into copy mode
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 14c4be92ebb3e36e392aa9dd8f314038a9f96f3c ]
+
+If a record is partially decrypted we'll have to CoW it anyway,
+so go into copy mode and allocate a writable skb right away.
+
+This will make subsequent fix simpler because we won't have to
+teach tls_strp_msg_make_copy() how to copy skbs while preserving
+decrypt status.
+
+Tested-by: Shai Amiram <samiram@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Stable-dep-of: eca9bfafee3a ("tls: rx: strp: preserve decryption status of skbs when needed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/skbuff.h | 10 ++++++++++
+ net/tls/tls_strp.c     | 16 +++++++++++-----
+ 2 files changed, 21 insertions(+), 5 deletions(-)
+
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index dbcaac8b69665..4a882f9ba1f1f 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -1577,6 +1577,16 @@ static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
+       to->l4_hash = from->l4_hash;
+ };
++static inline int skb_cmp_decrypted(const struct sk_buff *skb1,
++                                  const struct sk_buff *skb2)
++{
++#ifdef CONFIG_TLS_DEVICE
++      return skb2->decrypted - skb1->decrypted;
++#else
++      return 0;
++#endif
++}
++
+ static inline void skb_copy_decrypted(struct sk_buff *to,
+                                     const struct sk_buff *from)
+ {
+diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
+index 9889df5ce0660..e2e48217e7ac9 100644
+--- a/net/tls/tls_strp.c
++++ b/net/tls/tls_strp.c
+@@ -326,15 +326,19 @@ static int tls_strp_read_copy(struct tls_strparser *strp, bool qshort)
+       return 0;
+ }
+-static bool tls_strp_check_no_dup(struct tls_strparser *strp)
++static bool tls_strp_check_queue_ok(struct tls_strparser *strp)
+ {
+       unsigned int len = strp->stm.offset + strp->stm.full_len;
+-      struct sk_buff *skb;
++      struct sk_buff *first, *skb;
+       u32 seq;
+-      skb = skb_shinfo(strp->anchor)->frag_list;
+-      seq = TCP_SKB_CB(skb)->seq;
++      first = skb_shinfo(strp->anchor)->frag_list;
++      skb = first;
++      seq = TCP_SKB_CB(first)->seq;
++      /* Make sure there's no duplicate data in the queue,
++       * and the decrypted status matches.
++       */
+       while (skb->len < len) {
+               seq += skb->len;
+               len -= skb->len;
+@@ -342,6 +346,8 @@ static bool tls_strp_check_no_dup(struct tls_strparser *strp)
+               if (TCP_SKB_CB(skb)->seq != seq)
+                       return false;
++              if (skb_cmp_decrypted(first, skb))
++                      return false;
+       }
+       return true;
+@@ -422,7 +428,7 @@ static int tls_strp_read_sock(struct tls_strparser *strp)
+                       return tls_strp_read_copy(strp, true);
+       }
+-      if (!tls_strp_check_no_dup(strp))
++      if (!tls_strp_check_queue_ok(strp))
+               return tls_strp_read_copy(strp, false);
+       strp->msg_ready = 1;
+-- 
+2.39.2
+
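tls_strp_check_queue_ok() now rejects a record whose skbs disagree on ->decrypted, pushing such records into copy mode before they can be collapsed into one skb. A compact userspace sketch of that frag_list walk (the skb struct here is a bare stand-in):

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    struct skb {
        int len;
        bool decrypted;
        struct skb *next;
    };

    /* Walk the skbs covering `len` bytes of a record and require a
     * uniform ->decrypted flag; a mismatch sends the record to copy
     * mode instead of letting it collapse into one skb. */
    static bool queue_ok(struct skb *first, int len)
    {
        struct skb *skb = first;

        while (skb->len < len) {
            len -= skb->len;
            skb = skb->next;
            if (!skb)
                return false;   /* queue shorter than the record */
            if (skb->decrypted != first->decrypted)
                return false;   /* mixed decrypt status */
        }
        return true;
    }

    int main(void)
    {
        struct skb b = { .len = 600, .decrypted = false, .next = NULL };
        struct skb a = { .len = 400, .decrypted = true,  .next = &b };

        printf("uniform? %s\n", queue_ok(&a, 1000) ? "yes" : "no");
        b.decrypted = true;
        printf("uniform? %s\n", queue_ok(&a, 1000) ? "yes" : "no");
        return 0;
    }
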
diff --git a/queue-6.3/tls-rx-strp-preserve-decryption-status-of-skbs-when-.patch b/queue-6.3/tls-rx-strp-preserve-decryption-status-of-skbs-when-.patch
new file mode 100644 (file)
index 0000000..617c47a
--- /dev/null
@@ -0,0 +1,262 @@
+From a64f758b427615555c54411b19464f1f86e17374 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 May 2023 18:50:41 -0700
+Subject: tls: rx: strp: preserve decryption status of skbs when needed
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit eca9bfafee3a0487e59c59201ae14c7594ba940a ]
+
+When the receive buffer is small we try to copy out the data from
+TCP into a skb maintained by TLS to prevent the connection from
+stalling. Unfortunately, if a single record is made up of a mix
+of decrypted and non-decrypted skbs, combining them into a single
+skb leads to loss of the decryption status, resulting in decryption
+errors or data corruption.
+
+Similarly, when trying to use the TCP receive queue directly we need
+to make sure that all the skbs within the record have the same
+status. If we don't, the mixed status will be detected correctly,
+but we'll CoW the anchor, again collapsing it into a single paged
+skb without the decrypted status preserved. So the "fixup" code will
+not know which parts of the skb to re-encrypt.
+
+Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser")
+Tested-by: Shai Amiram <samiram@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tls.h    |   1 +
+ net/tls/tls.h        |   5 ++
+ net/tls/tls_device.c |  22 +++-----
+ net/tls/tls_strp.c   | 117 ++++++++++++++++++++++++++++++++++++-------
+ 4 files changed, 114 insertions(+), 31 deletions(-)
+
+diff --git a/include/net/tls.h b/include/net/tls.h
+index 154949c7b0c88..c36bf4c50027e 100644
+--- a/include/net/tls.h
++++ b/include/net/tls.h
+@@ -124,6 +124,7 @@ struct tls_strparser {
+       u32 mark : 8;
+       u32 stopped : 1;
+       u32 copy_mode : 1;
++      u32 mixed_decrypted : 1;
+       u32 msg_ready : 1;
+       struct strp_msg stm;
+diff --git a/net/tls/tls.h b/net/tls/tls.h
+index 804c3880d0288..0672acab27731 100644
+--- a/net/tls/tls.h
++++ b/net/tls/tls.h
+@@ -167,6 +167,11 @@ static inline bool tls_strp_msg_ready(struct tls_sw_context_rx *ctx)
+       return ctx->strp.msg_ready;
+ }
++static inline bool tls_strp_msg_mixed_decrypted(struct tls_sw_context_rx *ctx)
++{
++      return ctx->strp.mixed_decrypted;
++}
++
+ #ifdef CONFIG_TLS_DEVICE
+ int tls_device_init(void);
+ void tls_device_cleanup(void);
+diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
+index 3b87c7b04ac87..bf69c9d6d06c0 100644
+--- a/net/tls/tls_device.c
++++ b/net/tls/tls_device.c
+@@ -1007,20 +1007,14 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx)
+       struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx);
+       struct sk_buff *skb = tls_strp_msg(sw_ctx);
+       struct strp_msg *rxm = strp_msg(skb);
+-      int is_decrypted = skb->decrypted;
+-      int is_encrypted = !is_decrypted;
+-      struct sk_buff *skb_iter;
+-      int left;
+-
+-      left = rxm->full_len + rxm->offset - skb_pagelen(skb);
+-      /* Check if all the data is decrypted already */
+-      skb_iter = skb_shinfo(skb)->frag_list;
+-      while (skb_iter && left > 0) {
+-              is_decrypted &= skb_iter->decrypted;
+-              is_encrypted &= !skb_iter->decrypted;
+-
+-              left -= skb_iter->len;
+-              skb_iter = skb_iter->next;
++      int is_decrypted, is_encrypted;
++
++      if (!tls_strp_msg_mixed_decrypted(sw_ctx)) {
++              is_decrypted = skb->decrypted;
++              is_encrypted = !is_decrypted;
++      } else {
++              is_decrypted = 0;
++              is_encrypted = 0;
+       }
+       trace_tls_device_decrypted(sk, tcp_sk(sk)->copied_seq - rxm->full_len,
+diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
+index 61fbf84baf9e0..da95abbb7ea32 100644
+--- a/net/tls/tls_strp.c
++++ b/net/tls/tls_strp.c
+@@ -29,7 +29,8 @@ static void tls_strp_anchor_free(struct tls_strparser *strp)
+       struct skb_shared_info *shinfo = skb_shinfo(strp->anchor);
+       DEBUG_NET_WARN_ON_ONCE(atomic_read(&shinfo->dataref) != 1);
+-      shinfo->frag_list = NULL;
++      if (!strp->copy_mode)
++              shinfo->frag_list = NULL;
+       consume_skb(strp->anchor);
+       strp->anchor = NULL;
+ }
+@@ -195,22 +196,22 @@ static void tls_strp_flush_anchor_copy(struct tls_strparser *strp)
+       for (i = 0; i < shinfo->nr_frags; i++)
+               __skb_frag_unref(&shinfo->frags[i], false);
+       shinfo->nr_frags = 0;
++      if (strp->copy_mode) {
++              kfree_skb_list(shinfo->frag_list);
++              shinfo->frag_list = NULL;
++      }
+       strp->copy_mode = 0;
++      strp->mixed_decrypted = 0;
+ }
+-static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+-                         unsigned int offset, size_t in_len)
++static int tls_strp_copyin_frag(struct tls_strparser *strp, struct sk_buff *skb,
++                              struct sk_buff *in_skb, unsigned int offset,
++                              size_t in_len)
+ {
+-      struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
+-      struct sk_buff *skb;
+-      skb_frag_t *frag;
+       size_t len, chunk;
++      skb_frag_t *frag;
+       int sz;
+-      if (strp->msg_ready)
+-              return 0;
+-
+-      skb = strp->anchor;
+       frag = &skb_shinfo(skb)->frags[skb->len / PAGE_SIZE];
+       len = in_len;
+@@ -228,10 +229,8 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+               skb_frag_size_add(frag, chunk);
+               sz = tls_rx_msg_size(strp, skb);
+-              if (sz < 0) {
+-                      desc->error = sz;
+-                      return 0;
+-              }
++              if (sz < 0)
++                      return sz;
+               /* We may have over-read, sz == 0 is guaranteed under-read */
+               if (unlikely(sz && sz < skb->len)) {
+@@ -271,15 +270,99 @@ static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
+               offset += chunk;
+       }
+-      if (strp->stm.full_len == skb->len) {
++read_done:
++      return in_len - len;
++}
++
++static int tls_strp_copyin_skb(struct tls_strparser *strp, struct sk_buff *skb,
++                             struct sk_buff *in_skb, unsigned int offset,
++                             size_t in_len)
++{
++      struct sk_buff *nskb, *first, *last;
++      struct skb_shared_info *shinfo;
++      size_t chunk;
++      int sz;
++
++      if (strp->stm.full_len)
++              chunk = strp->stm.full_len - skb->len;
++      else
++              chunk = TLS_MAX_PAYLOAD_SIZE + PAGE_SIZE;
++      chunk = min(chunk, in_len);
++
++      nskb = tls_strp_skb_copy(strp, in_skb, offset, chunk);
++      if (!nskb)
++              return -ENOMEM;
++
++      shinfo = skb_shinfo(skb);
++      if (!shinfo->frag_list) {
++              shinfo->frag_list = nskb;
++              nskb->prev = nskb;
++      } else {
++              first = shinfo->frag_list;
++              last = first->prev;
++              last->next = nskb;
++              first->prev = nskb;
++      }
++
++      skb->len += chunk;
++      skb->data_len += chunk;
++
++      if (!strp->stm.full_len) {
++              sz = tls_rx_msg_size(strp, skb);
++              if (sz < 0)
++                      return sz;
++
++              /* We may have over-read, sz == 0 is guaranteed under-read */
++              if (unlikely(sz && sz < skb->len)) {
++                      int over = skb->len - sz;
++
++                      WARN_ON_ONCE(over > chunk);
++                      skb->len -= over;
++                      skb->data_len -= over;
++                      __pskb_trim(nskb, nskb->len - over);
++
++                      chunk -= over;
++              }
++
++              strp->stm.full_len = sz;
++      }
++
++      return chunk;
++}
++
++static int tls_strp_copyin(read_descriptor_t *desc, struct sk_buff *in_skb,
++                         unsigned int offset, size_t in_len)
++{
++      struct tls_strparser *strp = (struct tls_strparser *)desc->arg.data;
++      struct sk_buff *skb;
++      int ret;
++
++      if (strp->msg_ready)
++              return 0;
++
++      skb = strp->anchor;
++      if (!skb->len)
++              skb_copy_decrypted(skb, in_skb);
++      else
++              strp->mixed_decrypted |= !!skb_cmp_decrypted(skb, in_skb);
++
++      if (IS_ENABLED(CONFIG_TLS_DEVICE) && strp->mixed_decrypted)
++              ret = tls_strp_copyin_skb(strp, skb, in_skb, offset, in_len);
++      else
++              ret = tls_strp_copyin_frag(strp, skb, in_skb, offset, in_len);
++      if (ret < 0) {
++              desc->error = ret;
++              ret = 0;
++      }
++
++      if (strp->stm.full_len && strp->stm.full_len == skb->len) {
+               desc->count = 0;
+               strp->msg_ready = 1;
+               tls_rx_msg_ready(strp);
+       }
+-read_done:
+-      return in_len - len;
++      return ret;
+ }
+ static int tls_strp_read_copyin(struct tls_strparser *strp)
+-- 
+2.39.2
+
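The core of the fix above is a routing decision in tls_strp_copyin():
the first chunk copied into the anchor stamps it with its decryption
status, any later chunk with a different status sets mixed_decrypted,
and mixed records are kept as a frag_list of separate skbs
(tls_strp_copyin_skb()) so each retains its own decrypted bit, while
uniform records still take the flattened frag path
(tls_strp_copyin_frag()). Below is a rough, compilable model of just
that decision; the types and helpers are illustrative, not the
kernel's.

#include <stdbool.h>

struct model_strp {
	bool anchor_has_data;
	int anchor_decrypted;
	int mixed_decrypted;
};

enum copy_path { COPY_FRAGS, COPY_SKBS };

static enum copy_path classify_chunk(struct model_strp *strp,
				     int chunk_decrypted)
{
	if (!strp->anchor_has_data) {
		/* mirrors skb_copy_decrypted(anchor, in_skb) */
		strp->anchor_decrypted = chunk_decrypted;
		strp->anchor_has_data = true;
	} else if (chunk_decrypted != strp->anchor_decrypted) {
		/* mirrors skb_cmp_decrypted(anchor, in_skb) != 0 */
		strp->mixed_decrypted = 1;
	}

	/* Mixed records must stay as separate skbs so each keeps its
	 * own decrypted bit; uniform records can be flattened to frags.
	 */
	return strp->mixed_decrypted ? COPY_SKBS : COPY_FRAGS;
}

int main(void)
{
	struct model_strp s = { false, 0, 0 };

	classify_chunk(&s, 1);		/* first chunk: decrypted */
	return classify_chunk(&s, 0);	/* status differs: COPY_SKBS */
}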
diff --git a/queue-6.3/tls-rx-strp-set-the-skb-len-of-detached-cow-ed-skbs.patch b/queue-6.3/tls-rx-strp-set-the-skb-len-of-detached-cow-ed-skbs.patch
new file mode 100644 (file)
index 0000000..0b80521
--- /dev/null
@@ -0,0 +1,40 @@
+From 5abb221ffa83e7c2232ca01994269fb16f4b83be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 May 2023 18:50:37 -0700
+Subject: tls: rx: strp: set the skb->len of detached / CoW'ed skbs
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 210620ae44a83f25220450bbfcc22e6fe986b25f ]
+
+alloc_skb_with_frags() fills in the page frag sizes but does not
+set skb->len and skb->data_len. Set those correctly, otherwise
+device offload will most likely generate an empty skb and
+hit the BUG() at the end of __skb_nsg().
+
+Fixes: 84c61fe1a75b ("tls: rx: do not use the standard strparser")
+Tested-by: Shai Amiram <samiram@nvidia.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/tls/tls_strp.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/tls/tls_strp.c b/net/tls/tls_strp.c
+index 955ac3e0bf4d3..24016c865e004 100644
+--- a/net/tls/tls_strp.c
++++ b/net/tls/tls_strp.c
+@@ -56,6 +56,8 @@ static struct sk_buff *tls_strp_msg_make_copy(struct tls_strparser *strp)
+               offset += skb_frag_size(frag);
+       }
++      skb->len = strp->stm.full_len;
++      skb->data_len = strp->stm.full_len;
+       skb_copy_header(skb, strp->anchor);
+       rxm = strp_msg(skb);
+       rxm->offset = 0;
+-- 
+2.39.2
+
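The two added lines restore a basic skb invariant: for the purely paged
copy built here, both skb->len and skb->data_len must equal the record
length (the sum of the page frag sizes), or anything that walks the skb,
such as __skb_nsg(), sees zero bytes. A toy model of the invariant, with
simplified stand-in types rather than the real sk_buff:

#include <assert.h>

struct model_skb {
	unsigned int len;	/* total bytes (linear + paged) */
	unsigned int data_len;	/* paged bytes only */
	unsigned int frag_size[2];
	unsigned int nr_frags;
};

static void fix_copied_skb(struct model_skb *skb, unsigned int full_len)
{
	/* alloc_skb_with_frags() filled frag_size[] but left len and
	 * data_len at zero; the fix sets both to the record length.
	 */
	skb->len = full_len;
	skb->data_len = full_len;
}

int main(void)
{
	struct model_skb skb = { 0, 0, { 4096, 904 }, 2 };

	fix_copied_skb(&skb, 5000);
	assert(skb.len == skb.frag_size[0] + skb.frag_size[1]);
	return 0;
}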
diff --git a/queue-6.3/vfio-type1-check-pfn-valid-before-converting-to-stru.patch b/queue-6.3/vfio-type1-check-pfn-valid-before-converting-to-stru.patch
new file mode 100644 (file)
index 0000000..536d3b5
--- /dev/null
@@ -0,0 +1,64 @@
+From 75822bfcece8d5b2a9eb942afc1213b987882174 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 May 2023 14:58:43 +0800
+Subject: vfio/type1: check pfn valid before converting to struct page
+
+From: Yan Zhao <yan.y.zhao@intel.com>
+
+[ Upstream commit 4752354af71043e6fd72ef5490ed6da39e6cab4a ]
+
+Check that the physical PFN is valid before converting the PFN to a
+struct page pointer to be returned to the caller of vfio_pin_pages().
+
+vfio_pin_pages() pins user pages with contiguous IOVA.
+If the IOVA of a user page to be pinned belongs to a vma with vm_flags
+VM_PFNMAP, pin_user_pages_remote() will return -EFAULT without returning
+a struct page address for this PFN. This is because this kind of PFN
+(e.g. an MMIO PFN) usually has no valid struct page address associated
+with it.
+Upon this error, vaddr_get_pfns() will obtain the physical PFN directly.
+
+While vfio_pin_pages() previously returned PFN arrays to the caller
+directly, after commit
+34a255e67615 ("vfio: Replace phys_pfn with pages for vfio_pin_pages()"),
+PFNs are converted to "struct page *" unconditionally, and therefore
+the returned "struct page *" array may contain invalid struct page
+addresses.
+
+Given that current in-tree users of vfio_pin_pages() only expect
+"struct page *" to be returned, check PFN validity and return -EINVAL
+to make the caller aware that the IOVAs to be pinned contain a PFN that
+cannot be returned in the "struct page *" array. This way, the caller
+will not consume the returned pointer (e.g. test PageReserved()) and
+will avoid errors like "supervisor read access in kernel mode".
+
+Fixes: 34a255e67615 ("vfio: Replace phys_pfn with pages for vfio_pin_pages()")
+Cc: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
+Signed-off-by: Yan Zhao <yan.y.zhao@intel.com>
+Reviewed-by: Sean Christopherson <seanjc@google.com>
+Link: https://lore.kernel.org/r/20230519065843.10653-1-yan.y.zhao@intel.com
+Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/vfio/vfio_iommu_type1.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
+index 493c31de0edb9..0620dbe5cca0c 100644
+--- a/drivers/vfio/vfio_iommu_type1.c
++++ b/drivers/vfio/vfio_iommu_type1.c
+@@ -860,6 +860,11 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
+               if (ret)
+                       goto pin_unwind;
++              if (!pfn_valid(phys_pfn)) {
++                      ret = -EINVAL;
++                      goto pin_unwind;
++              }
++
+               ret = vfio_add_to_pfn_list(dma, iova, phys_pfn);
+               if (ret) {
+                       if (put_pfn(phys_pfn, dma->prot) && do_accounting)
+-- 
+2.39.2
+
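The pattern of the fix is general: a PFN may only be converted with
pfn_to_page() if pfn_valid() says a struct page backs it; PFNs from
VM_PFNMAP mappings (typically MMIO) have none and must be rejected
before conversion. Below is a self-contained userspace model of that
guard; model_pfn_valid(), model_pin_page() and the fake mem_map are
illustrative stand-ins for the kernel's pfn_valid()/pfn_to_page(), not
the real implementation.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

/* Assume only PFNs below MODEL_MAX_PFN are backed by a (fake) page. */
#define MODEL_MAX_PFN 1024UL

struct model_page { unsigned long pfn; };
static struct model_page model_mem_map[MODEL_MAX_PFN];

static bool model_pfn_valid(unsigned long pfn)
{
	return pfn < MODEL_MAX_PFN;
}

static int model_pin_page(unsigned long pfn, struct model_page **out)
{
	/* The fix: reject PFNs without a struct page (e.g. MMIO from a
	 * VM_PFNMAP vma) instead of handing back a bogus pointer.
	 */
	if (!model_pfn_valid(pfn))
		return -EINVAL;

	*out = &model_mem_map[pfn];
	return 0;
}

int main(void)
{
	struct model_page *pg;

	printf("%d\n", model_pin_page(7, &pg));		/* 0: RAM-backed */
	printf("%d\n", model_pin_page(1UL << 20, &pg));	/* -EINVAL: MMIO-like */
	return 0;
}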