git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.10
author     Sasha Levin <sashal@kernel.org>
Sun, 22 Jan 2023 04:27:50 +0000 (23:27 -0500)
committer  Sasha Levin <sashal@kernel.org>
Sun, 22 Jan 2023 04:27:50 +0000 (23:27 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
21 files changed:
queue-5.10/alsa-hda-realtek-turn-on-power-early.patch [new file with mode: 0644]
queue-5.10/bluetooth-hci_qca-fix-driver-shutdown-on-closed-serd.patch [new file with mode: 0644]
queue-5.10/bluetooth-hci_qca-wait-for-timeout-during-suspend.patch [new file with mode: 0644]
queue-5.10/drm-i915-gt-reset-twice.patch [new file with mode: 0644]
queue-5.10/efi-fix-userspace-infinite-retry-read-efivars-after-.patch [new file with mode: 0644]
queue-5.10/io_uring-add-flag-for-disabling-provided-buffer-recy.patch [new file with mode: 0644]
queue-5.10/io_uring-allow-re-poll-if-we-made-progress.patch [new file with mode: 0644]
queue-5.10/io_uring-check-for-valid-register-opcode-earlier.patch [new file with mode: 0644]
queue-5.10/io_uring-do-not-recalculate-ppos-unnecessarily.patch [new file with mode: 0644]
queue-5.10/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch [new file with mode: 0644]
queue-5.10/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch [new file with mode: 0644]
queue-5.10/io_uring-ensure-that-cached-task-references-are-alwa.patch [new file with mode: 0644]
queue-5.10/io_uring-fix-async-accept-on-o_nonblock-sockets.patch [new file with mode: 0644]
queue-5.10/io_uring-fix-cq-waiting-timeout-handling.patch [new file with mode: 0644]
queue-5.10/io_uring-improve-send-recv-error-handling.patch [new file with mode: 0644]
queue-5.10/io_uring-lock-overflowing-for-iopoll.patch [new file with mode: 0644]
queue-5.10/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch [new file with mode: 0644]
queue-5.10/io_uring-rw-defer-fsnotify-calls-to-task-context.patch [new file with mode: 0644]
queue-5.10/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch [new file with mode: 0644]
queue-5.10/io_uring-update-kiocb-ki_pos-at-execution-time.patch [new file with mode: 0644]
queue-5.10/series

diff --git a/queue-5.10/alsa-hda-realtek-turn-on-power-early.patch b/queue-5.10/alsa-hda-realtek-turn-on-power-early.patch
new file mode 100644 (file)
index 0000000..e4d7cae
--- /dev/null
@@ -0,0 +1,86 @@
+From 6332fca1aff9874462003d3878c7a7e069280a0d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Dec 2022 15:22:25 +0800
+Subject: ALSA: hda/realtek - Turn on power early
+
+From: Yuchi Yang <yangyuchi66@gmail.com>
+
+[ Upstream commit 1f680609bf1beac20e2a31ddcb1b88874123c39f ]
+
+Turn on the power early to avoid a wrong state in the power-related
+registers. This also allows the JD (jack detection) state to be updated
+earlier when resuming.
+
+Signed-off-by: Yuchi Yang <yangyuchi66@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/e35d8f4fa18f4448a2315cc7d4a3715f@realtek.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 30 ++++++++++++++++--------------
+ 1 file changed, 16 insertions(+), 14 deletions(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index eb7dd457ef5a..cfd86389d37f 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -3561,6 +3561,15 @@ static void alc256_init(struct hda_codec *codec)
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
+       bool hp_pin_sense;
++      if (spec->ultra_low_power) {
++              alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1);
++              alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2);
++              alc_update_coef_idx(codec, 0x08, 7<<4, 0);
++              alc_update_coef_idx(codec, 0x3b, 1<<15, 0);
++              alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
++              msleep(30);
++      }
++
+       if (!hp_pin)
+               hp_pin = 0x21;
+@@ -3572,14 +3581,6 @@ static void alc256_init(struct hda_codec *codec)
+               msleep(2);
+       alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
+-      if (spec->ultra_low_power) {
+-              alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1);
+-              alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2);
+-              alc_update_coef_idx(codec, 0x08, 7<<4, 0);
+-              alc_update_coef_idx(codec, 0x3b, 1<<15, 0);
+-              alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
+-              msleep(30);
+-      }
+       snd_hda_codec_write(codec, hp_pin, 0,
+                           AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
+@@ -3661,6 +3662,13 @@ static void alc225_init(struct hda_codec *codec)
+       hda_nid_t hp_pin = alc_get_hp_pin(spec);
+       bool hp1_pin_sense, hp2_pin_sense;
++      if (spec->ultra_low_power) {
++              alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2);
++              alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
++              alc_update_coef_idx(codec, 0x33, 1<<11, 0);
++              msleep(30);
++      }
++
+       if (!hp_pin)
+               hp_pin = 0x21;
+       msleep(30);
+@@ -3672,12 +3680,6 @@ static void alc225_init(struct hda_codec *codec)
+               msleep(2);
+       alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
+-      if (spec->ultra_low_power) {
+-              alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2);
+-              alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
+-              alc_update_coef_idx(codec, 0x33, 1<<11, 0);
+-              msleep(30);
+-      }
+       if (hp1_pin_sense || spec->ultra_low_power)
+               snd_hda_codec_write(codec, hp_pin, 0,
+-- 
+2.39.0
+
diff --git a/queue-5.10/bluetooth-hci_qca-fix-driver-shutdown-on-closed-serd.patch b/queue-5.10/bluetooth-hci_qca-fix-driver-shutdown-on-closed-serd.patch
new file mode 100644 (file)
index 0000000..88c68de
--- /dev/null
@@ -0,0 +1,95 @@
+From a52afdced306001f6015a7468158effdca550a1b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Dec 2022 11:28:29 +0100
+Subject: Bluetooth: hci_qca: Fix driver shutdown on closed serdev
+
+From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+[ Upstream commit 272970be3dabd24cbe50e393ffee8f04aec3b9a8 ]
+
+The driver shutdown callback (which sends EDL_SOC_RESET to the device
+over serdev) should not be invoked when the HCI device is not open (e.g.
+if hci_dev_open_sync() failed), because the serdev and its TTY are not
+open either.  Also skip this step if the device is powered off
+(qca_power_shutdown()).
+
+The shutdown callback causes use-after-free during system reboot with
+Qualcomm Atheros Bluetooth:
+
+  Unable to handle kernel paging request at virtual address
+  0072662f67726fd7
+  ...
+  CPU: 6 PID: 1 Comm: systemd-shutdow Tainted: G        W
+  6.1.0-rt5-00325-g8a5f56bcfcca #8
+  Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT)
+  Call trace:
+   tty_driver_flush_buffer+0x4/0x30
+   serdev_device_write_flush+0x24/0x34
+   qca_serdev_shutdown+0x80/0x130 [hci_uart]
+   device_shutdown+0x15c/0x260
+   kernel_restart+0x48/0xac
+
+KASAN report:
+
+  BUG: KASAN: use-after-free in tty_driver_flush_buffer+0x1c/0x50
+  Read of size 8 at addr ffff16270c2e0018 by task systemd-shutdow/1
+
+  CPU: 7 PID: 1 Comm: systemd-shutdow Not tainted
+  6.1.0-next-20221220-00014-gb85aaf97fb01-dirty #28
+  Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT)
+  Call trace:
+   dump_backtrace.part.0+0xdc/0xf0
+   show_stack+0x18/0x30
+   dump_stack_lvl+0x68/0x84
+   print_report+0x188/0x488
+   kasan_report+0xa4/0xf0
+   __asan_load8+0x80/0xac
+   tty_driver_flush_buffer+0x1c/0x50
+   ttyport_write_flush+0x34/0x44
+   serdev_device_write_flush+0x48/0x60
+   qca_serdev_shutdown+0x124/0x274
+   device_shutdown+0x1e8/0x350
+   kernel_restart+0x48/0xb0
+   __do_sys_reboot+0x244/0x2d0
+   __arm64_sys_reboot+0x54/0x70
+   invoke_syscall+0x60/0x190
+   el0_svc_common.constprop.0+0x7c/0x160
+   do_el0_svc+0x44/0xf0
+   el0_svc+0x2c/0x6c
+   el0t_64_sync_handler+0xbc/0x140
+   el0t_64_sync+0x190/0x194
+
+Fixes: 7e7bbddd029b ("Bluetooth: hci_qca: Fix qca6390 enable failure after warm reboot")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/bluetooth/hci_qca.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index 652290425028..096a604ef47f 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -2064,10 +2064,17 @@ static void qca_serdev_shutdown(struct device *dev)
+       int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS);
+       struct serdev_device *serdev = to_serdev_device(dev);
+       struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
++      struct hci_uart *hu = &qcadev->serdev_hu;
++      struct hci_dev *hdev = hu->hdev;
++      struct qca_data *qca = hu->priv;
+       const u8 ibs_wake_cmd[] = { 0xFD };
+       const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 };
+       if (qcadev->btsoc_type == QCA_QCA6390) {
++              if (test_bit(QCA_BT_OFF, &qca->flags) ||
++                  !test_bit(HCI_RUNNING, &hdev->flags))
++                      return;
++
+               serdev_device_write_flush(serdev);
+               ret = serdev_device_write_buf(serdev, ibs_wake_cmd,
+                                             sizeof(ibs_wake_cmd));
+-- 
+2.39.0
+
diff --git a/queue-5.10/bluetooth-hci_qca-wait-for-timeout-during-suspend.patch b/queue-5.10/bluetooth-hci_qca-wait-for-timeout-during-suspend.patch
new file mode 100644 (file)
index 0000000..e71f801
--- /dev/null
@@ -0,0 +1,164 @@
+From 7b3e495e12e1eccee6464b6ab99c1aee4dc222b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Oct 2020 20:50:21 +0530
+Subject: Bluetooth: hci_qca: Wait for timeout during suspend
+
+From: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
+
+[ Upstream commit 2be43abac5a839d44bf9d14716573ae0ac920f2b ]
+
+Currently qca_suspend() relies on the IBS mechanism. During
+FW download and memory dump collection, IBS is disabled.
+In those cases the driver would allow suspend while still using the
+serdev port, which results in errors. Now a wait with timeout is added
+if suspend is triggered during FW download or memory dump collection.
+
+Signed-off-by: Venkata Lakshmi Narayana Gubba <gubbaven@codeaurora.org>
+Signed-off-by: Balakrishna Godavarthi <bgodavar@codeaurora.org>
+Reviewed-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
+Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
+Stable-dep-of: 272970be3dab ("Bluetooth: hci_qca: Fix driver shutdown on closed serdev")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/bluetooth/hci_qca.c | 48 ++++++++++++++++++++++++++++++-------
+ 1 file changed, 39 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index 60b0e13bb9fc..652290425028 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -50,6 +50,8 @@
+ #define IBS_HOST_TX_IDLE_TIMEOUT_MS   2000
+ #define CMD_TRANS_TIMEOUT_MS          100
+ #define MEMDUMP_TIMEOUT_MS            8000
++#define IBS_DISABLE_SSR_TIMEOUT_MS    (MEMDUMP_TIMEOUT_MS + 1000)
++#define FW_DOWNLOAD_TIMEOUT_MS                3000
+ /* susclk rate */
+ #define SUSCLK_RATE_32KHZ     32768
+@@ -68,12 +70,13 @@
+ #define QCA_MEMDUMP_BYTE              0xFB
+ enum qca_flags {
+-      QCA_IBS_ENABLED,
++      QCA_IBS_DISABLED,
+       QCA_DROP_VENDOR_EVENT,
+       QCA_SUSPENDING,
+       QCA_MEMDUMP_COLLECTION,
+       QCA_HW_ERROR_EVENT,
+-      QCA_SSR_TRIGGERED
++      QCA_SSR_TRIGGERED,
++      QCA_BT_OFF
+ };
+ enum qca_capabilities {
+@@ -870,7 +873,7 @@ static int qca_enqueue(struct hci_uart *hu, struct sk_buff *skb)
+        * Out-Of-Band(GPIOs control) sleep is selected.
+        * Don't wake the device up when suspending.
+        */
+-      if (!test_bit(QCA_IBS_ENABLED, &qca->flags) ||
++      if (test_bit(QCA_IBS_DISABLED, &qca->flags) ||
+           test_bit(QCA_SUSPENDING, &qca->flags)) {
+               skb_queue_tail(&qca->txq, skb);
+               spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
+@@ -1015,7 +1018,7 @@ static void qca_controller_memdump(struct work_struct *work)
+                        * the controller to send the dump is 8 seconds. let us
+                        * start timer to handle this asynchronous activity.
+                        */
+-                      clear_bit(QCA_IBS_ENABLED, &qca->flags);
++                      set_bit(QCA_IBS_DISABLED, &qca->flags);
+                       set_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
+                       dump = (void *) skb->data;
+                       dump_size = __le32_to_cpu(dump->dump_size);
+@@ -1621,6 +1624,7 @@ static int qca_power_on(struct hci_dev *hdev)
+       struct hci_uart *hu = hci_get_drvdata(hdev);
+       enum qca_btsoc_type soc_type = qca_soc_type(hu);
+       struct qca_serdev *qcadev;
++      struct qca_data *qca = hu->priv;
+       int ret = 0;
+       /* Non-serdev device usually is powered by external power
+@@ -1640,6 +1644,7 @@ static int qca_power_on(struct hci_dev *hdev)
+               }
+       }
++      clear_bit(QCA_BT_OFF, &qca->flags);
+       return ret;
+ }
+@@ -1659,7 +1664,7 @@ static int qca_setup(struct hci_uart *hu)
+               return ret;
+       /* Patch downloading has to be done without IBS mode */
+-      clear_bit(QCA_IBS_ENABLED, &qca->flags);
++      set_bit(QCA_IBS_DISABLED, &qca->flags);
+       /* Enable controller to do both LE scan and BR/EDR inquiry
+        * simultaneously.
+@@ -1710,7 +1715,7 @@ static int qca_setup(struct hci_uart *hu)
+       ret = qca_uart_setup(hdev, qca_baudrate, soc_type, soc_ver,
+                       firmware_name);
+       if (!ret) {
+-              set_bit(QCA_IBS_ENABLED, &qca->flags);
++              clear_bit(QCA_IBS_DISABLED, &qca->flags);
+               qca_debugfs_init(hdev);
+               hu->hdev->hw_error = qca_hw_error;
+               hu->hdev->cmd_timeout = qca_cmd_timeout;
+@@ -1814,7 +1819,7 @@ static void qca_power_shutdown(struct hci_uart *hu)
+        * data in skb's.
+        */
+       spin_lock_irqsave(&qca->hci_ibs_lock, flags);
+-      clear_bit(QCA_IBS_ENABLED, &qca->flags);
++      set_bit(QCA_IBS_DISABLED, &qca->flags);
+       qca_flush(hu);
+       spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
+@@ -1833,6 +1838,8 @@ static void qca_power_shutdown(struct hci_uart *hu)
+       } else if (qcadev->bt_en) {
+               gpiod_set_value_cansleep(qcadev->bt_en, 0);
+       }
++
++      set_bit(QCA_BT_OFF, &qca->flags);
+ }
+ static int qca_power_off(struct hci_dev *hdev)
+@@ -2093,11 +2100,34 @@ static int __maybe_unused qca_suspend(struct device *dev)
+       bool tx_pending = false;
+       int ret = 0;
+       u8 cmd;
++      u32 wait_timeout = 0;
+       set_bit(QCA_SUSPENDING, &qca->flags);
+-      /* Device is downloading patch or doesn't support in-band sleep. */
+-      if (!test_bit(QCA_IBS_ENABLED, &qca->flags))
++      if (test_bit(QCA_BT_OFF, &qca->flags))
++              return 0;
++
++      if (test_bit(QCA_IBS_DISABLED, &qca->flags)) {
++              wait_timeout = test_bit(QCA_SSR_TRIGGERED, &qca->flags) ?
++                                      IBS_DISABLE_SSR_TIMEOUT_MS :
++                                      FW_DOWNLOAD_TIMEOUT_MS;
++
++              /* QCA_IBS_DISABLED flag is set to true, During FW download
++               * and during memory dump collection. It is reset to false,
++               * After FW download complete and after memory dump collections.
++               */
++              wait_on_bit_timeout(&qca->flags, QCA_IBS_DISABLED,
++                          TASK_UNINTERRUPTIBLE, msecs_to_jiffies(wait_timeout));
++
++              if (test_bit(QCA_IBS_DISABLED, &qca->flags)) {
++                      bt_dev_err(hu->hdev, "SSR or FW download time out");
++                      ret = -ETIMEDOUT;
++                      goto error;
++              }
++      }
++
++      /* After memory dump collection, Controller is powered off.*/
++      if (test_bit(QCA_BT_OFF, &qca->flags))
+               return 0;
+       cancel_work_sync(&qca->ws_awake_device);
+-- 
+2.39.0
+
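The wait side shown in the last hunk only returns early if something clears QCA_IBS_DISABLED and wakes the waiters. A minimal sketch of the completion side of that kernel pattern (the helper name is invented; the real driver clears the bit once FW download or memdump collection finishes):

    #include <linux/wait_bit.h>

    /* hypothetical helper: pair the clear_bit() with a wake-up, otherwise
     * wait_on_bit_timeout() in qca_suspend() only returns when the full
     * timeout elapses */
    static void qca_ibs_enable_done(unsigned long *flags)
    {
            clear_bit(QCA_IBS_DISABLED, flags);
            smp_mb__after_atomic();         /* order the clear before the wake */
            wake_up_bit(flags, QCA_IBS_DISABLED);
    }
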
diff --git a/queue-5.10/drm-i915-gt-reset-twice.patch b/queue-5.10/drm-i915-gt-reset-twice.patch
new file mode 100644 (file)
index 0000000..ddda04d
--- /dev/null
@@ -0,0 +1,90 @@
+From 3d0246017a00d063299130a38f1b8207af40f6e3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 12 Dec 2022 17:13:38 +0100
+Subject: drm/i915/gt: Reset twice
+
+From: Chris Wilson <chris@chris-wilson.co.uk>
+
+[ Upstream commit d3de5616d36462a646f5b360ba82d3b09ff668eb ]
+
+After applying an engine reset, on some platforms like Jasperlake, we
+occasionally detect that the engine state is not cleared until shortly
+after the resume. As we try to resume the engine with volatile internal
+state, the first request fails with a spurious CS event (it looks like
+it reports a lite-restore to the hung context, instead of the expected
+idle->active context switch).
+
+Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
+Cc: stable@vger.kernel.org
+Cc: Mika Kuoppala <mika.kuoppala@linux.intel.com>
+Signed-off-by: Andi Shyti <andi.shyti@linux.intel.com>
+Reviewed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi.shyti@linux.intel.com
+(cherry picked from commit 3db9d590557da3aa2c952f2fecd3e9b703dad790)
+Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/i915/gt/intel_reset.c | 34 ++++++++++++++++++++++-----
+ 1 file changed, 28 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
+index ac36b67fb46b..00b5912a88b8 100644
+--- a/drivers/gpu/drm/i915/gt/intel_reset.c
++++ b/drivers/gpu/drm/i915/gt/intel_reset.c
+@@ -289,6 +289,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
+ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
+ {
+       struct intel_uncore *uncore = gt->uncore;
++      int loops = 2;
+       int err;
+       /*
+@@ -296,18 +297,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
+        * for fifo space for the write or forcewake the chip for
+        * the read
+        */
+-      intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
++      do {
++              intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
+-      /* Wait for the device to ack the reset requests */
+-      err = __intel_wait_for_register_fw(uncore,
+-                                         GEN6_GDRST, hw_domain_mask, 0,
+-                                         500, 0,
+-                                         NULL);
++              /*
++               * Wait for the device to ack the reset requests.
++               *
++               * On some platforms, e.g. Jasperlake, we see that the
++               * engine register state is not cleared until shortly after
++               * GDRST reports completion, causing a failure as we try
++               * to immediately resume while the internal state is still
++               * in flux. If we immediately repeat the reset, the second
++               * reset appears to serialise with the first, and since
++               * it is a no-op, the registers should retain their reset
++               * value. However, there is still a concern that upon
++               * leaving the second reset, the internal engine state
++               * is still in flux and not ready for resuming.
++               */
++              err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
++                                                 hw_domain_mask, 0,
++                                                 2000, 0,
++                                                 NULL);
++      } while (err == 0 && --loops);
+       if (err)
+               drm_dbg(&gt->i915->drm,
+                       "Wait for 0x%08x engines reset failed\n",
+                       hw_domain_mask);
++      /*
++       * As we have observed that the engine state is still volatile
++       * after GDRST is acked, impose a small delay to let everything settle.
++       */
++      udelay(50);
++
+       return err;
+ }
+-- 
+2.39.0
+
diff --git a/queue-5.10/efi-fix-userspace-infinite-retry-read-efivars-after-.patch b/queue-5.10/efi-fix-userspace-infinite-retry-read-efivars-after-.patch
new file mode 100644 (file)
index 0000000..a1c7b0e
--- /dev/null
@@ -0,0 +1,46 @@
+From 7806a2d37dd3220a1aa94e587bd5b0ca2958f951 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Dec 2022 23:09:36 +0800
+Subject: efi: fix userspace infinite retry read efivars after EFI runtime
+ services page fault
+
+From: Ding Hui <dinghui@sangfor.com.cn>
+
+[ Upstream commit e006ac3003080177cf0b673441a4241f77aaecce ]
+
+After [1][2], if we catch an exception caused by an EFI runtime service,
+we clear the EFI_RUNTIME_SERVICES bit to disable EFI runtime services, so
+any subsequent routine that invokes an EFI runtime service should fail.
+
+But userspace reading efivars through /sys/firmware/efi/efivars/ will get
+stuck in an infinite loop calling read(), because efivarfs_file_read()
+returns -EINTR.
+
+The -EINTR is converted from EFI_ABORTED by efi_status_to_err(), and is
+an improper return value in this situation, so let virt_efi_xxx() return
+EFI_DEVICE_ERROR, which is converted to -EIO for the invoker.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 3425d934fc03 ("efi/x86: Handle page faults occurring while running EFI runtime services")
+Fixes: 23715a26c8d8 ("arm64: efi: Recover from synchronous exceptions occurring in firmware")
+Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/runtime-wrappers.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
+index f3e54f6616f0..60075e0e4943 100644
+--- a/drivers/firmware/efi/runtime-wrappers.c
++++ b/drivers/firmware/efi/runtime-wrappers.c
+@@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work;
+                                                                       \
+       if (!efi_enabled(EFI_RUNTIME_SERVICES)) {                       \
+               pr_warn_once("EFI Runtime Services are disabled!\n");   \
++              efi_rts_work.status = EFI_DEVICE_ERROR;                 \
+               goto exit;                                              \
+       }                                                               \
+                                                                       \
+-- 
+2.39.0
+
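For context, the retry behaviour this fix targets lives in userspace: retrying read() on -EINTR is the conventional pattern, so a file that keeps returning -EINTR spins the reader forever, while -EIO ends the loop. A minimal sketch of such a reader (the helper name is invented for illustration):

    #include <errno.h>
    #include <unistd.h>

    static ssize_t read_retry(int fd, void *buf, size_t len)
    {
            for (;;) {
                    ssize_t n = read(fd, buf, len);

                    if (n >= 0)
                            return n;       /* data or EOF */
                    if (errno == EINTR)
                            continue;       /* loops forever if the error never changes */
                    return -1;              /* EIO and other hard errors end the loop */
            }
    }
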
diff --git a/queue-5.10/io_uring-add-flag-for-disabling-provided-buffer-recy.patch b/queue-5.10/io_uring-add-flag-for-disabling-provided-buffer-recy.patch
new file mode 100644 (file)
index 0000000..c130bcc
--- /dev/null
@@ -0,0 +1,60 @@
+From eb53b52ebcf3811844dfa6718842c85d9bbe1401 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Mar 2022 09:30:05 -0600
+Subject: io_uring: add flag for disabling provided buffer recycling
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 8a3e8ee56417f5e0e66580d93941ed9d6f4c8274 upstream.
+
+If we need to continue doing this IO, then we don't want a potentially
+selected buffer recycled. Add a flag for that.
+
+Set this for recv/recvmsg if they do partial IO.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 3d67b9b4100f..7f9fb0cb9230 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -740,6 +740,7 @@ enum {
+       REQ_F_CREDS_BIT,
+       REQ_F_REFCOUNT_BIT,
+       REQ_F_ARM_LTIMEOUT_BIT,
++      REQ_F_PARTIAL_IO_BIT,
+       /* keep async read/write and isreg together and in order */
+       REQ_F_NOWAIT_READ_BIT,
+       REQ_F_NOWAIT_WRITE_BIT,
+@@ -795,6 +796,8 @@ enum {
+       REQ_F_REFCOUNT          = BIT(REQ_F_REFCOUNT_BIT),
+       /* there is a linked timeout that has to be armed */
+       REQ_F_ARM_LTIMEOUT      = BIT(REQ_F_ARM_LTIMEOUT_BIT),
++      /* request has already done partial IO */
++      REQ_F_PARTIAL_IO        = BIT(REQ_F_PARTIAL_IO_BIT),
+ };
+ struct async_poll {
+@@ -4963,6 +4966,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+                       ret = -EINTR;
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->done_io += ret;
++                      req->flags |= REQ_F_PARTIAL_IO;
+                       return io_setup_async_msg(req, kmsg);
+               }
+               req_set_fail(req);
+@@ -5036,6 +5040,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
++                      req->flags |= REQ_F_PARTIAL_IO;
+                       return -EAGAIN;
+               }
+               req_set_fail(req);
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-allow-re-poll-if-we-made-progress.patch b/queue-5.10/io_uring-allow-re-poll-if-we-made-progress.patch
new file mode 100644 (file)
index 0000000..4538b87
--- /dev/null
@@ -0,0 +1,53 @@
+From cb51b731674894f54045c3cd6ebab80e2a89af07 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 10:39:22 -0700
+Subject: io_uring: allow re-poll if we made progress
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 10c873334febaeea9aa0c25c10b5ac0951b77a5f upstream.
+
+We currently check REQ_F_POLLED before arming async poll for a
+notification to retry. If it's set, then we don't allow poll and will
+punt to io-wq instead. This is done to prevent a situation where a buggy
+driver will repeatedly return that there's space/data available yet we
+get -EAGAIN.
+
+However, if we already transferred data, then it should be safe to rely
+on poll again. Gate the check on whether or not REQ_F_PARTIAL_IO is
+also set.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 75d833269751..cc8e13de5fa9 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -5694,7 +5694,7 @@ static int io_arm_poll_handler(struct io_kiocb *req)
+       if (!req->file || !file_can_poll(req->file))
+               return IO_APOLL_ABORTED;
+-      if (req->flags & REQ_F_POLLED)
++      if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
+               return IO_APOLL_ABORTED;
+       if (!def->pollin && !def->pollout)
+               return IO_APOLL_ABORTED;
+@@ -5710,7 +5710,10 @@ static int io_arm_poll_handler(struct io_kiocb *req)
+               mask |= POLLOUT | POLLWRNORM;
+       }
+-      apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
++      if (req->flags & REQ_F_POLLED)
++              apoll = req->apoll;
++      else
++              apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
+       if (unlikely(!apoll))
+               return IO_APOLL_ABORTED;
+       apoll->double_poll = NULL;
+-- 
+2.39.0
+
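The gate in the first hunk is a two-bit mask test: it aborts poll arming only when REQ_F_POLLED is set and REQ_F_PARTIAL_IO is clear. A standalone sketch of the idiom (the bit positions are invented for illustration):

    #include <stdio.h>

    #define REQ_F_POLLED     (1u << 0)      /* illustrative bit values only */
    #define REQ_F_PARTIAL_IO (1u << 1)

    int main(void)
    {
            unsigned int flags = REQ_F_POLLED | REQ_F_PARTIAL_IO;

            /* true only when we polled before AND made no progress */
            if ((flags & (REQ_F_POLLED | REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
                    puts("punt to io-wq");
            else
                    puts("arm poll again"); /* progress made, poll is trusted */
            return 0;
    }

The second hunk then reuses the already-allocated req->apoll entry on a re-poll, since REQ_F_POLLED implies one exists.
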
diff --git a/queue-5.10/io_uring-check-for-valid-register-opcode-earlier.patch b/queue-5.10/io_uring-check-for-valid-register-opcode-earlier.patch
new file mode 100644 (file)
index 0000000..b08e9fa
--- /dev/null
@@ -0,0 +1,45 @@
+From fa17a75eca8d286d3bedd687eb7e5e896d0ac374 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Dec 2022 06:37:08 -0700
+Subject: io_uring: check for valid register opcode earlier
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 343190841a1f22b96996d9f8cfab902a4d1bfd0e ]
+
+We only check the register opcode value inside the restricted ring
+section. Move it into the main io_uring_register() function instead
+and check it up front.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 8c8ba8c067ca..f05f033d8496 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -10805,8 +10805,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode,
+               return -ENXIO;
+       if (ctx->restricted) {
+-              if (opcode >= IORING_REGISTER_LAST)
+-                      return -EINVAL;
+               opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
+               if (!test_bit(opcode, ctx->restrictions.register_op))
+                       return -EACCES;
+@@ -10938,6 +10936,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
+       long ret = -EBADF;
+       struct fd f;
++      if (opcode >= IORING_REGISTER_LAST)
++              return -EINVAL;
++
+       f = fdget(fd);
+       if (!f.file)
+               return -EBADF;
+-- 
+2.39.0
+
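The resulting shape is the usual validate-early pattern: reject out-of-range values at the entry point, then clamp speculation at the table lookup. A minimal sketch under that assumption (the handler table and dispatch function are invented; IORING_REGISTER_LAST and array_index_nospec() are the real kernel symbols):

    #include <linux/nospec.h>

    /* hypothetical dispatch table, sized by the real opcode limit */
    static int (*register_handlers[IORING_REGISTER_LAST])(void __user *arg);

    static int dispatch_register(unsigned int opcode, void __user *arg)
    {
            if (opcode >= IORING_REGISTER_LAST)     /* validate up front */
                    return -EINVAL;

            /* clamp speculative out-of-bounds indexing at the use site */
            opcode = array_index_nospec(opcode, IORING_REGISTER_LAST);
            return register_handlers[opcode](arg);
    }
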
diff --git a/queue-5.10/io_uring-do-not-recalculate-ppos-unnecessarily.patch b/queue-5.10/io_uring-do-not-recalculate-ppos-unnecessarily.patch
new file mode 100644 (file)
index 0000000..cb7bf0d
--- /dev/null
@@ -0,0 +1,100 @@
+From 1bede82287bd73eb9e91510500259fa6cb6a4197 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Feb 2022 02:55:03 -0800
+Subject: io_uring: do not recalculate ppos unnecessarily
+
+From: Dylan Yudaken <dylany@fb.com>
+
+commit b4aec40015953b65f2f114641e7fd7714c8df8e6 upstream.
+
+There is a slight optimisation to be had by calculating the correct pos
+pointer inside io_kiocb_update_pos and then using that later.
+
+It seems code size drops by a bit:
+000000000000a1b0 0000000000000400 t io_read
+000000000000a5b0 0000000000000319 t io_write
+
+vs
+000000000000a1b0 00000000000003f6 t io_read
+000000000000a5b0 0000000000000310 t io_write
+
+Signed-off-by: Dylan Yudaken <dylany@fb.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index eaf8463c9b14..d4e017b07371 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3000,18 +3000,22 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+       }
+ }
+-static inline void io_kiocb_update_pos(struct io_kiocb *req)
++static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
+ {
+       struct kiocb *kiocb = &req->rw.kiocb;
++      bool is_stream = req->file->f_mode & FMODE_STREAM;
+       if (kiocb->ki_pos == -1) {
+-              if (!(req->file->f_mode & FMODE_STREAM)) {
++              if (!is_stream) {
+                       req->flags |= REQ_F_CUR_POS;
+                       kiocb->ki_pos = req->file->f_pos;
++                      return &kiocb->ki_pos;
+               } else {
+                       kiocb->ki_pos = 0;
++                      return NULL;
+               }
+       }
++      return is_stream ? NULL : &kiocb->ki_pos;
+ }
+ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
+@@ -3537,6 +3541,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+       struct iov_iter_state __state, *state;
+       ssize_t ret, ret2;
++      loff_t *ppos;
+       if (rw) {
+               iter = &rw->iter;
+@@ -3569,9 +3574,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
+               return ret ?: -EAGAIN;
+       }
+-      io_kiocb_update_pos(req);
++      ppos = io_kiocb_update_pos(req);
+-      ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
++      ret = rw_verify_area(READ, req->file, ppos, req->result);
+       if (unlikely(ret)) {
+               kfree(iovec);
+               return ret;
+@@ -3675,6 +3680,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+       struct iov_iter_state __state, *state;
+       ssize_t ret, ret2;
++      loff_t *ppos;
+       if (rw) {
+               iter = &rw->iter;
+@@ -3705,9 +3711,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
+           (req->flags & REQ_F_ISREG))
+               goto copy_iov;
+-      io_kiocb_update_pos(req);
++      ppos = io_kiocb_update_pos(req);
+-      ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
++      ret = rw_verify_area(WRITE, req->file, ppos, req->result);
+       if (unlikely(ret))
+               goto out_free;
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch b/queue-5.10/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch
new file mode 100644 (file)
index 0000000..096d0d9
--- /dev/null
@@ -0,0 +1,47 @@
+From 6e48a11832c85d20bab6944befbb7ebc0d1bac70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Jan 2023 20:50:24 -0700
+Subject: io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 46a525e199e4037516f7e498c18f065b09df32ac upstream.
+
+This isn't a reliable mechanism to tell if we have task_work pending; we
+really should be looking at whether we have any items queued. This is
+problematic if forward progress is gated on running said task_work. One
+such example is reading from a pipe, where the write side has been closed
+right before the read is started. The fput() of the file queues TWA_RESUME
+task_work, and we need that task_work to be run before ->release() is
+called for the pipe. If ->release() isn't called, then the read will sit
+forever waiting on data that will never arise.
+
+Fix this by having io_run_task_work() check whether we have task_work
+pending rather than relying on TIF_NOTIFY_SIGNAL for that. The latter
+obviously doesn't work for task_work that is queued without TWA_SIGNAL.
+
+Reported-by: Christiano Haesbaert <haesbaert@haesbaert.org>
+Cc: stable@vger.kernel.org
+Link: https://github.com/axboe/liburing/issues/665
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io-wq.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
+index 87bc38b47103..81485c1a9879 100644
+--- a/io_uring/io-wq.c
++++ b/io_uring/io-wq.c
+@@ -513,7 +513,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
+ static bool io_flush_signals(void)
+ {
+-      if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) {
++      if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
+               __set_current_state(TASK_RUNNING);
+               tracehook_notify_signal();
+               return true;
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch b/queue-5.10/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch
new file mode 100644 (file)
index 0000000..4ed7b3e
--- /dev/null
@@ -0,0 +1,107 @@
+From 8246ae8ea8105eda4453b0b2fea90815b74d5152 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 10:21:22 -0700
+Subject: io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 7ba89d2af17aa879dda30f5d5d3f152e587fc551 upstream.
+
+We currently don't attempt to get the full asked for length even if
+MSG_WAITALL is set, if we get a partial receive. If we do see a partial
+receive, then just note how many bytes we did and return -EAGAIN to
+get it retried.
+
+The iov is advanced appropriately for the vector based case, and we
+manually bump the buffer and remainder for the non-vector case.
+
+Cc: stable@vger.kernel.org
+Reported-by: Constantine Gavrilov <constantine.gavrilov@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 28 ++++++++++++++++++++++++++++
+ 1 file changed, 28 insertions(+)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 34dd6267679a..3d67b9b4100f 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -578,6 +578,7 @@ struct io_sr_msg {
+       int                             msg_flags;
+       int                             bgid;
+       size_t                          len;
++      size_t                          done_io;
+       struct io_buffer                *kbuf;
+ };
+@@ -4903,12 +4904,21 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       if (req->ctx->compat)
+               sr->msg_flags |= MSG_CMSG_COMPAT;
+ #endif
++      sr->done_io = 0;
+       return 0;
+ }
++static bool io_net_retry(struct socket *sock, int flags)
++{
++      if (!(flags & MSG_WAITALL))
++              return false;
++      return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
++}
++
+ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       struct io_async_msghdr iomsg, *kmsg;
++      struct io_sr_msg *sr = &req->sr_msg;
+       struct socket *sock;
+       struct io_buffer *kbuf;
+       unsigned flags;
+@@ -4951,6 +4961,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+                       return io_setup_async_msg(req, kmsg);
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
++              if (ret > 0 && io_net_retry(sock, flags)) {
++                      sr->done_io += ret;
++                      return io_setup_async_msg(req, kmsg);
++              }
+               req_set_fail(req);
+       } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+               req_set_fail(req);
+@@ -4962,6 +4976,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
++      if (ret >= 0)
++              ret += sr->done_io;
++      else if (sr->done_io)
++              ret = sr->done_io;
+       __io_req_complete(req, issue_flags, ret, cflags);
+       return 0;
+ }
+@@ -5014,12 +5032,22 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
+                       return -EAGAIN;
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
++              if (ret > 0 && io_net_retry(sock, flags)) {
++                      sr->len -= ret;
++                      sr->buf += ret;
++                      sr->done_io += ret;
++                      return -EAGAIN;
++              }
+               req_set_fail(req);
+       } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+               req_set_fail(req);
+       }
+       if (req->flags & REQ_F_BUFFER_SELECTED)
+               cflags = io_put_recv_kbuf(req);
++      if (ret >= 0)
++              ret += sr->done_io;
++      else if (sr->done_io)
++              ret = sr->done_io;
+       __io_req_complete(req, issue_flags, ret, cflags);
+       return 0;
+ }
+-- 
+2.39.0
+
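The contract being restored is easiest to see on a plain blocking socket; the sr->done_io bookkeeping in the patch plays the role of 'done' below (the helper name is invented for illustration):

    #include <sys/socket.h>

    /* keep receiving until 'len' bytes arrive, EOF, or a hard error --
     * the behaviour MSG_WAITALL promises for stream sockets */
    static ssize_t recv_all(int fd, char *buf, size_t len)
    {
            size_t done = 0;                /* mirrors sr->done_io */

            while (done < len) {
                    ssize_t n = recv(fd, buf + done, len - done, 0);

                    if (n == 0)
                            break;          /* peer closed the connection */
                    if (n < 0)
                            return done ? (ssize_t)done : -1;
                    done += n;              /* partial receive: go again */
            }
            return done;
    }
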
diff --git a/queue-5.10/io_uring-ensure-that-cached-task-references-are-alwa.patch b/queue-5.10/io_uring-ensure-that-cached-task-references-are-alwa.patch
new file mode 100644 (file)
index 0000000..14da9a7
--- /dev/null
@@ -0,0 +1,55 @@
+From a0e88106b70717e45b0957db31d9a30f027f8638 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 12:36:08 -0700
+Subject: io_uring: ensure that cached task references are always put on exit
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit e775f93f2ab976a2cdb4a7b53063cbe890904f73 upstream.
+
+io_uring caches task references to avoid doing atomics for each of them
+per request. If a request is put from the same task that allocated it,
+then we can maintain a per-ctx cache of them. This obviously relies
+on io_uring always pruning caches in a reliable way, and there's
+currently a case in io_uring fd release where we can miss that.
+
+One example is a ring setup with IOPOLL, which relies on the task
+polling for completions, which will free them. However, if such a task
+submits a request and then exits or closes the ring without reaping
+the completion, then ring release will reap and put. If release happens
+from that very same task, the completed request task refs will get
+put back into the cache pool. This is problematic, as we're now beyond
+the point of pruning caches.
+
+Manually drop these caches after doing an IOPOLL reap. This releases
+references from the current task, which is enough. If another task
+happens to be doing the release, then the caching will not be
+triggered and there's no issue.
+
+Cc: stable@vger.kernel.org
+Fixes: e98e49b2bbf7 ("io_uring: extend task put optimisations")
+Reported-by: Homin Rhee <hominlab@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index e8852d56b1ec..f8a0d228d799 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -9513,6 +9513,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
+       /* if we failed setting up the ctx, we might not have any rings */
+       io_iopoll_try_reap_events(ctx);
++      /* drop cached put refs after potentially doing completions */
++      if (current->io_uring)
++              io_uring_drop_tctx_refs(current);
++
+       INIT_WORK(&ctx->exit_work, io_ring_exit_work);
+       /*
+        * Use system_unbound_wq to avoid spawning tons of event kworkers
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-fix-async-accept-on-o_nonblock-sockets.patch b/queue-5.10/io_uring-fix-async-accept-on-o_nonblock-sockets.patch
new file mode 100644 (file)
index 0000000..0f4ae7e
--- /dev/null
@@ -0,0 +1,50 @@
+From 965d1b22a5e0f21db9f1dd08bf9f07d2799e6f81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 09:13:12 -0700
+Subject: io_uring: fix async accept on O_NONBLOCK sockets
+
+From: Dylan Yudaken <dylany@meta.com>
+
+commit a73825ba70c93e1eb39a845bb3d9885a787f8ffe upstream.
+
+Do not set REQ_F_NOWAIT if the socket is non-blocking. When enabled, this
+causes the accept to immediately post a CQE with EAGAIN, which means you
+cannot perform an accept SQE on a NONBLOCK socket asynchronously.
+
+By removing the flag when there is no pending accept, poll is armed as
+usual, and when a connection comes in the CQE is posted.
+
+Signed-off-by: Dylan Yudaken <dylany@fb.com>
+Link: https://lore.kernel.org/r/20220324143435.2875844-1-dylany@fb.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index cc8e13de5fa9..8c8ba8c067ca 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -5112,9 +5112,6 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
+       struct file *file;
+       int ret, fd;
+-      if (req->file->f_flags & O_NONBLOCK)
+-              req->flags |= REQ_F_NOWAIT;
+-
+       if (!fixed) {
+               fd = __get_unused_fd_flags(accept->flags, accept->nofile);
+               if (unlikely(fd < 0))
+@@ -5127,6 +5124,8 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
+               if (!fixed)
+                       put_unused_fd(fd);
+               ret = PTR_ERR(file);
++              /* safe to retry */
++              req->flags |= REQ_F_PARTIAL_IO;
+               if (ret == -EAGAIN && force_nonblock)
+                       return -EAGAIN;
+               if (ret == -ERESTARTSYS)
+-- 
+2.39.0
+
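From a consumer's point of view, the fixed behaviour looks like this hedged liburing sketch (error handling elided; assumes liburing and an already-initialised ring): the accept completes when a peer connects, even on an O_NONBLOCK listener, instead of immediately posting -EAGAIN.

    #include <liburing.h>

    static int accept_one(struct io_uring *ring, int listen_fd)
    {
            struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
            struct io_uring_cqe *cqe;
            int connfd;

            io_uring_prep_accept(sqe, listen_fd, NULL, NULL, 0);
            io_uring_submit(ring);

            io_uring_wait_cqe(ring, &cqe);  /* blocks until a peer connects */
            connfd = cqe->res;              /* negative errno on failure */
            io_uring_cqe_seen(ring, cqe);
            return connfd;
    }
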
diff --git a/queue-5.10/io_uring-fix-cq-waiting-timeout-handling.patch b/queue-5.10/io_uring-fix-cq-waiting-timeout-handling.patch
new file mode 100644 (file)
index 0000000..007c9f6
--- /dev/null
@@ -0,0 +1,60 @@
+From e149257b0a2a60b966c3855202018f7acd711044 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 10:49:15 +0000
+Subject: io_uring: fix CQ waiting timeout handling
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 12521a5d5cb7ff0ad43eadfc9c135d86e1131fa8 upstream.
+
+The jiffy to ktime CQ waiting conversion broke how we treat timeouts; in
+particular, we rearm the timeout anew every time we get into
+io_cqring_wait_schedule() without adjusting it. Waiting for 2
+CQEs and getting a task_work in the middle may double the timeout value,
+or even worse, in some cases the task may wait indefinitely.
+
+Cc: stable@vger.kernel.org
+Fixes: 228339662b398 ("io_uring: don't convert to jiffies for waiting on timeouts")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/f7bffddd71b08f28a877d44d37ac953ddb01590d.1672915663.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index b7bd5138bdaf..e8852d56b1ec 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -7518,7 +7518,7 @@ static int io_run_task_work_sig(void)
+ /* when returns >0, the caller should retry */
+ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+                                         struct io_wait_queue *iowq,
+-                                        ktime_t timeout)
++                                        ktime_t *timeout)
+ {
+       int ret;
+@@ -7530,7 +7530,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+       if (test_bit(0, &ctx->check_cq_overflow))
+               return 1;
+-      if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
++      if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS))
+               return -ETIME;
+       return 1;
+ }
+@@ -7593,7 +7593,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+               }
+               prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
+                                               TASK_INTERRUPTIBLE);
+-              ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
++              ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+               finish_wait(&ctx->cq_wait, &iowq.wq);
+               cond_resched();
+       } while (ret > 0);
+-- 
+2.39.0
+
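The bug class is generic: a wait loop that re-arms its full timeout after every intermediate wakeup can wait far longer than intended. A userspace sketch of the correct shape (names and the one-second budget are invented for illustration); passing the ktime_t by pointer in the patch serves the same purpose of letting the remaining time survive across calls:

    #include <stdbool.h>
    #include <time.h>

    #define BUDGET_NS 1000000000LL          /* 1s total budget, example value */

    static long long now_ns(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    static bool wait_for(bool (*cond)(void))
    {
            long long deadline = now_ns() + BUDGET_NS;      /* armed once */

            while (!cond()) {
                    long long left = deadline - now_ns();   /* shrinks each pass */

                    if (left <= 0)
                            return false;   /* budget exhausted */
                    /* sleep up to 'left' ns here; an early wakeup (the
                     * task_work case above) no longer restarts the clock */
            }
            return true;
    }
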
diff --git a/queue-5.10/io_uring-improve-send-recv-error-handling.patch b/queue-5.10/io_uring-improve-send-recv-error-handling.patch
new file mode 100644 (file)
index 0000000..f4b4ade
--- /dev/null
@@ -0,0 +1,126 @@
+From 762279fd2bd47c45c44bd2fa4fee2f84c6dbed86 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Nov 2021 00:07:47 +0000
+Subject: io_uring: improve send/recv error handling
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 7297ce3d59449de49d3c9e1f64ae25488750a1fc upstream.
+
+Hide all error handling under a common if block; this removes two extra
+ifs on the success path and keeps the handling more condensed.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/5761545158a12968f3caf30f747eea65ed75dfc1.1637524285.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 55 +++++++++++++++++++++++++--------------------
+ 1 file changed, 31 insertions(+), 24 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 0c4d16afb9ef..34dd6267679a 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -4706,17 +4706,18 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+       ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+-      if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
+-              return io_setup_async_msg(req, kmsg);
+-      if (ret == -ERESTARTSYS)
+-              ret = -EINTR;
++      if (ret < min_ret) {
++              if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
++                      return io_setup_async_msg(req, kmsg);
++              if (ret == -ERESTARTSYS)
++                      ret = -EINTR;
++              req_set_fail(req);
++      }
+       /* fast path, check for non-NULL to avoid function call */
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+-      if (ret < min_ret)
+-              req_set_fail(req);
+       __io_req_complete(req, issue_flags, ret, 0);
+       return 0;
+ }
+@@ -4752,13 +4753,13 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
+       msg.msg_flags = flags;
+       ret = sock_sendmsg(sock, &msg);
+-      if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
+-              return -EAGAIN;
+-      if (ret == -ERESTARTSYS)
+-              ret = -EINTR;
+-
+-      if (ret < min_ret)
++      if (ret < min_ret) {
++              if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
++                      return -EAGAIN;
++              if (ret == -ERESTARTSYS)
++                      ret = -EINTR;
+               req_set_fail(req);
++      }
+       __io_req_complete(req, issue_flags, ret, 0);
+       return 0;
+ }
+@@ -4945,10 +4946,15 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+       ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
+                                       kmsg->uaddr, flags);
+-      if (force_nonblock && ret == -EAGAIN)
+-              return io_setup_async_msg(req, kmsg);
+-      if (ret == -ERESTARTSYS)
+-              ret = -EINTR;
++      if (ret < min_ret) {
++              if (ret == -EAGAIN && force_nonblock)
++                      return io_setup_async_msg(req, kmsg);
++              if (ret == -ERESTARTSYS)
++                      ret = -EINTR;
++              req_set_fail(req);
++      } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
++              req_set_fail(req);
++      }
+       if (req->flags & REQ_F_BUFFER_SELECTED)
+               cflags = io_put_recv_kbuf(req);
+@@ -4956,8 +4962,6 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+-      if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
+-              req_set_fail(req);
+       __io_req_complete(req, issue_flags, ret, cflags);
+       return 0;
+ }
+@@ -5004,15 +5008,18 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
+               min_ret = iov_iter_count(&msg.msg_iter);
+       ret = sock_recvmsg(sock, &msg, flags);
+-      if (force_nonblock && ret == -EAGAIN)
+-              return -EAGAIN;
+-      if (ret == -ERESTARTSYS)
+-              ret = -EINTR;
+ out_free:
++      if (ret < min_ret) {
++              if (ret == -EAGAIN && force_nonblock)
++                      return -EAGAIN;
++              if (ret == -ERESTARTSYS)
++                      ret = -EINTR;
++              req_set_fail(req);
++      } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
++              req_set_fail(req);
++      }
+       if (req->flags & REQ_F_BUFFER_SELECTED)
+               cflags = io_put_recv_kbuf(req);
+-      if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
+-              req_set_fail(req);
+       __io_req_complete(req, issue_flags, ret, cflags);
+       return 0;
+ }
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-lock-overflowing-for-iopoll.patch b/queue-5.10/io_uring-lock-overflowing-for-iopoll.patch
new file mode 100644 (file)
index 0000000..33888d8
--- /dev/null
@@ -0,0 +1,79 @@
+From fcf3f646e11811e91abc477e5f256607c382b9ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 14 Jan 2023 09:14:03 -0700
+Subject: io_uring: lock overflowing for IOPOLL
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 544d163d659d45a206d8929370d5a2984e546cb7 upstream.
+
+syzbot reports an issue with overflow filling for IOPOLL:
+
+WARNING: CPU: 0 PID: 28 at io_uring/io_uring.c:734 io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734
+CPU: 0 PID: 28 Comm: kworker/u4:1 Not tainted 6.2.0-rc3-syzkaller-16369-g358a161a6a9e #0
+Workqueue: events_unbound io_ring_exit_work
+Call trace:
+ io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734
+ io_req_cqe_overflow+0x5c/0x70 io_uring/io_uring.c:773
+ io_fill_cqe_req io_uring/io_uring.h:168 [inline]
+ io_do_iopoll+0x474/0x62c io_uring/rw.c:1065
+ io_iopoll_try_reap_events+0x6c/0x108 io_uring/io_uring.c:1513
+ io_uring_try_cancel_requests+0x13c/0x258 io_uring/io_uring.c:3056
+ io_ring_exit_work+0xec/0x390 io_uring/io_uring.c:2869
+ process_one_work+0x2d8/0x504 kernel/workqueue.c:2289
+ worker_thread+0x340/0x610 kernel/workqueue.c:2436
+ kthread+0x12c/0x158 kernel/kthread.c:376
+ ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:863
+
+There is no real problem for normal IOPOLL as flush is also called with
+uring_lock taken, but it's getting more complicated for IOPOLL|SQPOLL,
+for which __io_cqring_overflow_flush() happens from the CQ waiting path.
+
+Reported-and-tested-by: syzbot+6805087452d72929404e@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org # 5.10+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 18 ++++++++++++++++--
+ 1 file changed, 16 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index f05f033d8496..b7bd5138bdaf 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2482,12 +2482,26 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
+       io_init_req_batch(&rb);
+       while (!list_empty(done)) {
++              struct io_uring_cqe *cqe;
++              unsigned cflags;
++
+               req = list_first_entry(done, struct io_kiocb, inflight_entry);
+               list_del(&req->inflight_entry);
+-
+-              io_fill_cqe_req(req, req->result, io_put_rw_kbuf(req));
++              cflags = io_put_rw_kbuf(req);
+               (*nr_events)++;
++              cqe = io_get_cqe(ctx);
++              if (cqe) {
++                      WRITE_ONCE(cqe->user_data, req->user_data);
++                      WRITE_ONCE(cqe->res, req->result);
++                      WRITE_ONCE(cqe->flags, cflags);
++              } else {
++                      spin_lock(&ctx->completion_lock);
++                      io_cqring_event_overflow(ctx, req->user_data,
++                                                      req->result, cflags);
++                      spin_unlock(&ctx->completion_lock);
++              }
++
+               if (req_ref_put_and_test(req))
+                       io_req_free_batch(&rb, req, &ctx->submit_state);
+       }
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch b/queue-5.10/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch
new file mode 100644 (file)
index 0000000..fb4bd1b
--- /dev/null
@@ -0,0 +1,65 @@
+From 57543eec020ee514e90dfd4c9eaf6938e861052e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Feb 2022 02:55:01 -0800
+Subject: io_uring: remove duplicated calls to io_kiocb_ppos
+
+From: Dylan Yudaken <dylany@fb.com>
+
+commit af9c45ecebaf1b428306f41421f4bcffe439f735 upstream.
+
+io_kiocb_ppos is called in both branches, and it seems that the compiler
+does not fuse this. Fusing removes a few bytes from loop_rw_iter.
+
+Before:
+$ nm -S fs/io_uring.o | grep loop_rw_iter
+0000000000002430 0000000000000124 t loop_rw_iter
+
+After:
+$ nm -S fs/io_uring.o | grep loop_rw_iter
+0000000000002430 000000000000010d t loop_rw_iter
+
+Signed-off-by: Dylan Yudaken <dylany@fb.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index f8a0d228d799..d8926475cd88 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3300,6 +3300,7 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
+       struct kiocb *kiocb = &req->rw.kiocb;
+       struct file *file = req->file;
+       ssize_t ret = 0;
++      loff_t *ppos;
+       /*
+        * Don't support polled IO through this interface, and we can't
+@@ -3311,6 +3312,8 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
+       if (kiocb->ki_flags & IOCB_NOWAIT)
+               return -EAGAIN;
++      ppos = io_kiocb_ppos(kiocb);
++
+       while (iov_iter_count(iter)) {
+               struct iovec iovec;
+               ssize_t nr;
+@@ -3324,10 +3327,10 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
+               if (rw == READ) {
+                       nr = file->f_op->read(file, iovec.iov_base,
+-                                            iovec.iov_len, io_kiocb_ppos(kiocb));
++                                            iovec.iov_len, ppos);
+               } else {
+                       nr = file->f_op->write(file, iovec.iov_base,
+-                                             iovec.iov_len, io_kiocb_ppos(kiocb));
++                                             iovec.iov_len, ppos);
+               }
+               if (nr < 0) {
+-- 
+2.39.0
+
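The transformation is easiest to see outside the kernel. A stand-alone sketch follows, where get_pos() is a hypothetical stand-in for io_kiocb_ppos() (a compiler may well fuse a helper this trivial on its own, unlike the kernel case above):

#include <stdio.h>

struct file { long pos; };

static long *get_pos(struct file *f) { return &f->pos; }

/* Before: the helper is evaluated in both branches, every iteration. */
static long sum_before(struct file *f, int rw, int n)
{
	long acc = 0;

	for (int i = 0; i < n; i++) {
		if (rw == 0)
			acc += *get_pos(f);
		else
			acc -= *get_pos(f);
	}
	return acc;
}

/* After: one call hoisted above the loop, reused in both branches. */
static long sum_after(struct file *f, int rw, int n)
{
	long *ppos = get_pos(f);
	long acc = 0;

	for (int i = 0; i < n; i++) {
		if (rw == 0)
			acc += *ppos;
		else
			acc -= *ppos;
	}
	return acc;
}

int main(void)
{
	struct file f = { .pos = 1 };

	printf("%ld %ld\n", sum_before(&f, 0, 4), sum_after(&f, 0, 4));
	return 0;
}

The before/after object sizes can be compared the same way as in the changelog, e.g. with nm -S on the compiled object.
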
diff --git a/queue-5.10/io_uring-rw-defer-fsnotify-calls-to-task-context.patch b/queue-5.10/io_uring-rw-defer-fsnotify-calls-to-task-context.patch
new file mode 100644 (file)
index 0000000..835139f
--- /dev/null
@@ -0,0 +1,122 @@
+From 1b2cbd41576d68fdd10224114346104806482df4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 13:38:51 -0700
+Subject: io_uring/rw: defer fsnotify calls to task context
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit b000145e9907809406d8164c3b2b8861d95aecd1 upstream.
+
+We can't call these from the kiocb completion as that might run in
+soft/hard irq context. Defer the calls to when we process the
+task_work for this request. That avoids valid complaints like:
+
+stack backtrace:
+CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.0.0-rc6-syzkaller-00321-g105a36f3694e #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+ print_usage_bug kernel/locking/lockdep.c:3961 [inline]
+ valid_state kernel/locking/lockdep.c:3973 [inline]
+ mark_lock_irq kernel/locking/lockdep.c:4176 [inline]
+ mark_lock.part.0.cold+0x18/0xd8 kernel/locking/lockdep.c:4632
+ mark_lock kernel/locking/lockdep.c:4596 [inline]
+ mark_usage kernel/locking/lockdep.c:4527 [inline]
+ __lock_acquire+0x11d9/0x56d0 kernel/locking/lockdep.c:5007
+ lock_acquire kernel/locking/lockdep.c:5666 [inline]
+ lock_acquire+0x1ab/0x570 kernel/locking/lockdep.c:5631
+ __fs_reclaim_acquire mm/page_alloc.c:4674 [inline]
+ fs_reclaim_acquire+0x115/0x160 mm/page_alloc.c:4688
+ might_alloc include/linux/sched/mm.h:271 [inline]
+ slab_pre_alloc_hook mm/slab.h:700 [inline]
+ slab_alloc mm/slab.c:3278 [inline]
+ __kmem_cache_alloc_lru mm/slab.c:3471 [inline]
+ kmem_cache_alloc+0x39/0x520 mm/slab.c:3491
+ fanotify_alloc_fid_event fs/notify/fanotify/fanotify.c:580 [inline]
+ fanotify_alloc_event fs/notify/fanotify/fanotify.c:813 [inline]
+ fanotify_handle_event+0x1130/0x3f40 fs/notify/fanotify/fanotify.c:948
+ send_to_group fs/notify/fsnotify.c:360 [inline]
+ fsnotify+0xafb/0x1680 fs/notify/fsnotify.c:570
+ __fsnotify_parent+0x62f/0xa60 fs/notify/fsnotify.c:230
+ fsnotify_parent include/linux/fsnotify.h:77 [inline]
+ fsnotify_file include/linux/fsnotify.h:99 [inline]
+ fsnotify_access include/linux/fsnotify.h:309 [inline]
+ __io_complete_rw_common+0x485/0x720 io_uring/rw.c:195
+ io_complete_rw+0x1a/0x1f0 io_uring/rw.c:228
+ iomap_dio_complete_work fs/iomap/direct-io.c:144 [inline]
+ iomap_dio_bio_end_io+0x438/0x5e0 fs/iomap/direct-io.c:178
+ bio_endio+0x5f9/0x780 block/bio.c:1564
+ req_bio_endio block/blk-mq.c:695 [inline]
+ blk_update_request+0x3fc/0x1300 block/blk-mq.c:825
+ scsi_end_request+0x7a/0x9a0 drivers/scsi/scsi_lib.c:541
+ scsi_io_completion+0x173/0x1f70 drivers/scsi/scsi_lib.c:971
+ scsi_complete+0x122/0x3b0 drivers/scsi/scsi_lib.c:1438
+ blk_complete_reqs+0xad/0xe0 block/blk-mq.c:1022
+ __do_softirq+0x1d3/0x9c6 kernel/softirq.c:571
+ invoke_softirq kernel/softirq.c:445 [inline]
+ __irq_exit_rcu+0x123/0x180 kernel/softirq.c:650
+ irq_exit_rcu+0x5/0x20 kernel/softirq.c:662
+ common_interrupt+0xa9/0xc0 arch/x86/kernel/irq.c:240
+
+Fixes: f63cf5192fe3 ("io_uring: ensure that fsnotify is always called")
+Link: https://lore.kernel.org/all/20220929135627.ykivmdks2w5vzrwg@quack3/
+Reported-by: syzbot+dfcc5f4da15868df7d4d@syzkaller.appspotmail.com
+Reported-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 22 +++++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index d4e017b07371..33e6e1011105 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2702,12 +2702,6 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
+ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
+ {
+-      if (req->rw.kiocb.ki_flags & IOCB_WRITE) {
+-              kiocb_end_write(req);
+-              fsnotify_modify(req->file);
+-      } else {
+-              fsnotify_access(req->file);
+-      }
+       if (res != req->result) {
+               if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
+                   io_rw_should_reissue(req)) {
+@@ -2760,6 +2754,20 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+       __io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), io_put_rw_kbuf(req));
+ }
++static void io_req_rw_complete(struct io_kiocb *req, bool *locked)
++{
++      struct io_rw *rw = &req->rw;
++
++      if (rw->kiocb.ki_flags & IOCB_WRITE) {
++              kiocb_end_write(req);
++              fsnotify_modify(req->file);
++      } else {
++              fsnotify_access(req->file);
++      }
++
++      io_req_task_complete(req, locked);
++}
++
+ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+ {
+       struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+@@ -2767,7 +2775,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+       if (__io_complete_rw_common(req, res))
+               return;
+       req->result = io_fixup_rw_res(req, res);
+-      req->io_task_work.func = io_req_task_complete;
++      req->io_task_work.func = io_req_rw_complete;
+       io_req_task_work_add(req);
+ }
+-- 
+2.39.0
+
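The shape of the fix is the usual defer-to-task_work pattern. A minimal sketch, with every type and callback illustrative rather than the kernel's: the (possibly irq-context) completion handler only records a callback, and the sleep-capable fsnotify-style work runs when that callback is later invoked from task context.

#include <stdbool.h>
#include <stdio.h>

struct request {
	bool is_write;
	void (*task_work_fn)(struct request *);
};

static void notify(struct request *req)     /* may sleep / allocate */
{
	printf("fsnotify_%s\n", req->is_write ? "modify" : "access");
}

static void complete(struct request *req)   /* final completion step */
{
	printf("completed\n");
}

/* Runs from task context: a safe place for the fsnotify-style call. */
static void rw_task_work(struct request *req)
{
	notify(req);
	complete(req);
}

/* Runs from (possibly) irq context: only defer, never call notify(). */
static void irq_completion(struct request *req)
{
	req->task_work_fn = rw_task_work;   /* queued, run later */
}

int main(void)
{
	struct request req = { .is_write = true };

	irq_completion(&req);
	req.task_work_fn(&req);             /* simulated task_work run */
	return 0;
}
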
diff --git a/queue-5.10/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch b/queue-5.10/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch
new file mode 100644 (file)
index 0000000..07f6c58
--- /dev/null
@@ -0,0 +1,111 @@
+From 64234e92ae08fd4259a60c12858cb1597bb76a33 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Apr 2022 19:21:36 -0600
+Subject: io_uring: support MSG_WAITALL for IORING_OP_SEND(MSG)
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 4c3c09439c08b03d9503df0ca4c7619c5842892e upstream.
+
+Like commit 7ba89d2af17a for recv/recvmsg, support MSG_WAITALL for the
+send side. If this flag is set and we do a short send, retry for a
+stream or seqpacket socket.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 36 +++++++++++++++++++++++++++++-------
+ 1 file changed, 29 insertions(+), 7 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 7f9fb0cb9230..75d833269751 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -4617,6 +4617,13 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
+ }
+ #if defined(CONFIG_NET)
++static bool io_net_retry(struct socket *sock, int flags)
++{
++      if (!(flags & MSG_WAITALL))
++              return false;
++      return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
++}
++
+ static int io_setup_async_msg(struct io_kiocb *req,
+                             struct io_async_msghdr *kmsg)
+ {
+@@ -4680,12 +4687,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       if (req->ctx->compat)
+               sr->msg_flags |= MSG_CMSG_COMPAT;
+ #endif
++      sr->done_io = 0;
+       return 0;
+ }
+ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       struct io_async_msghdr iomsg, *kmsg;
++      struct io_sr_msg *sr = &req->sr_msg;
+       struct socket *sock;
+       unsigned flags;
+       int min_ret = 0;
+@@ -4716,12 +4725,21 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+                       return io_setup_async_msg(req, kmsg);
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
++              if (ret > 0 && io_net_retry(sock, flags)) {
++                      sr->done_io += ret;
++                      req->flags |= REQ_F_PARTIAL_IO;
++                      return io_setup_async_msg(req, kmsg);
++              }
+               req_set_fail(req);
+       }
+       /* fast path, check for non-NULL to avoid function call */
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
++      if (ret >= 0)
++              ret += sr->done_io;
++      else if (sr->done_io)
++              ret = sr->done_io;
+       __io_req_complete(req, issue_flags, ret, 0);
+       return 0;
+ }
+@@ -4762,8 +4780,19 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
+                       return -EAGAIN;
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
++              if (ret > 0 && io_net_retry(sock, flags)) {
++                      sr->len -= ret;
++                      sr->buf += ret;
++                      sr->done_io += ret;
++                      req->flags |= REQ_F_PARTIAL_IO;
++                      return -EAGAIN;
++              }
+               req_set_fail(req);
+       }
++      if (ret >= 0)
++              ret += sr->done_io;
++      else if (sr->done_io)
++              ret = sr->done_io;
+       __io_req_complete(req, issue_flags, ret, 0);
+       return 0;
+ }
+@@ -4911,13 +4940,6 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       return 0;
+ }
+-static bool io_net_retry(struct socket *sock, int flags)
+-{
+-      if (!(flags & MSG_WAITALL))
+-              return false;
+-      return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+-}
+-
+ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       struct io_async_msghdr iomsg, *kmsg;
+-- 
+2.39.0
+
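A sketch of the accounting this adds, assuming a mock transport that moves at most 8 bytes per call (net_retry() mirrors io_net_retry(); everything else, including the flag values, is illustrative): partial progress is banked in done_io and the send is retried until the full length has gone out, instead of completing short.

#include <stdbool.h>
#include <stdio.h>
#include <sys/types.h>

#define MSG_WAITALL    0x100   /* illustrative values, not uapi */
#define SOCK_STREAM    1
#define SOCK_SEQPACKET 5

struct sr_msg { size_t len; size_t done_io; };

static bool net_retry(int sock_type, int flags)
{
	if (!(flags & MSG_WAITALL))
		return false;
	return sock_type == SOCK_STREAM || sock_type == SOCK_SEQPACKET;
}

/* mock transport: short sends, at most 8 bytes per call */
static ssize_t mock_send(size_t len) { return len < 8 ? (ssize_t)len : 8; }

static ssize_t do_send(struct sr_msg *sr, int sock_type, int flags)
{
	for (;;) {
		ssize_t ret = mock_send(sr->len - sr->done_io);

		if (ret > 0 && sr->done_io + ret < sr->len &&
		    net_retry(sock_type, flags)) {
			sr->done_io += ret;   /* bank partial progress */
			continue;             /* stands in for -EAGAIN retry */
		}
		/* same epilogue as the patch: report total progress */
		return ret >= 0 ? (ssize_t)(sr->done_io + ret)
				: (sr->done_io ? (ssize_t)sr->done_io : ret);
	}
}

int main(void)
{
	struct sr_msg sr = { .len = 20 };

	printf("sent %zd of %zu\n", do_send(&sr, SOCK_STREAM, MSG_WAITALL),
	       sr.len);
	return 0;
}
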
diff --git a/queue-5.10/io_uring-update-kiocb-ki_pos-at-execution-time.patch b/queue-5.10/io_uring-update-kiocb-ki_pos-at-execution-time.patch
new file mode 100644 (file)
index 0000000..1b701b2
--- /dev/null
@@ -0,0 +1,86 @@
+From 95109edd090ae15269d711c14faf965d7b4d8b83 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Feb 2022 02:55:02 -0800
+Subject: io_uring: update kiocb->ki_pos at execution time
+
+From: Dylan Yudaken <dylany@fb.com>
+
+commit d34e1e5b396a0dbaa4a29b7138df662cfb9d8e8e upstream.
+
+Update kiocb->ki_pos at execution time rather than in io_prep_rw().
+io_prep_rw() happens before the job is enqueued to a worker and so the
+offset might be read multiple times before being executed once.
+
+This ensures that the file position in a set of _linked_ SQEs will only
+be obtained after earlier SQEs have completed, and so will include their
+incremented file position.
+
+Signed-off-by: Dylan Yudaken <dylany@fb.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 26 ++++++++++++++++++--------
+ 1 file changed, 18 insertions(+), 8 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index d8926475cd88..eaf8463c9b14 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2919,14 +2919,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+               req->flags |= REQ_F_ISREG;
+       kiocb->ki_pos = READ_ONCE(sqe->off);
+-      if (kiocb->ki_pos == -1) {
+-              if (!(file->f_mode & FMODE_STREAM)) {
+-                      req->flags |= REQ_F_CUR_POS;
+-                      kiocb->ki_pos = file->f_pos;
+-              } else {
+-                      kiocb->ki_pos = 0;
+-              }
+-      }
+       kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
+       kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
+       ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
+@@ -3008,6 +3000,20 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+       }
+ }
++static inline void io_kiocb_update_pos(struct io_kiocb *req)
++{
++      struct kiocb *kiocb = &req->rw.kiocb;
++
++      if (kiocb->ki_pos == -1) {
++              if (!(req->file->f_mode & FMODE_STREAM)) {
++                      req->flags |= REQ_F_CUR_POS;
++                      kiocb->ki_pos = req->file->f_pos;
++              } else {
++                      kiocb->ki_pos = 0;
++              }
++      }
++}
++
+ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
+                      unsigned int issue_flags)
+ {
+@@ -3563,6 +3569,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
+               return ret ?: -EAGAIN;
+       }
++      io_kiocb_update_pos(req);
++
+       ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
+       if (unlikely(ret)) {
+               kfree(iovec);
+@@ -3697,6 +3705,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
+           (req->flags & REQ_F_ISREG))
+               goto copy_iov;
++      io_kiocb_update_pos(req);
++
+       ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
+       if (unlikely(ret))
+               goto out_free;
+-- 
+2.39.0
+
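From userspace, the difference shows up with linked SQEs that use the current file position. A hedged sketch (assumes liburing and a readable /etc/hostname; error handling trimmed): both reads pass offset -1, meaning "use f_pos", and with this change the second read resolves the position only after the first has completed and advanced it.

#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>

int main(void)
{
	struct io_uring ring;
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	char a[16], b[16];
	int fd = open("/etc/hostname", O_RDONLY);   /* sketch: unchecked */

	io_uring_queue_init(4, &ring, 0);

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, a, sizeof(a), -1);  /* use f_pos */
	sqe->flags |= IOSQE_IO_LINK;                    /* order the pair */

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_read(sqe, fd, b, sizeof(b), -1);  /* sees advanced pos */

	io_uring_submit(&ring);
	for (int i = 0; i < 2; i++) {
		io_uring_wait_cqe(&ring, &cqe);
		printf("read %d bytes\n", cqe->res);
		io_uring_cqe_seen(&ring, cqe);
	}
	io_uring_queue_exit(&ring);
	return 0;
}
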
diff --git a/queue-5.10/series b/queue-5.10/series
index fe23331850ec1a244905155506981afc26a1252d..fe767a8e5cb08713c0c8132e1c11cfa84cf96959 100644 (file)
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -12,3 +12,23 @@ wifi-mac80211-sdata-can-be-null-during-ampdu-start.patch
 add-exception-protection-processing-for-vd-in-axi_chan_handle_err-function.patch
 zonefs-detect-append-writes-at-invalid-locations.patch
 nilfs2-fix-general-protection-fault-in-nilfs_btree_insert.patch
+efi-fix-userspace-infinite-retry-read-efivars-after-.patch
+alsa-hda-realtek-turn-on-power-early.patch
+drm-i915-gt-reset-twice.patch
+bluetooth-hci_qca-wait-for-timeout-during-suspend.patch
+bluetooth-hci_qca-fix-driver-shutdown-on-closed-serd.patch
+io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch
+io_uring-improve-send-recv-error-handling.patch
+io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch
+io_uring-add-flag-for-disabling-provided-buffer-recy.patch
+io_uring-support-msg_waitall-for-ioring_op_send-msg.patch
+io_uring-allow-re-poll-if-we-made-progress.patch
+io_uring-fix-async-accept-on-o_nonblock-sockets.patch
+io_uring-check-for-valid-register-opcode-earlier.patch
+io_uring-lock-overflowing-for-iopoll.patch
+io_uring-fix-cq-waiting-timeout-handling.patch
+io_uring-ensure-that-cached-task-references-are-alwa.patch
+io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch
+io_uring-update-kiocb-ki_pos-at-execution-time.patch
+io_uring-do-not-recalculate-ppos-unnecessarily.patch
+io_uring-rw-defer-fsnotify-calls-to-task-context.patch