From a0ab1f28eb4e11713936796db18048a7a7014cd3 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 21 Jan 2023 23:27:50 -0500 Subject: [PATCH] Fixes for 5.10 Signed-off-by: Sasha Levin --- ...alsa-hda-realtek-turn-on-power-early.patch | 86 +++++++++ ...a-fix-driver-shutdown-on-closed-serd.patch | 95 ++++++++++ ..._qca-wait-for-timeout-during-suspend.patch | 164 ++++++++++++++++++ queue-5.10/drm-i915-gt-reset-twice.patch | 90 ++++++++++ ...e-infinite-retry-read-efivars-after-.patch | 46 +++++ ...g-for-disabling-provided-buffer-recy.patch | 60 +++++++ ...ng-allow-re-poll-if-we-made-progress.patch | 53 ++++++ ...ck-for-valid-register-opcode-earlier.patch | 45 +++++ ...o-not-recalculate-ppos-unnecessarily.patch | 100 +++++++++++ ...ate-task_work-run-on-tif_notify_sign.patch | 47 +++++ ...recv-and-recvmsg-handle-msg_waitall-.patch | 107 ++++++++++++ ...that-cached-task-references-are-alwa.patch | 55 ++++++ ...x-async-accept-on-o_nonblock-sockets.patch | 50 ++++++ ...ring-fix-cq-waiting-timeout-handling.patch | 60 +++++++ ...ing-improve-send-recv-error-handling.patch | 126 ++++++++++++++ ...io_uring-lock-overflowing-for-iopoll.patch | 79 +++++++++ ...ve-duplicated-calls-to-io_kiocb_ppos.patch | 65 +++++++ ...defer-fsnotify-calls-to-task-context.patch | 122 +++++++++++++ ...t-msg_waitall-for-ioring_op_send-msg.patch | 111 ++++++++++++ ...pdate-kiocb-ki_pos-at-execution-time.patch | 86 +++++++++ queue-5.10/series | 20 +++ 21 files changed, 1667 insertions(+) create mode 100644 queue-5.10/alsa-hda-realtek-turn-on-power-early.patch create mode 100644 queue-5.10/bluetooth-hci_qca-fix-driver-shutdown-on-closed-serd.patch create mode 100644 queue-5.10/bluetooth-hci_qca-wait-for-timeout-during-suspend.patch create mode 100644 queue-5.10/drm-i915-gt-reset-twice.patch create mode 100644 queue-5.10/efi-fix-userspace-infinite-retry-read-efivars-after-.patch create mode 100644 queue-5.10/io_uring-add-flag-for-disabling-provided-buffer-recy.patch create mode 100644 queue-5.10/io_uring-allow-re-poll-if-we-made-progress.patch create mode 100644 queue-5.10/io_uring-check-for-valid-register-opcode-earlier.patch create mode 100644 queue-5.10/io_uring-do-not-recalculate-ppos-unnecessarily.patch create mode 100644 queue-5.10/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch create mode 100644 queue-5.10/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch create mode 100644 queue-5.10/io_uring-ensure-that-cached-task-references-are-alwa.patch create mode 100644 queue-5.10/io_uring-fix-async-accept-on-o_nonblock-sockets.patch create mode 100644 queue-5.10/io_uring-fix-cq-waiting-timeout-handling.patch create mode 100644 queue-5.10/io_uring-improve-send-recv-error-handling.patch create mode 100644 queue-5.10/io_uring-lock-overflowing-for-iopoll.patch create mode 100644 queue-5.10/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch create mode 100644 queue-5.10/io_uring-rw-defer-fsnotify-calls-to-task-context.patch create mode 100644 queue-5.10/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch create mode 100644 queue-5.10/io_uring-update-kiocb-ki_pos-at-execution-time.patch diff --git a/queue-5.10/alsa-hda-realtek-turn-on-power-early.patch b/queue-5.10/alsa-hda-realtek-turn-on-power-early.patch new file mode 100644 index 00000000000..e4d7caed07f --- /dev/null +++ b/queue-5.10/alsa-hda-realtek-turn-on-power-early.patch @@ -0,0 +1,86 @@ +From 6332fca1aff9874462003d3878c7a7e069280a0d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 30 Dec 2022 15:22:25 +0800 +Subject: ALSA: hda/realtek 
- Turn on power early
+
+From: Yuchi Yang 
+
+[ Upstream commit 1f680609bf1beac20e2a31ddcb1b88874123c39f ]
+
+Turn on the power early to avoid leaving the power-related registers in
+a wrong state. This also allows the JD state to be updated earlier when
+resuming.
+
+Signed-off-by: Yuchi Yang 
+Cc: 
+Link: https://lore.kernel.org/r/e35d8f4fa18f4448a2315cc7d4a3715f@realtek.com
+Signed-off-by: Takashi Iwai 
+Signed-off-by: Sasha Levin 
+---
+ sound/pci/hda/patch_realtek.c | 30 ++++++++++++++++--------------
+ 1 file changed, 16 insertions(+), 14 deletions(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index eb7dd457ef5a..cfd86389d37f 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -3561,6 +3561,15 @@ static void alc256_init(struct hda_codec *codec)
+ hda_nid_t hp_pin = alc_get_hp_pin(spec);
+ bool hp_pin_sense;
+
++ if (spec->ultra_low_power) {
++ alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1);
++ alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2);
++ alc_update_coef_idx(codec, 0x08, 7<<4, 0);
++ alc_update_coef_idx(codec, 0x3b, 1<<15, 0);
++ alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
++ msleep(30);
++ }
++
+ if (!hp_pin)
+ hp_pin = 0x21;
+
+@@ -3572,14 +3581,6 @@ static void alc256_init(struct hda_codec *codec)
+ msleep(2);
+
+ alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
+- if (spec->ultra_low_power) {
+- alc_update_coef_idx(codec, 0x03, 1<<1, 1<<1);
+- alc_update_coef_idx(codec, 0x08, 3<<2, 3<<2);
+- alc_update_coef_idx(codec, 0x08, 7<<4, 0);
+- alc_update_coef_idx(codec, 0x3b, 1<<15, 0);
+- alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
+- msleep(30);
+- }
+
+ snd_hda_codec_write(codec, hp_pin, 0,
+ AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_MUTE);
+@@ -3661,6 +3662,13 @@ static void alc225_init(struct hda_codec *codec)
+ hda_nid_t hp_pin = alc_get_hp_pin(spec);
+ bool hp1_pin_sense, hp2_pin_sense;
+
++ if (spec->ultra_low_power) {
++ alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2);
++ alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
++ alc_update_coef_idx(codec, 0x33, 1<<11, 0);
++ msleep(30);
++ }
++
+ if (!hp_pin)
+ hp_pin = 0x21;
+ msleep(30);
+@@ -3672,12 +3680,6 @@ static void alc225_init(struct hda_codec *codec)
+ msleep(2);
+
+ alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x1); /* Low power */
+- if (spec->ultra_low_power) {
+- alc_update_coef_idx(codec, 0x08, 0x0f << 2, 3<<2);
+- alc_update_coef_idx(codec, 0x0e, 7<<6, 7<<6);
+- alc_update_coef_idx(codec, 0x33, 1<<11, 0);
+- msleep(30);
+- }
+
+ if (hp1_pin_sense || spec->ultra_low_power)
+ snd_hda_codec_write(codec, hp_pin, 0,
+-- 
+2.39.0
+
diff --git a/queue-5.10/bluetooth-hci_qca-fix-driver-shutdown-on-closed-serd.patch b/queue-5.10/bluetooth-hci_qca-fix-driver-shutdown-on-closed-serd.patch
new file mode 100644
index 00000000000..88c68de37a1
--- /dev/null
+++ b/queue-5.10/bluetooth-hci_qca-fix-driver-shutdown-on-closed-serd.patch
@@ -0,0 +1,95 @@
+From a52afdced306001f6015a7468158effdca550a1b Mon Sep 17 00:00:00 2001
+From: Sasha Levin 
+Date: Thu, 29 Dec 2022 11:28:29 +0100
+Subject: Bluetooth: hci_qca: Fix driver shutdown on closed serdev
+
+From: Krzysztof Kozlowski 
+
+[ Upstream commit 272970be3dabd24cbe50e393ffee8f04aec3b9a8 ]
+
+The driver shutdown callback (which sends EDL_SOC_RESET to the device
+over serdev) should not be invoked when the HCI device is not open (e.g.
+if hci_dev_open_sync() failed), because the serdev and its TTY are not
+open either. Also skip this step if the device is powered off
+(qca_power_shutdown()). 
+
+The shutdown callback causes use-after-free during system reboot with
+Qualcomm Atheros Bluetooth:
+
+  Unable to handle kernel paging request at virtual address
+  0072662f67726fd7
+  ...
+  CPU: 6 PID: 1 Comm: systemd-shutdow Tainted: G        W
+  6.1.0-rt5-00325-g8a5f56bcfcca #8
+  Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT)
+  Call trace:
+   tty_driver_flush_buffer+0x4/0x30
+   serdev_device_write_flush+0x24/0x34
+   qca_serdev_shutdown+0x80/0x130 [hci_uart]
+   device_shutdown+0x15c/0x260
+   kernel_restart+0x48/0xac
+
+KASAN report:
+
+  BUG: KASAN: use-after-free in tty_driver_flush_buffer+0x1c/0x50
+  Read of size 8 at addr ffff16270c2e0018 by task systemd-shutdow/1
+
+  CPU: 7 PID: 1 Comm: systemd-shutdow Not tainted
+  6.1.0-next-20221220-00014-gb85aaf97fb01-dirty #28
+  Hardware name: Qualcomm Technologies, Inc. Robotics RB5 (DT)
+  Call trace:
+   dump_backtrace.part.0+0xdc/0xf0
+   show_stack+0x18/0x30
+   dump_stack_lvl+0x68/0x84
+   print_report+0x188/0x488
+   kasan_report+0xa4/0xf0
+   __asan_load8+0x80/0xac
+   tty_driver_flush_buffer+0x1c/0x50
+   ttyport_write_flush+0x34/0x44
+   serdev_device_write_flush+0x48/0x60
+   qca_serdev_shutdown+0x124/0x274
+   device_shutdown+0x1e8/0x350
+   kernel_restart+0x48/0xb0
+   __do_sys_reboot+0x244/0x2d0
+   __arm64_sys_reboot+0x54/0x70
+   invoke_syscall+0x60/0x190
+   el0_svc_common.constprop.0+0x7c/0x160
+   do_el0_svc+0x44/0xf0
+   el0_svc+0x2c/0x6c
+   el0t_64_sync_handler+0xbc/0x140
+   el0t_64_sync+0x190/0x194
+
+Fixes: 7e7bbddd029b ("Bluetooth: hci_qca: Fix qca6390 enable failure after warm reboot")
+Cc: 
+Signed-off-by: Krzysztof Kozlowski 
+Signed-off-by: Luiz Augusto von Dentz 
+Signed-off-by: Sasha Levin 
+---
+ drivers/bluetooth/hci_qca.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index 652290425028..096a604ef47f 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -2064,10 +2064,17 @@ static void qca_serdev_shutdown(struct device *dev)
+ int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS);
+ struct serdev_device *serdev = to_serdev_device(dev);
+ struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
++ struct hci_uart *hu = &qcadev->serdev_hu;
++ struct hci_dev *hdev = hu->hdev;
++ struct qca_data *qca = hu->priv;
+ const u8 ibs_wake_cmd[] = { 0xFD };
+ const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 };
+
+ if (qcadev->btsoc_type == QCA_QCA6390) {
++ if (test_bit(QCA_BT_OFF, &qca->flags) ||
++ !test_bit(HCI_RUNNING, &hdev->flags))
++ return;
++
+ serdev_device_write_flush(serdev);
+ ret = serdev_device_write_buf(serdev, ibs_wake_cmd,
+ sizeof(ibs_wake_cmd));
+-- 
+2.39.0
+
diff --git a/queue-5.10/bluetooth-hci_qca-wait-for-timeout-during-suspend.patch b/queue-5.10/bluetooth-hci_qca-wait-for-timeout-during-suspend.patch
new file mode 100644
index 00000000000..e71f80171a5
--- /dev/null
+++ b/queue-5.10/bluetooth-hci_qca-wait-for-timeout-during-suspend.patch
@@ -0,0 +1,164 @@
+From 7b3e495e12e1eccee6464b6ab99c1aee4dc222b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin 
+Date: Tue, 6 Oct 2020 20:50:21 +0530
+Subject: Bluetooth: hci_qca: Wait for timeout during suspend
+
+From: Venkata Lakshmi Narayana Gubba 
+
+[ Upstream commit 2be43abac5a839d44bf9d14716573ae0ac920f2b ]
+
+Currently qca_suspend() relies on the IBS mechanism. During
+FW download and memory dump collection, IBS will be disabled.
+In those cases, the driver will allow suspend yet still use the
+serdev port, which results in errors. 
Now add a wait with timeout
+if suspend is triggered during FW download or memory dump collection.
+
+Signed-off-by: Venkata Lakshmi Narayana Gubba 
+Signed-off-by: Balakrishna Godavarthi 
+Reviewed-by: Abhishek Pandit-Subedi 
+Signed-off-by: Marcel Holtmann 
+Stable-dep-of: 272970be3dab ("Bluetooth: hci_qca: Fix driver shutdown on closed serdev")
+Signed-off-by: Sasha Levin 
+---
+ drivers/bluetooth/hci_qca.c | 48 ++++++++++++++++++++++++++++++-------
+ 1 file changed, 39 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
+index 60b0e13bb9fc..652290425028 100644
+--- a/drivers/bluetooth/hci_qca.c
++++ b/drivers/bluetooth/hci_qca.c
+@@ -50,6 +50,8 @@
+ #define IBS_HOST_TX_IDLE_TIMEOUT_MS 2000
+ #define CMD_TRANS_TIMEOUT_MS 100
+ #define MEMDUMP_TIMEOUT_MS 8000
++#define IBS_DISABLE_SSR_TIMEOUT_MS (MEMDUMP_TIMEOUT_MS + 1000)
++#define FW_DOWNLOAD_TIMEOUT_MS 3000
+
+ /* susclk rate */
+ #define SUSCLK_RATE_32KHZ 32768
+@@ -68,12 +70,13 @@
+ #define QCA_MEMDUMP_BYTE 0xFB
+
+ enum qca_flags {
+- QCA_IBS_ENABLED,
++ QCA_IBS_DISABLED,
+ QCA_DROP_VENDOR_EVENT,
+ QCA_SUSPENDING,
+ QCA_MEMDUMP_COLLECTION,
+ QCA_HW_ERROR_EVENT,
+- QCA_SSR_TRIGGERED
++ QCA_SSR_TRIGGERED,
++ QCA_BT_OFF
+ };
+
+ enum qca_capabilities {
+@@ -870,7 +873,7 @@ static int qca_enqueue(struct hci_uart *hu, struct sk_buff *skb)
+ * Out-Of-Band(GPIOs control) sleep is selected.
+ * Don't wake the device up when suspending.
+ */
+- if (!test_bit(QCA_IBS_ENABLED, &qca->flags) ||
++ if (test_bit(QCA_IBS_DISABLED, &qca->flags) ||
+ test_bit(QCA_SUSPENDING, &qca->flags)) {
+ skb_queue_tail(&qca->txq, skb);
+ spin_unlock_irqrestore(&qca->hci_ibs_lock, flags);
+@@ -1015,7 +1018,7 @@ static void qca_controller_memdump(struct work_struct *work)
+ * the controller to send the dump is 8 seconds. let us
+ * start timer to handle this asynchronous activity.
+ */
+- clear_bit(QCA_IBS_ENABLED, &qca->flags);
++ set_bit(QCA_IBS_DISABLED, &qca->flags);
+ set_bit(QCA_MEMDUMP_COLLECTION, &qca->flags);
+ dump = (void *) skb->data;
+ dump_size = __le32_to_cpu(dump->dump_size);
+@@ -1621,6 +1624,7 @@ static int qca_power_on(struct hci_dev *hdev)
+ struct hci_uart *hu = hci_get_drvdata(hdev);
+ enum qca_btsoc_type soc_type = qca_soc_type(hu);
+ struct qca_serdev *qcadev;
++ struct qca_data *qca = hu->priv;
+ int ret = 0;
+
+ /* Non-serdev device usually is powered by external power
+@@ -1640,6 +1644,7 @@ static int qca_power_on(struct hci_dev *hdev)
+ }
+ }
+
++ clear_bit(QCA_BT_OFF, &qca->flags);
+ return ret;
+ }
+
+@@ -1659,7 +1664,7 @@ static int qca_setup(struct hci_uart *hu)
+ return ret;
+
+ /* Patch downloading has to be done without IBS mode */
+- clear_bit(QCA_IBS_ENABLED, &qca->flags);
++ set_bit(QCA_IBS_DISABLED, &qca->flags);
+
+ /* Enable controller to do both LE scan and BR/EDR inquiry
+ * simultaneously.
+@@ -1710,7 +1715,7 @@ static int qca_setup(struct hci_uart *hu)
+ ret = qca_uart_setup(hdev, qca_baudrate, soc_type, soc_ver,
+ firmware_name);
+ if (!ret) {
+- set_bit(QCA_IBS_ENABLED, &qca->flags);
++ clear_bit(QCA_IBS_DISABLED, &qca->flags);
+ qca_debugfs_init(hdev);
+ hu->hdev->hw_error = qca_hw_error;
+ hu->hdev->cmd_timeout = qca_cmd_timeout;
+@@ -1814,7 +1819,7 @@ static void qca_power_shutdown(struct hci_uart *hu)
+ * data in skb's. 
+ */ + spin_lock_irqsave(&qca->hci_ibs_lock, flags); +- clear_bit(QCA_IBS_ENABLED, &qca->flags); ++ set_bit(QCA_IBS_DISABLED, &qca->flags); + qca_flush(hu); + spin_unlock_irqrestore(&qca->hci_ibs_lock, flags); + +@@ -1833,6 +1838,8 @@ static void qca_power_shutdown(struct hci_uart *hu) + } else if (qcadev->bt_en) { + gpiod_set_value_cansleep(qcadev->bt_en, 0); + } ++ ++ set_bit(QCA_BT_OFF, &qca->flags); + } + + static int qca_power_off(struct hci_dev *hdev) +@@ -2093,11 +2100,34 @@ static int __maybe_unused qca_suspend(struct device *dev) + bool tx_pending = false; + int ret = 0; + u8 cmd; ++ u32 wait_timeout = 0; + + set_bit(QCA_SUSPENDING, &qca->flags); + +- /* Device is downloading patch or doesn't support in-band sleep. */ +- if (!test_bit(QCA_IBS_ENABLED, &qca->flags)) ++ if (test_bit(QCA_BT_OFF, &qca->flags)) ++ return 0; ++ ++ if (test_bit(QCA_IBS_DISABLED, &qca->flags)) { ++ wait_timeout = test_bit(QCA_SSR_TRIGGERED, &qca->flags) ? ++ IBS_DISABLE_SSR_TIMEOUT_MS : ++ FW_DOWNLOAD_TIMEOUT_MS; ++ ++ /* QCA_IBS_DISABLED flag is set to true, During FW download ++ * and during memory dump collection. It is reset to false, ++ * After FW download complete and after memory dump collections. ++ */ ++ wait_on_bit_timeout(&qca->flags, QCA_IBS_DISABLED, ++ TASK_UNINTERRUPTIBLE, msecs_to_jiffies(wait_timeout)); ++ ++ if (test_bit(QCA_IBS_DISABLED, &qca->flags)) { ++ bt_dev_err(hu->hdev, "SSR or FW download time out"); ++ ret = -ETIMEDOUT; ++ goto error; ++ } ++ } ++ ++ /* After memory dump collection, Controller is powered off.*/ ++ if (test_bit(QCA_BT_OFF, &qca->flags)) + return 0; + + cancel_work_sync(&qca->ws_awake_device); +-- +2.39.0 + diff --git a/queue-5.10/drm-i915-gt-reset-twice.patch b/queue-5.10/drm-i915-gt-reset-twice.patch new file mode 100644 index 00000000000..ddda04d69de --- /dev/null +++ b/queue-5.10/drm-i915-gt-reset-twice.patch @@ -0,0 +1,90 @@ +From 3d0246017a00d063299130a38f1b8207af40f6e3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 12 Dec 2022 17:13:38 +0100 +Subject: drm/i915/gt: Reset twice + +From: Chris Wilson + +[ Upstream commit d3de5616d36462a646f5b360ba82d3b09ff668eb ] + +After applying an engine reset, on some platforms like Jasperlake, we +occasionally detect that the engine state is not cleared until shortly +after the resume. As we try to resume the engine with volatile internal +state, the first request fails with a spurious CS event (it looks like +it reports a lite-restore to the hung context, instead of the expected +idle->active context switch). 
+
+Signed-off-by: Chris Wilson 
+Cc: stable@vger.kernel.org
+Cc: Mika Kuoppala 
+Signed-off-by: Andi Shyti 
+Reviewed-by: Gwan-gyeong Mun 
+Link: https://patchwork.freedesktop.org/patch/msgid/20221212161338.1007659-1-andi.shyti@linux.intel.com
+(cherry picked from commit 3db9d590557da3aa2c952f2fecd3e9b703dad790)
+Signed-off-by: Rodrigo Vivi 
+Signed-off-by: Sasha Levin 
+---
+ drivers/gpu/drm/i915/gt/intel_reset.c | 34 ++++++++++++++++++++++-----
+ 1 file changed, 28 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c
+index ac36b67fb46b..00b5912a88b8 100644
+--- a/drivers/gpu/drm/i915/gt/intel_reset.c
++++ b/drivers/gpu/drm/i915/gt/intel_reset.c
+@@ -289,6 +289,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask,
+ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
+ {
+ struct intel_uncore *uncore = gt->uncore;
++ int loops = 2;
+ int err;
+
+ /*
+@@ -296,18 +297,39 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
+ * for fifo space for the write or forcewake the chip for
+ * the read
+ */
+- intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
++ do {
++ intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);
+
+- /* Wait for the device to ack the reset requests */
+- err = __intel_wait_for_register_fw(uncore,
+- GEN6_GDRST, hw_domain_mask, 0,
+- 500, 0,
+- NULL);
++ /*
++ * Wait for the device to ack the reset requests.
++ *
++ * On some platforms, e.g. Jasperlake, we see that the
++ * engine register state is not cleared until shortly after
++ * GDRST reports completion, causing a failure as we try
++ * to immediately resume while the internal state is still
++ * in flux. If we immediately repeat the reset, the second
++ * reset appears to serialise with the first, and since
++ * it is a no-op, the registers should retain their reset
++ * value. However, there is still a concern that upon
++ * leaving the second reset, the internal engine state
++ * is still in flux and not ready for resuming.
++ */
++ err = __intel_wait_for_register_fw(uncore, GEN6_GDRST,
++ hw_domain_mask, 0,
++ 2000, 0,
++ NULL);
++ } while (err == 0 && --loops);
+ if (err)
+ drm_dbg(&gt->i915->drm,
+ "Wait for 0x%08x engines reset failed\n",
+ hw_domain_mask);
+
++ /*
++ * As we have observed that the engine state is still volatile
++ * after GDRST is acked, impose a small delay to let everything settle.
++ */
++ udelay(50);
++
+ return err;
+ }
+
+-- 
+2.39.0
+
diff --git a/queue-5.10/efi-fix-userspace-infinite-retry-read-efivars-after-.patch b/queue-5.10/efi-fix-userspace-infinite-retry-read-efivars-after-.patch
new file mode 100644
index 00000000000..a1c7b0e9ccd
--- /dev/null
+++ b/queue-5.10/efi-fix-userspace-infinite-retry-read-efivars-after-.patch
@@ -0,0 +1,46 @@
+From 7806a2d37dd3220a1aa94e587bd5b0ca2958f951 Mon Sep 17 00:00:00 2001
+From: Sasha Levin 
+Date: Tue, 27 Dec 2022 23:09:36 +0800
+Subject: efi: fix userspace infinite retry read efivars after EFI runtime
+ services page fault
+
+From: Ding Hui 
+
+[ Upstream commit e006ac3003080177cf0b673441a4241f77aaecce ]
+
+After [1][2], if we catch an exception while running an EFI runtime
+service, we clear the EFI_RUNTIME_SERVICES bit to disable the EFI
+runtime services, so any subsequent routine that invokes an EFI runtime
+service should fail.
+
+But userspace reading efivars through /sys/firmware/efi/efivars/ will
+get stuck in an infinite loop calling read(), because
+efivarfs_file_read() returns -EINTR. 
The -EINTR is converted from EFI_ABORTED by efi_status_to_err(), and is
+an improper return value in this situation, so let virt_efi_xxx() return
+EFI_DEVICE_ERROR, which is converted to -EIO for the invoker.
+
+Cc: 
+Fixes: 3425d934fc03 ("efi/x86: Handle page faults occurring while running EFI runtime services")
+Fixes: 23715a26c8d8 ("arm64: efi: Recover from synchronous exceptions occurring in firmware")
+Signed-off-by: Ding Hui 
+Signed-off-by: Ard Biesheuvel 
+Signed-off-by: Sasha Levin 
+---
+ drivers/firmware/efi/runtime-wrappers.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
+index f3e54f6616f0..60075e0e4943 100644
+--- a/drivers/firmware/efi/runtime-wrappers.c
++++ b/drivers/firmware/efi/runtime-wrappers.c
+@@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work;
+ \
+ if (!efi_enabled(EFI_RUNTIME_SERVICES)) { \
+ pr_warn_once("EFI Runtime Services are disabled!\n"); \
++ efi_rts_work.status = EFI_DEVICE_ERROR; \
+ goto exit; \
+ } \
+ \
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-add-flag-for-disabling-provided-buffer-recy.patch b/queue-5.10/io_uring-add-flag-for-disabling-provided-buffer-recy.patch
new file mode 100644
index 00000000000..c130bcc4001
--- /dev/null
+++ b/queue-5.10/io_uring-add-flag-for-disabling-provided-buffer-recy.patch
@@ -0,0 +1,60 @@
+From eb53b52ebcf3811844dfa6718842c85d9bbe1401 Mon Sep 17 00:00:00 2001
+From: Sasha Levin 
+Date: Wed, 23 Mar 2022 09:30:05 -0600
+Subject: io_uring: add flag for disabling provided buffer recycling
+
+From: Jens Axboe 
+
+commit 8a3e8ee56417f5e0e66580d93941ed9d6f4c8274 upstream.
+
+If we need to continue doing this IO, then we don't want a potentially
+selected buffer recycled. Add a flag for that.
+
+Set this for recv/recvmsg if they do partial IO. 
+ +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 5 +++++ + 1 file changed, 5 insertions(+) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 3d67b9b4100f..7f9fb0cb9230 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -740,6 +740,7 @@ enum { + REQ_F_CREDS_BIT, + REQ_F_REFCOUNT_BIT, + REQ_F_ARM_LTIMEOUT_BIT, ++ REQ_F_PARTIAL_IO_BIT, + /* keep async read/write and isreg together and in order */ + REQ_F_NOWAIT_READ_BIT, + REQ_F_NOWAIT_WRITE_BIT, +@@ -795,6 +796,8 @@ enum { + REQ_F_REFCOUNT = BIT(REQ_F_REFCOUNT_BIT), + /* there is a linked timeout that has to be armed */ + REQ_F_ARM_LTIMEOUT = BIT(REQ_F_ARM_LTIMEOUT_BIT), ++ /* request has already done partial IO */ ++ REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT), + }; + + struct async_poll { +@@ -4963,6 +4966,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) + ret = -EINTR; + if (ret > 0 && io_net_retry(sock, flags)) { + sr->done_io += ret; ++ req->flags |= REQ_F_PARTIAL_IO; + return io_setup_async_msg(req, kmsg); + } + req_set_fail(req); +@@ -5036,6 +5040,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) + sr->len -= ret; + sr->buf += ret; + sr->done_io += ret; ++ req->flags |= REQ_F_PARTIAL_IO; + return -EAGAIN; + } + req_set_fail(req); +-- +2.39.0 + diff --git a/queue-5.10/io_uring-allow-re-poll-if-we-made-progress.patch b/queue-5.10/io_uring-allow-re-poll-if-we-made-progress.patch new file mode 100644 index 00000000000..4538b87a183 --- /dev/null +++ b/queue-5.10/io_uring-allow-re-poll-if-we-made-progress.patch @@ -0,0 +1,53 @@ +From cb51b731674894f54045c3cd6ebab80e2a89af07 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 21 Jan 2023 10:39:22 -0700 +Subject: io_uring: allow re-poll if we made progress + +From: Jens Axboe + +commit 10c873334febaeea9aa0c25c10b5ac0951b77a5f upstream. + +We currently check REQ_F_POLLED before arming async poll for a +notification to retry. If it's set, then we don't allow poll and will +punt to io-wq instead. This is done to prevent a situation where a buggy +driver will repeatedly return that there's space/data available yet we +get -EAGAIN. + +However, if we already transferred data, then it should be safe to rely +on poll again. Gate the check on whether or not REQ_F_PARTIAL_IO is +also set. 
+ +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 75d833269751..cc8e13de5fa9 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -5694,7 +5694,7 @@ static int io_arm_poll_handler(struct io_kiocb *req) + + if (!req->file || !file_can_poll(req->file)) + return IO_APOLL_ABORTED; +- if (req->flags & REQ_F_POLLED) ++ if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED) + return IO_APOLL_ABORTED; + if (!def->pollin && !def->pollout) + return IO_APOLL_ABORTED; +@@ -5710,7 +5710,10 @@ static int io_arm_poll_handler(struct io_kiocb *req) + mask |= POLLOUT | POLLWRNORM; + } + +- apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); ++ if (req->flags & REQ_F_POLLED) ++ apoll = req->apoll; ++ else ++ apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC); + if (unlikely(!apoll)) + return IO_APOLL_ABORTED; + apoll->double_poll = NULL; +-- +2.39.0 + diff --git a/queue-5.10/io_uring-check-for-valid-register-opcode-earlier.patch b/queue-5.10/io_uring-check-for-valid-register-opcode-earlier.patch new file mode 100644 index 00000000000..b08e9fa6add --- /dev/null +++ b/queue-5.10/io_uring-check-for-valid-register-opcode-earlier.patch @@ -0,0 +1,45 @@ +From fa17a75eca8d286d3bedd687eb7e5e896d0ac374 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 23 Dec 2022 06:37:08 -0700 +Subject: io_uring: check for valid register opcode earlier + +From: Jens Axboe + +[ Upstream commit 343190841a1f22b96996d9f8cfab902a4d1bfd0e ] + +We only check the register opcode value inside the restricted ring +section, move it into the main io_uring_register() function instead +and check it up front. + +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 8c8ba8c067ca..f05f033d8496 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -10805,8 +10805,6 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, + return -ENXIO; + + if (ctx->restricted) { +- if (opcode >= IORING_REGISTER_LAST) +- return -EINVAL; + opcode = array_index_nospec(opcode, IORING_REGISTER_LAST); + if (!test_bit(opcode, ctx->restrictions.register_op)) + return -EACCES; +@@ -10938,6 +10936,9 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode, + long ret = -EBADF; + struct fd f; + ++ if (opcode >= IORING_REGISTER_LAST) ++ return -EINVAL; ++ + f = fdget(fd); + if (!f.file) + return -EBADF; +-- +2.39.0 + diff --git a/queue-5.10/io_uring-do-not-recalculate-ppos-unnecessarily.patch b/queue-5.10/io_uring-do-not-recalculate-ppos-unnecessarily.patch new file mode 100644 index 00000000000..cb7bf0d5ad0 --- /dev/null +++ b/queue-5.10/io_uring-do-not-recalculate-ppos-unnecessarily.patch @@ -0,0 +1,100 @@ +From 1bede82287bd73eb9e91510500259fa6cb6a4197 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Feb 2022 02:55:03 -0800 +Subject: io_uring: do not recalculate ppos unnecessarily + +From: Dylan Yudaken + +commit b4aec40015953b65f2f114641e7fd7714c8df8e6 upstream. + +There is a slight optimisation to be had by calculating the correct pos +pointer inside io_kiocb_update_pos and then using that later. 
+ +It seems code size drops by a bit: +000000000000a1b0 0000000000000400 t io_read +000000000000a5b0 0000000000000319 t io_write + +vs +000000000000a1b0 00000000000003f6 t io_read +000000000000a5b0 0000000000000310 t io_write + +Signed-off-by: Dylan Yudaken +Reviewed-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 18 ++++++++++++------ + 1 file changed, 12 insertions(+), 6 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index eaf8463c9b14..d4e017b07371 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -3000,18 +3000,22 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) + } + } + +-static inline void io_kiocb_update_pos(struct io_kiocb *req) ++static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req) + { + struct kiocb *kiocb = &req->rw.kiocb; ++ bool is_stream = req->file->f_mode & FMODE_STREAM; + + if (kiocb->ki_pos == -1) { +- if (!(req->file->f_mode & FMODE_STREAM)) { ++ if (!is_stream) { + req->flags |= REQ_F_CUR_POS; + kiocb->ki_pos = req->file->f_pos; ++ return &kiocb->ki_pos; + } else { + kiocb->ki_pos = 0; ++ return NULL; + } + } ++ return is_stream ? NULL : &kiocb->ki_pos; + } + + static void kiocb_done(struct kiocb *kiocb, ssize_t ret, +@@ -3537,6 +3541,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct iov_iter_state __state, *state; + ssize_t ret, ret2; ++ loff_t *ppos; + + if (rw) { + iter = &rw->iter; +@@ -3569,9 +3574,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) + return ret ?: -EAGAIN; + } + +- io_kiocb_update_pos(req); ++ ppos = io_kiocb_update_pos(req); + +- ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result); ++ ret = rw_verify_area(READ, req->file, ppos, req->result); + if (unlikely(ret)) { + kfree(iovec); + return ret; +@@ -3675,6 +3680,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) + bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK; + struct iov_iter_state __state, *state; + ssize_t ret, ret2; ++ loff_t *ppos; + + if (rw) { + iter = &rw->iter; +@@ -3705,9 +3711,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) + (req->flags & REQ_F_ISREG)) + goto copy_iov; + +- io_kiocb_update_pos(req); ++ ppos = io_kiocb_update_pos(req); + +- ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); ++ ret = rw_verify_area(WRITE, req->file, ppos, req->result); + if (unlikely(ret)) + goto out_free; + +-- +2.39.0 + diff --git a/queue-5.10/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch b/queue-5.10/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch new file mode 100644 index 00000000000..096d0d9d187 --- /dev/null +++ b/queue-5.10/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch @@ -0,0 +1,47 @@ +From 6e48a11832c85d20bab6944befbb7ebc0d1bac70 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Jan 2023 20:50:24 -0700 +Subject: io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL + +From: Jens Axboe + +commit 46a525e199e4037516f7e498c18f065b09df32ac upstream. + +This isn't a reliable mechanism to tell if we have task_work pending, we +really should be looking at whether we have any items queued. This is +problematic if forward progress is gated on running said task_work. One +such example is reading from a pipe, where the write side has been closed +right before the read is started. 
The fput() of the file queues TWA_RESUME
+task_work, and we need that task_work to be run before ->release() is
+called for the pipe. If ->release() isn't called, then the read will sit
+forever waiting on data that will never arise.
+
+Fix this by making io_run_task_work() check whether we have task_work
+pending rather than relying on TIF_NOTIFY_SIGNAL. The latter obviously
+doesn't work for task_work that is queued without TWA_SIGNAL.
+
+Reported-by: Christiano Haesbaert 
+Cc: stable@vger.kernel.org
+Link: https://github.com/axboe/liburing/issues/665
+Signed-off-by: Jens Axboe 
+Signed-off-by: Sasha Levin 
+---
+ io_uring/io-wq.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
+index 87bc38b47103..81485c1a9879 100644
+--- a/io_uring/io-wq.c
++++ b/io_uring/io-wq.c
+@@ -513,7 +513,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
+
+ static bool io_flush_signals(void)
+ {
+- if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) {
++ if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
+ __set_current_state(TASK_RUNNING);
+ tracehook_notify_signal();
+ return true;
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch b/queue-5.10/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch
new file mode 100644
index 00000000000..4ed7b3ed4cc
--- /dev/null
+++ b/queue-5.10/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch
@@ -0,0 +1,107 @@
+From 8246ae8ea8105eda4453b0b2fea90815b74d5152 Mon Sep 17 00:00:00 2001
+From: Sasha Levin 
+Date: Sat, 21 Jan 2023 10:21:22 -0700
+Subject: io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly
+
+From: Jens Axboe 
+
+commit 7ba89d2af17aa879dda30f5d5d3f152e587fc551 upstream.
+
+We currently don't attempt to get the full asked-for length even if
+MSG_WAITALL is set, if we get a partial receive. If we do see a partial
+receive, then just note how many bytes we did and return -EAGAIN to
+get it retried.
+
+The iov is advanced appropriately for the vector-based case, and we
+manually bump the buffer and remainder for the non-vector case. 
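+
+A minimal userspace sketch of the semantics this provides (illustrative
+only, not part of the upstream commit; liburing and an already-connected
+stream socket "sockfd" are assumed):
+
+  #include <liburing.h>
+
+  /* Receive exactly "len" bytes via io_uring. With MSG_WAITALL, short
+   * receives are retried internally, so the CQE carries the full length
+   * or an error (a short count remains possible only if the peer closes
+   * the connection early). */
+  static int recv_all(struct io_uring *ring, int sockfd, void *buf, size_t len)
+  {
+          struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
+          struct io_uring_cqe *cqe;
+          int ret;
+
+          io_uring_prep_recv(sqe, sockfd, buf, len, MSG_WAITALL);
+          io_uring_submit(ring);
+          ret = io_uring_wait_cqe(ring, &cqe);
+          if (ret < 0)
+                  return ret;
+          ret = cqe->res; /* full len, or -errno */
+          io_uring_cqe_seen(ring, cqe);
+          return ret;
+  }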
+ +Cc: stable@vger.kernel.org +Reported-by: Constantine Gavrilov +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 28 ++++++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 34dd6267679a..3d67b9b4100f 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -578,6 +578,7 @@ struct io_sr_msg { + int msg_flags; + int bgid; + size_t len; ++ size_t done_io; + struct io_buffer *kbuf; + }; + +@@ -4903,12 +4904,21 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) + if (req->ctx->compat) + sr->msg_flags |= MSG_CMSG_COMPAT; + #endif ++ sr->done_io = 0; + return 0; + } + ++static bool io_net_retry(struct socket *sock, int flags) ++{ ++ if (!(flags & MSG_WAITALL)) ++ return false; ++ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET; ++} ++ + static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) + { + struct io_async_msghdr iomsg, *kmsg; ++ struct io_sr_msg *sr = &req->sr_msg; + struct socket *sock; + struct io_buffer *kbuf; + unsigned flags; +@@ -4951,6 +4961,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) + return io_setup_async_msg(req, kmsg); + if (ret == -ERESTARTSYS) + ret = -EINTR; ++ if (ret > 0 && io_net_retry(sock, flags)) { ++ sr->done_io += ret; ++ return io_setup_async_msg(req, kmsg); ++ } + req_set_fail(req); + } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { + req_set_fail(req); +@@ -4962,6 +4976,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) + if (kmsg->free_iov) + kfree(kmsg->free_iov); + req->flags &= ~REQ_F_NEED_CLEANUP; ++ if (ret >= 0) ++ ret += sr->done_io; ++ else if (sr->done_io) ++ ret = sr->done_io; + __io_req_complete(req, issue_flags, ret, cflags); + return 0; + } +@@ -5014,12 +5032,22 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) + return -EAGAIN; + if (ret == -ERESTARTSYS) + ret = -EINTR; ++ if (ret > 0 && io_net_retry(sock, flags)) { ++ sr->len -= ret; ++ sr->buf += ret; ++ sr->done_io += ret; ++ return -EAGAIN; ++ } + req_set_fail(req); + } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { + req_set_fail(req); + } + if (req->flags & REQ_F_BUFFER_SELECTED) + cflags = io_put_recv_kbuf(req); ++ if (ret >= 0) ++ ret += sr->done_io; ++ else if (sr->done_io) ++ ret = sr->done_io; + __io_req_complete(req, issue_flags, ret, cflags); + return 0; + } +-- +2.39.0 + diff --git a/queue-5.10/io_uring-ensure-that-cached-task-references-are-alwa.patch b/queue-5.10/io_uring-ensure-that-cached-task-references-are-alwa.patch new file mode 100644 index 00000000000..14da9a717d9 --- /dev/null +++ b/queue-5.10/io_uring-ensure-that-cached-task-references-are-alwa.patch @@ -0,0 +1,55 @@ +From a0e88106b70717e45b0957db31d9a30f027f8638 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 21 Jan 2023 12:36:08 -0700 +Subject: io_uring: ensure that cached task references are always put on exit + +From: Jens Axboe + +commit e775f93f2ab976a2cdb4a7b53063cbe890904f73 upstream. + +io_uring caches task references to avoid doing atomics for each of them +per request. If a request is put from the same task that allocated it, +then we can maintain a per-ctx cache of them. This obviously relies +on io_uring always pruning caches in a reliable way, and there's +currently a case off io_uring fd release where we can miss that. 
One example is a ring setup with IOPOLL, which relies on the task
+polling for completions, which will free them. However, if such a task
+submits a request and then exits or closes the ring without reaping
+the completion, then ring release will reap and put. If release happens
+from that very same task, the completed request task refs will get
+put back into the cache pool. This is problematic, as we're now beyond
+the point of pruning caches.
+
+Manually drop these caches after doing an IOPOLL reap. This releases
+references from the current task, which is enough. If another task
+happens to be doing the release, then the caching will not be
+triggered and there's no issue.
+
+Cc: stable@vger.kernel.org
+Fixes: e98e49b2bbf7 ("io_uring: extend task put optimisations")
+Reported-by: Homin Rhee 
+Signed-off-by: Jens Axboe 
+Signed-off-by: Sasha Levin 
+---
+ io_uring/io_uring.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index e8852d56b1ec..f8a0d228d799 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -9513,6 +9513,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
+ /* if we failed setting up the ctx, we might not have any rings */
+ io_iopoll_try_reap_events(ctx);
+
++ /* drop cached put refs after potentially doing completions */
++ if (current->io_uring)
++ io_uring_drop_tctx_refs(current);
++
+ INIT_WORK(&ctx->exit_work, io_ring_exit_work);
+ /*
+ * Use system_unbound_wq to avoid spawning tons of event kworkers
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-fix-async-accept-on-o_nonblock-sockets.patch b/queue-5.10/io_uring-fix-async-accept-on-o_nonblock-sockets.patch
new file mode 100644
index 00000000000..0f4ae7e26bd
--- /dev/null
+++ b/queue-5.10/io_uring-fix-async-accept-on-o_nonblock-sockets.patch
@@ -0,0 +1,50 @@
+From 965d1b22a5e0f21db9f1dd08bf9f07d2799e6f81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin 
+Date: Sat, 21 Jan 2023 09:13:12 -0700
+Subject: io_uring: fix async accept on O_NONBLOCK sockets
+
+From: Dylan Yudaken 
+
+commit a73825ba70c93e1eb39a845bb3d9885a787f8ffe upstream.
+
+Do not set REQ_F_NOWAIT if the socket is non-blocking. When set, this
+flag causes the accept to immediately post a CQE with EAGAIN, which
+means you cannot perform an accept SQE on a NONBLOCK socket
+asynchronously.
+
+By removing the flag when there is no pending accept, poll is armed as
+usual, and when a connection comes in the CQE is posted. 
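+
+A minimal userspace sketch of the behavior after this fix (illustrative
+only, not part of the upstream commit; liburing and a listening socket
+"listenfd" with O_NONBLOCK set are assumed):
+
+  #include <liburing.h>
+  #include <sys/socket.h>
+
+  /* Submit one accept SQE; the CQE is posted when a peer connects,
+   * instead of completing immediately with -EAGAIN. */
+  static int accept_one(struct io_uring *ring, int listenfd)
+  {
+          struct sockaddr_storage ss;
+          socklen_t sslen = sizeof(ss);
+          struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
+          struct io_uring_cqe *cqe;
+          int fd;
+
+          io_uring_prep_accept(sqe, listenfd, (struct sockaddr *)&ss, &sslen, 0);
+          io_uring_submit(ring);
+          io_uring_wait_cqe(ring, &cqe); /* blocks until a peer connects */
+          fd = cqe->res;                 /* accepted fd, or -errno */
+          io_uring_cqe_seen(ring, cqe);
+          return fd;
+  }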
+
+Signed-off-by: Dylan Yudaken 
+Link: https://lore.kernel.org/r/20220324143435.2875844-1-dylany@fb.com
+Signed-off-by: Jens Axboe 
+Signed-off-by: Sasha Levin 
+---
+ io_uring/io_uring.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index cc8e13de5fa9..8c8ba8c067ca 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -5112,9 +5112,6 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
+ struct file *file;
+ int ret, fd;
+
+- if (req->file->f_flags & O_NONBLOCK)
+- req->flags |= REQ_F_NOWAIT;
+-
+ if (!fixed) {
+ fd = __get_unused_fd_flags(accept->flags, accept->nofile);
+ if (unlikely(fd < 0))
+@@ -5127,6 +5124,8 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
+ if (!fixed)
+ put_unused_fd(fd);
+ ret = PTR_ERR(file);
++ /* safe to retry */
++ req->flags |= REQ_F_PARTIAL_IO;
+ if (ret == -EAGAIN && force_nonblock)
+ return -EAGAIN;
+ if (ret == -ERESTARTSYS)
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-fix-cq-waiting-timeout-handling.patch b/queue-5.10/io_uring-fix-cq-waiting-timeout-handling.patch
new file mode 100644
index 00000000000..007c9f65e1d
--- /dev/null
+++ b/queue-5.10/io_uring-fix-cq-waiting-timeout-handling.patch
@@ -0,0 +1,60 @@
+From e149257b0a2a60b966c3855202018f7acd711044 Mon Sep 17 00:00:00 2001
+From: Sasha Levin 
+Date: Thu, 5 Jan 2023 10:49:15 +0000
+Subject: io_uring: fix CQ waiting timeout handling
+
+From: Pavel Begunkov 
+
+commit 12521a5d5cb7ff0ad43eadfc9c135d86e1131fa8 upstream.
+
+The jiffy to ktime CQ waiting conversion broke how we treat timeouts; in
+particular, we re-arm the timeout anew every time we get into
+io_cqring_wait_schedule() without adjusting it. Waiting for 2 CQEs and
+getting a task_work in the middle may double the timeout value, or even
+worse, in some cases the task may wait indefinitely. 
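+
+For illustration, the userspace-visible effect (a sketch, not part of
+the upstream commit; liburing and an initialized ring are assumed):
+
+  #include <liburing.h>
+
+  /* Wait for two CQEs with a 1-second overall budget. Before this fix,
+   * every task_work wakeup could re-arm the full timeout, so this call
+   * might block far longer than one second, or indefinitely. */
+  static int wait_two(struct io_uring *ring)
+  {
+          struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };
+          struct io_uring_cqe *cqe;
+
+          return io_uring_wait_cqes(ring, &cqe, 2, &ts, NULL); /* -ETIME on timeout */
+  }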
+
+Cc: stable@vger.kernel.org
+Fixes: 228339662b398 ("io_uring: don't convert to jiffies for waiting on timeouts")
+Signed-off-by: Pavel Begunkov 
+Link: https://lore.kernel.org/r/f7bffddd71b08f28a877d44d37ac953ddb01590d.1672915663.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe 
+Signed-off-by: Greg Kroah-Hartman 
+Signed-off-by: Sasha Levin 
+---
+ io_uring/io_uring.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index b7bd5138bdaf..e8852d56b1ec 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -7518,7 +7518,7 @@ static int io_run_task_work_sig(void)
+ /* when returns >0, the caller should retry */
+ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+ struct io_wait_queue *iowq,
+- ktime_t timeout)
++ ktime_t *timeout)
+ {
+ int ret;
+
+@@ -7530,7 +7530,7 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
+ if (test_bit(0, &ctx->check_cq_overflow))
+ return 1;
+
+- if (!schedule_hrtimeout(&timeout, HRTIMER_MODE_ABS))
++ if (!schedule_hrtimeout(timeout, HRTIMER_MODE_ABS))
+ return -ETIME;
+ return 1;
+ }
+@@ -7593,7 +7593,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
+ }
+ prepare_to_wait_exclusive(&ctx->cq_wait, &iowq.wq,
+ TASK_INTERRUPTIBLE);
+- ret = io_cqring_wait_schedule(ctx, &iowq, timeout);
++ ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
+ finish_wait(&ctx->cq_wait, &iowq.wq);
+ cond_resched();
+ } while (ret > 0);
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-improve-send-recv-error-handling.patch b/queue-5.10/io_uring-improve-send-recv-error-handling.patch
new file mode 100644
index 00000000000..f4b4adee079
--- /dev/null
+++ b/queue-5.10/io_uring-improve-send-recv-error-handling.patch
@@ -0,0 +1,126 @@
+From 762279fd2bd47c45c44bd2fa4fee2f84c6dbed86 Mon Sep 17 00:00:00 2001
+From: Sasha Levin 
+Date: Tue, 23 Nov 2021 00:07:47 +0000
+Subject: io_uring: improve send/recv error handling
+
+From: Pavel Begunkov 
+
+commit 7297ce3d59449de49d3c9e1f64ae25488750a1fc upstream.
+
+Hide all error handling under a common if block; this removes two extra
+ifs on the success path and keeps the handling more condensed. 
+ +Signed-off-by: Pavel Begunkov +Link: https://lore.kernel.org/r/5761545158a12968f3caf30f747eea65ed75dfc1.1637524285.git.asml.silence@gmail.com +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 55 +++++++++++++++++++++++++-------------------- + 1 file changed, 31 insertions(+), 24 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index 0c4d16afb9ef..34dd6267679a 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -4706,17 +4706,18 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags) + min_ret = iov_iter_count(&kmsg->msg.msg_iter); + + ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags); +- if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) +- return io_setup_async_msg(req, kmsg); +- if (ret == -ERESTARTSYS) +- ret = -EINTR; + ++ if (ret < min_ret) { ++ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) ++ return io_setup_async_msg(req, kmsg); ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ req_set_fail(req); ++ } + /* fast path, check for non-NULL to avoid function call */ + if (kmsg->free_iov) + kfree(kmsg->free_iov); + req->flags &= ~REQ_F_NEED_CLEANUP; +- if (ret < min_ret) +- req_set_fail(req); + __io_req_complete(req, issue_flags, ret, 0); + return 0; + } +@@ -4752,13 +4753,13 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags) + + msg.msg_flags = flags; + ret = sock_sendmsg(sock, &msg); +- if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN) +- return -EAGAIN; +- if (ret == -ERESTARTSYS) +- ret = -EINTR; +- +- if (ret < min_ret) ++ if (ret < min_ret) { ++ if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK)) ++ return -EAGAIN; ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; + req_set_fail(req); ++ } + __io_req_complete(req, issue_flags, ret, 0); + return 0; + } +@@ -4945,10 +4946,15 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) + + ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg, + kmsg->uaddr, flags); +- if (force_nonblock && ret == -EAGAIN) +- return io_setup_async_msg(req, kmsg); +- if (ret == -ERESTARTSYS) +- ret = -EINTR; ++ if (ret < min_ret) { ++ if (ret == -EAGAIN && force_nonblock) ++ return io_setup_async_msg(req, kmsg); ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ req_set_fail(req); ++ } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { ++ req_set_fail(req); ++ } + + if (req->flags & REQ_F_BUFFER_SELECTED) + cflags = io_put_recv_kbuf(req); +@@ -4956,8 +4962,6 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags) + if (kmsg->free_iov) + kfree(kmsg->free_iov); + req->flags &= ~REQ_F_NEED_CLEANUP; +- if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) +- req_set_fail(req); + __io_req_complete(req, issue_flags, ret, cflags); + return 0; + } +@@ -5004,15 +5008,18 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags) + min_ret = iov_iter_count(&msg.msg_iter); + + ret = sock_recvmsg(sock, &msg, flags); +- if (force_nonblock && ret == -EAGAIN) +- return -EAGAIN; +- if (ret == -ERESTARTSYS) +- ret = -EINTR; + out_free: ++ if (ret < min_ret) { ++ if (ret == -EAGAIN && force_nonblock) ++ return -EAGAIN; ++ if (ret == -ERESTARTSYS) ++ ret = -EINTR; ++ req_set_fail(req); ++ } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) { ++ req_set_fail(req); ++ } + if (req->flags & REQ_F_BUFFER_SELECTED) + cflags = io_put_recv_kbuf(req); +- if (ret < min_ret || ((flags & MSG_WAITALL) 
&& (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)))) +- req_set_fail(req); + __io_req_complete(req, issue_flags, ret, cflags); + return 0; + } +-- +2.39.0 + diff --git a/queue-5.10/io_uring-lock-overflowing-for-iopoll.patch b/queue-5.10/io_uring-lock-overflowing-for-iopoll.patch new file mode 100644 index 00000000000..33888d86712 --- /dev/null +++ b/queue-5.10/io_uring-lock-overflowing-for-iopoll.patch @@ -0,0 +1,79 @@ +From fcf3f646e11811e91abc477e5f256607c382b9ea Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 14 Jan 2023 09:14:03 -0700 +Subject: io_uring: lock overflowing for IOPOLL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Pavel Begunkov + +commit 544d163d659d45a206d8929370d5a2984e546cb7 upstream. + +syzbot reports an issue with overflow filling for IOPOLL: + +WARNING: CPU: 0 PID: 28 at io_uring/io_uring.c:734 io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734 +CPU: 0 PID: 28 Comm: kworker/u4:1 Not tainted 6.2.0-rc3-syzkaller-16369-g358a161a6a9e #0 +Workqueue: events_unbound io_ring_exit_work +Call trace: + io_cqring_event_overflow+0x1c0/0x230 io_uring/io_uring.c:734 + io_req_cqe_overflow+0x5c/0x70 io_uring/io_uring.c:773 + io_fill_cqe_req io_uring/io_uring.h:168 [inline] + io_do_iopoll+0x474/0x62c io_uring/rw.c:1065 + io_iopoll_try_reap_events+0x6c/0x108 io_uring/io_uring.c:1513 + io_uring_try_cancel_requests+0x13c/0x258 io_uring/io_uring.c:3056 + io_ring_exit_work+0xec/0x390 io_uring/io_uring.c:2869 + process_one_work+0x2d8/0x504 kernel/workqueue.c:2289 + worker_thread+0x340/0x610 kernel/workqueue.c:2436 + kthread+0x12c/0x158 kernel/kthread.c:376 + ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:863 + +There is no real problem for normal IOPOLL as flush is also called with +uring_lock taken, but it's getting more complicated for IOPOLL|SQPOLL, +for which __io_cqring_overflow_flush() happens from the CQ waiting path. 
+ +Reported-and-tested-by: syzbot+6805087452d72929404e@syzkaller.appspotmail.com +Cc: stable@vger.kernel.org # 5.10+ +Signed-off-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 18 ++++++++++++++++-- + 1 file changed, 16 insertions(+), 2 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index f05f033d8496..b7bd5138bdaf 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2482,12 +2482,26 @@ static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events, + + io_init_req_batch(&rb); + while (!list_empty(done)) { ++ struct io_uring_cqe *cqe; ++ unsigned cflags; ++ + req = list_first_entry(done, struct io_kiocb, inflight_entry); + list_del(&req->inflight_entry); +- +- io_fill_cqe_req(req, req->result, io_put_rw_kbuf(req)); ++ cflags = io_put_rw_kbuf(req); + (*nr_events)++; + ++ cqe = io_get_cqe(ctx); ++ if (cqe) { ++ WRITE_ONCE(cqe->user_data, req->user_data); ++ WRITE_ONCE(cqe->res, req->result); ++ WRITE_ONCE(cqe->flags, cflags); ++ } else { ++ spin_lock(&ctx->completion_lock); ++ io_cqring_event_overflow(ctx, req->user_data, ++ req->result, cflags); ++ spin_unlock(&ctx->completion_lock); ++ } ++ + if (req_ref_put_and_test(req)) + io_req_free_batch(&rb, req, &ctx->submit_state); + } +-- +2.39.0 + diff --git a/queue-5.10/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch b/queue-5.10/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch new file mode 100644 index 00000000000..fb4bd1bb52e --- /dev/null +++ b/queue-5.10/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch @@ -0,0 +1,65 @@ +From 57543eec020ee514e90dfd4c9eaf6938e861052e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 22 Feb 2022 02:55:01 -0800 +Subject: io_uring: remove duplicated calls to io_kiocb_ppos + +From: Dylan Yudaken + +commit af9c45ecebaf1b428306f41421f4bcffe439f735 upstream. + +io_kiocb_ppos is called in both branches, and it seems that the compiler +does not fuse this. Fusing removes a few bytes from loop_rw_iter. 
+ +Before: +$ nm -S fs/io_uring.o | grep loop_rw_iter +0000000000002430 0000000000000124 t loop_rw_iter + +After: +$ nm -S fs/io_uring.o | grep loop_rw_iter +0000000000002430 000000000000010d t loop_rw_iter + +Signed-off-by: Dylan Yudaken +Reviewed-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index f8a0d228d799..d8926475cd88 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -3300,6 +3300,7 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) + struct kiocb *kiocb = &req->rw.kiocb; + struct file *file = req->file; + ssize_t ret = 0; ++ loff_t *ppos; + + /* + * Don't support polled IO through this interface, and we can't +@@ -3311,6 +3312,8 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) + if (kiocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + ++ ppos = io_kiocb_ppos(kiocb); ++ + while (iov_iter_count(iter)) { + struct iovec iovec; + ssize_t nr; +@@ -3324,10 +3327,10 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter) + + if (rw == READ) { + nr = file->f_op->read(file, iovec.iov_base, +- iovec.iov_len, io_kiocb_ppos(kiocb)); ++ iovec.iov_len, ppos); + } else { + nr = file->f_op->write(file, iovec.iov_base, +- iovec.iov_len, io_kiocb_ppos(kiocb)); ++ iovec.iov_len, ppos); + } + + if (nr < 0) { +-- +2.39.0 + diff --git a/queue-5.10/io_uring-rw-defer-fsnotify-calls-to-task-context.patch b/queue-5.10/io_uring-rw-defer-fsnotify-calls-to-task-context.patch new file mode 100644 index 00000000000..835139fa321 --- /dev/null +++ b/queue-5.10/io_uring-rw-defer-fsnotify-calls-to-task-context.patch @@ -0,0 +1,122 @@ +From 1b2cbd41576d68fdd10224114346104806482df4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 21 Jan 2023 13:38:51 -0700 +Subject: io_uring/rw: defer fsnotify calls to task context + +From: Jens Axboe + +commit b000145e9907809406d8164c3b2b8861d95aecd1 upstream. + +We can't call these off the kiocb completion as that might be off +soft/hard irq context. Defer the calls to when we process the +task_work for this request. 
That avoids valid complaints like: + +stack backtrace: +CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.0.0-rc6-syzkaller-00321-g105a36f3694e #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022 +Call Trace: + + __dump_stack lib/dump_stack.c:88 [inline] + dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 + print_usage_bug kernel/locking/lockdep.c:3961 [inline] + valid_state kernel/locking/lockdep.c:3973 [inline] + mark_lock_irq kernel/locking/lockdep.c:4176 [inline] + mark_lock.part.0.cold+0x18/0xd8 kernel/locking/lockdep.c:4632 + mark_lock kernel/locking/lockdep.c:4596 [inline] + mark_usage kernel/locking/lockdep.c:4527 [inline] + __lock_acquire+0x11d9/0x56d0 kernel/locking/lockdep.c:5007 + lock_acquire kernel/locking/lockdep.c:5666 [inline] + lock_acquire+0x1ab/0x570 kernel/locking/lockdep.c:5631 + __fs_reclaim_acquire mm/page_alloc.c:4674 [inline] + fs_reclaim_acquire+0x115/0x160 mm/page_alloc.c:4688 + might_alloc include/linux/sched/mm.h:271 [inline] + slab_pre_alloc_hook mm/slab.h:700 [inline] + slab_alloc mm/slab.c:3278 [inline] + __kmem_cache_alloc_lru mm/slab.c:3471 [inline] + kmem_cache_alloc+0x39/0x520 mm/slab.c:3491 + fanotify_alloc_fid_event fs/notify/fanotify/fanotify.c:580 [inline] + fanotify_alloc_event fs/notify/fanotify/fanotify.c:813 [inline] + fanotify_handle_event+0x1130/0x3f40 fs/notify/fanotify/fanotify.c:948 + send_to_group fs/notify/fsnotify.c:360 [inline] + fsnotify+0xafb/0x1680 fs/notify/fsnotify.c:570 + __fsnotify_parent+0x62f/0xa60 fs/notify/fsnotify.c:230 + fsnotify_parent include/linux/fsnotify.h:77 [inline] + fsnotify_file include/linux/fsnotify.h:99 [inline] + fsnotify_access include/linux/fsnotify.h:309 [inline] + __io_complete_rw_common+0x485/0x720 io_uring/rw.c:195 + io_complete_rw+0x1a/0x1f0 io_uring/rw.c:228 + iomap_dio_complete_work fs/iomap/direct-io.c:144 [inline] + iomap_dio_bio_end_io+0x438/0x5e0 fs/iomap/direct-io.c:178 + bio_endio+0x5f9/0x780 block/bio.c:1564 + req_bio_endio block/blk-mq.c:695 [inline] + blk_update_request+0x3fc/0x1300 block/blk-mq.c:825 + scsi_end_request+0x7a/0x9a0 drivers/scsi/scsi_lib.c:541 + scsi_io_completion+0x173/0x1f70 drivers/scsi/scsi_lib.c:971 + scsi_complete+0x122/0x3b0 drivers/scsi/scsi_lib.c:1438 + blk_complete_reqs+0xad/0xe0 block/blk-mq.c:1022 + __do_softirq+0x1d3/0x9c6 kernel/softirq.c:571 + invoke_softirq kernel/softirq.c:445 [inline] + __irq_exit_rcu+0x123/0x180 kernel/softirq.c:650 + irq_exit_rcu+0x5/0x20 kernel/softirq.c:662 + common_interrupt+0xa9/0xc0 arch/x86/kernel/irq.c:240 + +Fixes: f63cf5192fe3 ("io_uring: ensure that fsnotify is always called") +Link: https://lore.kernel.org/all/20220929135627.ykivmdks2w5vzrwg@quack3/ +Reported-by: syzbot+dfcc5f4da15868df7d4d@syzkaller.appspotmail.com +Reported-by: Jan Kara +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 22 +++++++++++++++------- + 1 file changed, 15 insertions(+), 7 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index d4e017b07371..33e6e1011105 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2702,12 +2702,6 @@ static bool io_rw_should_reissue(struct io_kiocb *req) + + static bool __io_complete_rw_common(struct io_kiocb *req, long res) + { +- if (req->rw.kiocb.ki_flags & IOCB_WRITE) { +- kiocb_end_write(req); +- fsnotify_modify(req->file); +- } else { +- fsnotify_access(req->file); +- } + if (res != req->result) { + if ((res == -EAGAIN || res == -EOPNOTSUPP) && + io_rw_should_reissue(req)) { +@@ -2760,6 +2754,20 @@ static void 
__io_complete_rw(struct io_kiocb *req, long res, long res2,
+ __io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), io_put_rw_kbuf(req));
+ }
+
++static void io_req_rw_complete(struct io_kiocb *req, bool *locked)
++{
++ struct io_rw *rw = &req->rw;
++
++ if (rw->kiocb.ki_flags & IOCB_WRITE) {
++ kiocb_end_write(req);
++ fsnotify_modify(req->file);
++ } else {
++ fsnotify_access(req->file);
++ }
++
++ io_req_task_complete(req, locked);
++}
++
+ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+ {
+ struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+@@ -2767,7 +2775,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+ if (__io_complete_rw_common(req, res))
+ return;
+ req->result = io_fixup_rw_res(req, res);
+- req->io_task_work.func = io_req_task_complete;
++ req->io_task_work.func = io_req_rw_complete;
+ io_req_task_work_add(req);
+ }
+
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch b/queue-5.10/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch
new file mode 100644
index 00000000000..07f6c582fa6
--- /dev/null
+++ b/queue-5.10/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch
@@ -0,0 +1,111 @@
+From 64234e92ae08fd4259a60c12858cb1597bb76a33 Mon Sep 17 00:00:00 2001
+From: Sasha Levin 
+Date: Wed, 20 Apr 2022 19:21:36 -0600
+Subject: io_uring: support MSG_WAITALL for IORING_OP_SEND(MSG)
+
+From: Jens Axboe 
+
+commit 4c3c09439c08b03d9503df0ca4c7619c5842892e upstream.
+
+Like commit 7ba89d2af17a for recv/recvmsg, support MSG_WAITALL for the
+send side. If this flag is set and we do a short send, retry for a
+stream or seqpacket socket.
+
+Signed-off-by: Jens Axboe 
+Signed-off-by: Sasha Levin 
+---
+ io_uring/io_uring.c | 36 +++++++++++++++++++++++++++++-------
+ 1 file changed, 29 insertions(+), 7 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 7f9fb0cb9230..75d833269751 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -4617,6 +4617,13 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
+ }
+
+ #if defined(CONFIG_NET)
++static bool io_net_retry(struct socket *sock, int flags)
++{
++ if (!(flags & MSG_WAITALL))
++ return false;
++ return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
++}
++
+ static int io_setup_async_msg(struct io_kiocb *req,
+ struct io_async_msghdr *kmsg)
+ {
+@@ -4680,12 +4687,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ if (req->ctx->compat)
+ sr->msg_flags |= MSG_CMSG_COMPAT;
+ #endif
++ sr->done_io = 0;
+ return 0;
+ }
+
+ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+ {
+ struct io_async_msghdr iomsg, *kmsg;
++ struct io_sr_msg *sr = &req->sr_msg;
+ struct socket *sock;
+ unsigned flags;
+ int min_ret = 0;
+@@ -4716,12 +4725,21 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+ return io_setup_async_msg(req, kmsg);
+ if (ret == -ERESTARTSYS)
+ ret = -EINTR;
++ if (ret > 0 && io_net_retry(sock, flags)) {
++ sr->done_io += ret;
++ req->flags |= REQ_F_PARTIAL_IO;
++ return io_setup_async_msg(req, kmsg);
++ }
+ req_set_fail(req);
+ }
+ /* fast path, check for non-NULL to avoid function call */
+ if (kmsg->free_iov)
+ kfree(kmsg->free_iov);
+ req->flags &= ~REQ_F_NEED_CLEANUP;
++ if (ret >= 0)
++ ret += sr->done_io;
++ else if (sr->done_io)
++ ret = sr->done_io;
+ __io_req_complete(req, issue_flags, ret, 0);
+ return 0;
+ }
+@@ -4762,8 +4780,19 @@ static int 
io_send(struct io_kiocb *req, unsigned int issue_flags)
+ 			return -EAGAIN;
+ 		if (ret == -ERESTARTSYS)
+ 			ret = -EINTR;
++		if (ret > 0 && io_net_retry(sock, flags)) {
++			sr->len -= ret;
++			sr->buf += ret;
++			sr->done_io += ret;
++			req->flags |= REQ_F_PARTIAL_IO;
++			return -EAGAIN;
++		}
+ 		req_set_fail(req);
+ 	}
++	if (ret >= 0)
++		ret += sr->done_io;
++	else if (sr->done_io)
++		ret = sr->done_io;
+ 	__io_req_complete(req, issue_flags, ret, 0);
+ 	return 0;
+ }
+@@ -4911,13 +4940,6 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ 	return 0;
+ }
+ 
+-static bool io_net_retry(struct socket *sock, int flags)
+-{
+-	if (!(flags & MSG_WAITALL))
+-		return false;
+-	return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+-}
+-
+ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+ {
+ 	struct io_async_msghdr iomsg, *kmsg;
+-- 
+2.39.0
+
diff --git a/queue-5.10/io_uring-update-kiocb-ki_pos-at-execution-time.patch b/queue-5.10/io_uring-update-kiocb-ki_pos-at-execution-time.patch
new file mode 100644
index 00000000000..1b701b2f9bb
--- /dev/null
+++ b/queue-5.10/io_uring-update-kiocb-ki_pos-at-execution-time.patch
@@ -0,0 +1,86 @@
+From 95109edd090ae15269d711c14faf965d7b4d8b83 Mon Sep 17 00:00:00 2001
+From: Sasha Levin
+Date: Tue, 22 Feb 2022 02:55:02 -0800
+Subject: io_uring: update kiocb->ki_pos at execution time
+
+From: Dylan Yudaken
+
+commit d34e1e5b396a0dbaa4a29b7138df662cfb9d8e8e upstream.
+
+Update kiocb->ki_pos at execution time rather than in io_prep_rw().
+io_prep_rw() happens before the job is enqueued to a worker and so the
+offset might be read multiple times before being executed once.
+
+This ensures that the file position in a set of _linked_ SQEs will only
+be obtained after earlier SQEs have completed, and so will include their
+incremented file position.
+ +Signed-off-by: Dylan Yudaken +Reviewed-by: Pavel Begunkov +Signed-off-by: Jens Axboe +Signed-off-by: Sasha Levin +--- + io_uring/io_uring.c | 26 ++++++++++++++++++-------- + 1 file changed, 18 insertions(+), 8 deletions(-) + +diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c +index d8926475cd88..eaf8463c9b14 100644 +--- a/io_uring/io_uring.c ++++ b/io_uring/io_uring.c +@@ -2919,14 +2919,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, + req->flags |= REQ_F_ISREG; + + kiocb->ki_pos = READ_ONCE(sqe->off); +- if (kiocb->ki_pos == -1) { +- if (!(file->f_mode & FMODE_STREAM)) { +- req->flags |= REQ_F_CUR_POS; +- kiocb->ki_pos = file->f_pos; +- } else { +- kiocb->ki_pos = 0; +- } +- } + kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); + kiocb->ki_flags = iocb_flags(kiocb->ki_filp); + ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); +@@ -3008,6 +3000,20 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret) + } + } + ++static inline void io_kiocb_update_pos(struct io_kiocb *req) ++{ ++ struct kiocb *kiocb = &req->rw.kiocb; ++ ++ if (kiocb->ki_pos == -1) { ++ if (!(req->file->f_mode & FMODE_STREAM)) { ++ req->flags |= REQ_F_CUR_POS; ++ kiocb->ki_pos = req->file->f_pos; ++ } else { ++ kiocb->ki_pos = 0; ++ } ++ } ++} ++ + static void kiocb_done(struct kiocb *kiocb, ssize_t ret, + unsigned int issue_flags) + { +@@ -3563,6 +3569,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) + return ret ?: -EAGAIN; + } + ++ io_kiocb_update_pos(req); ++ + ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result); + if (unlikely(ret)) { + kfree(iovec); +@@ -3697,6 +3705,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) + (req->flags & REQ_F_ISREG)) + goto copy_iov; + ++ io_kiocb_update_pos(req); ++ + ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result); + if (unlikely(ret)) + goto out_free; +-- +2.39.0 + diff --git a/queue-5.10/series b/queue-5.10/series index fe23331850e..fe767a8e5cb 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -12,3 +12,23 @@ wifi-mac80211-sdata-can-be-null-during-ampdu-start.patch add-exception-protection-processing-for-vd-in-axi_chan_handle_err-function.patch zonefs-detect-append-writes-at-invalid-locations.patch nilfs2-fix-general-protection-fault-in-nilfs_btree_insert.patch +efi-fix-userspace-infinite-retry-read-efivars-after-.patch +alsa-hda-realtek-turn-on-power-early.patch +drm-i915-gt-reset-twice.patch +bluetooth-hci_qca-wait-for-timeout-during-suspend.patch +bluetooth-hci_qca-fix-driver-shutdown-on-closed-serd.patch +io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch +io_uring-improve-send-recv-error-handling.patch +io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch +io_uring-add-flag-for-disabling-provided-buffer-recy.patch +io_uring-support-msg_waitall-for-ioring_op_send-msg.patch +io_uring-allow-re-poll-if-we-made-progress.patch +io_uring-fix-async-accept-on-o_nonblock-sockets.patch +io_uring-check-for-valid-register-opcode-earlier.patch +io_uring-lock-overflowing-for-iopoll.patch +io_uring-fix-cq-waiting-timeout-handling.patch +io_uring-ensure-that-cached-task-references-are-alwa.patch +io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch +io_uring-update-kiocb-ki_pos-at-execution-time.patch +io_uring-do-not-recalculate-ppos-unnecessarily.patch +io_uring-rw-defer-fsnotify-calls-to-task-context.patch -- 2.47.3
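For reference, the short-send handling added in-kernel by
io_uring-support-msg_waitall-for-ioring_op_send-msg.patch follows the classic
userspace retry pattern: record partial progress (the patch's sr->done_io),
retry the remainder, and fold the running total into the final result. Below
is a minimal userspace sketch of the same bookkeeping; send_all() is a
hypothetical helper for illustration only, not part of this series. Note two
differences from the kernel code: the patch gates retries with io_net_retry()
(MSG_WAITALL set and a stream or seqpacket socket), and it requeues the
request via -EAGAIN/io_setup_async_msg() instead of looping inline.

#include <errno.h>
#include <sys/socket.h>
#include <sys/types.h>

/* Hypothetical helper: send the whole buffer, retrying short sends. */
ssize_t send_all(int fd, const void *buf, size_t len, int flags)
{
	const char *p = buf;
	size_t done_io = 0;	/* mirrors sr->done_io in the patch */

	while (done_io < len) {
		ssize_t ret = send(fd, p + done_io, len - done_io, flags);

		if (ret > 0) {
			/* short send: record progress, retry the rest */
			done_io += (size_t)ret;
			continue;
		}
		if (ret < 0 && errno == EINTR)
			continue;	/* like -ERESTARTSYS: just retry */
		/* error: report partial progress if any, else the error */
		return done_io ? (ssize_t)done_io : ret;
	}
	return (ssize_t)done_io;
}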