git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.15
author Sasha Levin <sashal@kernel.org>
Sun, 22 Jan 2023 04:27:49 +0000 (23:27 -0500)
committer Sasha Levin <sashal@kernel.org>
Sun, 22 Jan 2023 04:27:49 +0000 (23:27 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
22 files changed:
queue-5.15/alsa-hda-realtek-fix-mute-micmute-leds-don-t-work-fo.patch [new file with mode: 0644]
queue-5.15/alsa-hda-realtek-fix-mute-micmute-leds-for-a-hp-prob.patch [new file with mode: 0644]
queue-5.15/drm-amd-delay-removal-of-the-firmware-framebuffer.patch [new file with mode: 0644]
queue-5.15/drm-amdgpu-disable-runtime-pm-on-several-sienna-cich.patch [new file with mode: 0644]
queue-5.15/efi-fix-userspace-infinite-retry-read-efivars-after-.patch [new file with mode: 0644]
queue-5.15/eventfd-provide-a-eventfd_signal_mask-helper.patch [new file with mode: 0644]
queue-5.15/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch [new file with mode: 0644]
queue-5.15/hugetlb-unshare-some-pmds-when-splitting-vmas.patch [new file with mode: 0644]
queue-5.15/io_uring-add-flag-for-disabling-provided-buffer-recy.patch [new file with mode: 0644]
queue-5.15/io_uring-allow-re-poll-if-we-made-progress.patch [new file with mode: 0644]
queue-5.15/io_uring-do-not-recalculate-ppos-unnecessarily.patch [new file with mode: 0644]
queue-5.15/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch [new file with mode: 0644]
queue-5.15/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch [new file with mode: 0644]
queue-5.15/io_uring-ensure-that-cached-task-references-are-alwa.patch [new file with mode: 0644]
queue-5.15/io_uring-fix-async-accept-on-o_nonblock-sockets.patch [new file with mode: 0644]
queue-5.15/io_uring-improve-send-recv-error-handling.patch [new file with mode: 0644]
queue-5.15/io_uring-pass-in-epoll_uring_wake-for-eventfd-signal.patch [new file with mode: 0644]
queue-5.15/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch [new file with mode: 0644]
queue-5.15/io_uring-rw-defer-fsnotify-calls-to-task-context.patch [new file with mode: 0644]
queue-5.15/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch [new file with mode: 0644]
queue-5.15/io_uring-update-kiocb-ki_pos-at-execution-time.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/alsa-hda-realtek-fix-mute-micmute-leds-don-t-work-fo.patch b/queue-5.15/alsa-hda-realtek-fix-mute-micmute-leds-don-t-work-fo.patch
new file mode 100644 (file)
index 0000000..0242371
--- /dev/null
@@ -0,0 +1,37 @@
+From cf8d448a5fbdf2a0300125d11c2920002d9313b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Jan 2023 12:41:53 +0800
+Subject: ALSA: hda/realtek: fix mute/micmute LEDs don't work for a HP platform
+
+From: Jeremy Szu <jeremy.szu@canonical.com>
+
+[ Upstream commit 9c694fbfe6f36017b060ad74c7565cb379852e40 ]
+
+There is an HP platform that uses the ALC236 codec, which uses GPIO2 to
+control the mute LED and GPIO1 to control the micmute LED.
+Thus, add a quirk to make them work.
+
+Signed-off-by: Jeremy Szu <jeremy.szu@canonical.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20230105044154.8242-1-jeremy.szu@canonical.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index feb337083573..74fe0fe85834 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -9078,6 +9078,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+       SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
++      SND_PCI_QUIRK(0x103c, 0x8bf0, "HP", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
+       SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
+       SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+-- 
+2.39.0
+
diff --git a/queue-5.15/alsa-hda-realtek-fix-mute-micmute-leds-for-a-hp-prob.patch b/queue-5.15/alsa-hda-realtek-fix-mute-micmute-leds-for-a-hp-prob.patch
new file mode 100644 (file)
index 0000000..e50c395
--- /dev/null
@@ -0,0 +1,38 @@
+From a2739279bac7a3b48e5be5b874b4d40407ba163e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Nov 2022 10:28:47 +0800
+Subject: ALSA: hda/realtek: fix mute/micmute LEDs for a HP ProBook
+
+From: Andy Chi <andy.chi@canonical.com>
+
+[ Upstream commit 1d8025ec722d5e011f9299c46274eb21fb54a428 ]
+
+There is an HP ProBook which uses the ALC236 codec and needs the
+ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF quirk to make mute LED and
+micmute LED work.
+
+Signed-off-by: Andy Chi <andy.chi@canonical.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20221128022849.13759-1-andy.chi@canonical.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index c7321f5842b3..feb337083573 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -9076,6 +9076,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
++      SND_PCI_QUIRK(0x103c, 0x8b5d, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
++      SND_PCI_QUIRK(0x103c, 0x8b5e, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+       SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
+       SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
+       SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+-- 
+2.39.0
+
diff --git a/queue-5.15/drm-amd-delay-removal-of-the-firmware-framebuffer.patch b/queue-5.15/drm-amd-delay-removal-of-the-firmware-framebuffer.patch
new file mode 100644 (file)
index 0000000..7937c7a
--- /dev/null
@@ -0,0 +1,86 @@
+From cf3b8fb9b542f4bfd3beb12c8f3bb9c1166a3a1d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Jan 2023 18:01:19 -0500
+Subject: drm/amd: Delay removal of the firmware framebuffer
+
+[ Upstream commit 1923bc5a56daeeabd7e9093bad2febcd6af2416a ]
+
+Removing the firmware framebuffer from the driver means that even
+if the driver doesn't support the IP blocks in a GPU it will no
+longer be functional after the driver fails to initialize.
+
+This change will ensure that unsupported IP blocks at least cause
+the driver to work with the EFI framebuffer.
+
+Cc: stable@vger.kernel.org
+Suggested-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++++++++
+ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    | 6 ------
+ 2 files changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+index 0d998bc830c2..b5fe2c91f58c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -32,6 +32,7 @@
+ #include <linux/slab.h>
+ #include <linux/pci.h>
++#include <drm/drm_aperture.h>
+ #include <drm/drm_atomic_helper.h>
+ #include <drm/drm_probe_helper.h>
+ #include <drm/amdgpu_drm.h>
+@@ -89,6 +90,8 @@ MODULE_FIRMWARE("amdgpu/yellow_carp_gpu_info.bin");
+ #define AMDGPU_RESUME_MS              2000
++static const struct drm_driver amdgpu_kms_driver;
++
+ const char *amdgpu_asic_name[] = {
+       "TAHITI",
+       "PITCAIRN",
+@@ -3637,6 +3640,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,
+       if (r)
+               return r;
++      /* Get rid of things like offb */
++      r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver);
++      if (r)
++              return r;
++
+       /* doorbell bar mapping and doorbell index init*/
+       amdgpu_device_doorbell_init(adev);
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+index cabbf02eb054..c95cee3d4c9a 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+@@ -23,7 +23,6 @@
+  */
+ #include <drm/amdgpu_drm.h>
+-#include <drm/drm_aperture.h>
+ #include <drm/drm_drv.h>
+ #include <drm/drm_gem.h>
+ #include <drm/drm_vblank.h>
+@@ -2067,11 +2066,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
+       size = pci_resource_len(pdev, 0);
+       is_fw_fb = amdgpu_is_fw_framebuffer(base, size);
+-      /* Get rid of things like offb */
+-      ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver);
+-      if (ret)
+-              return ret;
+-
+       adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);
+       if (IS_ERR(adev))
+               return PTR_ERR(adev);
+-- 
+2.39.0
+
diff --git a/queue-5.15/drm-amdgpu-disable-runtime-pm-on-several-sienna-cich.patch b/queue-5.15/drm-amdgpu-disable-runtime-pm-on-several-sienna-cich.patch
new file mode 100644 (file)
index 0000000..2b295cd
--- /dev/null
@@ -0,0 +1,69 @@
+From c825716dde20494ca5f44c4c81158973736999f4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 27 Apr 2022 15:51:02 +0800
+Subject: drm/amdgpu: disable runtime pm on several sienna cichlid cards(v2)
+
+From: Guchun Chen <guchun.chen@amd.com>
+
+[ Upstream commit d1acd68b2b8924c804e1e3cc1bc5fa4d6b76176c ]
+
+Disable runtime power management on several sienna cichlid
+cards, otherwise SMU will possibly fail to be resumed from
+runtime suspend. Will drop this after a clean solution between
+kernel driver and SMU FW is available.
+
+amdgpu 0000:63:00.0: amdgpu: GECC is enabled
+amdgpu 0000:63:00.0: amdgpu: SECUREDISPLAY: securedisplay ta ucode is not available
+amdgpu 0000:63:00.0: amdgpu: SMU is resuming...
+amdgpu 0000:63:00.0: amdgpu: SMU: I'm not done with your command: SMN_C2PMSG_66:0x0000000E SMN_C2PMSG_82:0x00000080
+amdgpu 0000:63:00.0: amdgpu: Failed to SetDriverDramAddr!
+amdgpu 0000:63:00.0: amdgpu: Failed to setup smc hw!
+[drm:amdgpu_device_ip_resume_phase2 [amdgpu]] *ERROR* resume of IP block <smu> failed -62
+amdgpu 0000:63:00.0: amdgpu: amdgpu_device_ip_resume failed (-62)
+
+v2: separate to a function.
+
+Signed-off-by: Guchun Chen <guchun.chen@amd.com>
+Reviewed-by: Evan Quan <evan.quan@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 1923bc5a56da ("drm/amd: Delay removal of the firmware framebuffer")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+index 6744427577b3..43e30b9a2e02 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+@@ -43,6 +43,17 @@
+ #include "amdgpu_display.h"
+ #include "amdgpu_ras.h"
++static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev)
++{
++      /*
++       * Add below quirk on several sienna_cichlid cards to disable
++       * runtime pm to fix EMI failures.
++       */
++      if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) ||
++          ((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF)))
++              adev->runpm = false;
++}
++
+ void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
+ {
+       struct amdgpu_gpu_instance *gpu_instance;
+@@ -201,6 +212,9 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
+                */
+               if (adev->is_fw_fb)
+                       adev->runpm = false;
++
++              amdgpu_runtime_pm_quirk(adev);
++
+               if (adev->runpm)
+                       dev_info(adev->dev, "Using BACO for runtime pm\n");
+       }
+-- 
+2.39.0
+
diff --git a/queue-5.15/efi-fix-userspace-infinite-retry-read-efivars-after-.patch b/queue-5.15/efi-fix-userspace-infinite-retry-read-efivars-after-.patch
new file mode 100644 (file)
index 0000000..4d1096a
--- /dev/null
@@ -0,0 +1,46 @@
+From c368463eedf59bc987512cbbacc2f0609421bfaa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Dec 2022 23:09:36 +0800
+Subject: efi: fix userspace infinite retry read efivars after EFI runtime
+ services page fault
+
+From: Ding Hui <dinghui@sangfor.com.cn>
+
+[ Upstream commit e006ac3003080177cf0b673441a4241f77aaecce ]
+
+After [1][2], if we catch exceptions due to EFI runtime service, we will
+clear EFI_RUNTIME_SERVICES bit to disable EFI runtime service, then the
+subsequent routine which invoke the EFI runtime service should fail.
+
+But a userspace cat of efivars through /sys/firmware/efi/efivars/ will get stuck
+in an infinite loop calling read() because efivarfs_file_read() returns -EINTR.
+
+The -EINTR is converted from EFI_ABORTED by efi_status_to_err(), and is
+an improper return value in this situation, so let virt_efi_xxx() return
+EFI_DEVICE_ERROR, which is converted to -EIO for the invoker.
+
+Cc: <stable@vger.kernel.org>
+Fixes: 3425d934fc03 ("efi/x86: Handle page faults occurring while running EFI runtime services")
+Fixes: 23715a26c8d8 ("arm64: efi: Recover from synchronous exceptions occurring in firmware")
+Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
+Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/efi/runtime-wrappers.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/firmware/efi/runtime-wrappers.c b/drivers/firmware/efi/runtime-wrappers.c
+index f3e54f6616f0..60075e0e4943 100644
+--- a/drivers/firmware/efi/runtime-wrappers.c
++++ b/drivers/firmware/efi/runtime-wrappers.c
+@@ -62,6 +62,7 @@ struct efi_runtime_work efi_rts_work;
+                                                                       \
+       if (!efi_enabled(EFI_RUNTIME_SERVICES)) {                       \
+               pr_warn_once("EFI Runtime Services are disabled!\n");   \
++              efi_rts_work.status = EFI_DEVICE_ERROR;                 \
+               goto exit;                                              \
+       }                                                               \
+                                                                       \
+-- 
+2.39.0
+
diff --git a/queue-5.15/eventfd-provide-a-eventfd_signal_mask-helper.patch b/queue-5.15/eventfd-provide-a-eventfd_signal_mask-helper.patch
new file mode 100644 (file)
index 0000000..abe4f32
--- /dev/null
@@ -0,0 +1,120 @@
+From 54b5c7b71df2983aa7b663b231216bc6fb51ba6b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Nov 2022 10:13:44 -0700
+Subject: eventfd: provide a eventfd_signal_mask() helper
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 03e02acda8e267a8183e1e0ed289ff1ef9cd7ed8 ]
+
+This is identical to eventfd_signal(), but it allows the caller to pass
+in a mask to be used for the poll wakeup key. The use case is avoiding
+repeated multishot triggers if we have a dependency between eventfd and
+io_uring.
+
+If we setup an eventfd context and register that as the io_uring eventfd,
+and at the same time queue a multishot poll request for the eventfd
+context, then any CQE posted will repeatedly trigger the multishot request
+until it terminates when the CQ ring overflows.
+
+In preparation for io_uring detecting this circular dependency, add the
+mentioned helper so that io_uring can pass in EPOLL_URING_WAKE as part of the
+poll wakeup key.
+
+Cc: stable@vger.kernel.org # 6.0
+[axboe: fold in !CONFIG_EVENTFD fix from Zhang Qilong]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventfd.c            | 37 +++++++++++++++++++++----------------
+ include/linux/eventfd.h |  7 +++++++
+ 2 files changed, 28 insertions(+), 16 deletions(-)
+
+diff --git a/fs/eventfd.c b/fs/eventfd.c
+index c0ffee99ad23..249ca6c0b784 100644
+--- a/fs/eventfd.c
++++ b/fs/eventfd.c
+@@ -43,21 +43,7 @@ struct eventfd_ctx {
+       int id;
+ };
+-/**
+- * eventfd_signal - Adds @n to the eventfd counter.
+- * @ctx: [in] Pointer to the eventfd context.
+- * @n: [in] Value of the counter to be added to the eventfd internal counter.
+- *          The value cannot be negative.
+- *
+- * This function is supposed to be called by the kernel in paths that do not
+- * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
+- * value, and we signal this as overflow condition by returning a EPOLLERR
+- * to poll(2).
+- *
+- * Returns the amount by which the counter was incremented.  This will be less
+- * than @n if the counter has overflowed.
+- */
+-__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask)
+ {
+       unsigned long flags;
+@@ -78,12 +64,31 @@ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+               n = ULLONG_MAX - ctx->count;
+       ctx->count += n;
+       if (waitqueue_active(&ctx->wqh))
+-              wake_up_locked_poll(&ctx->wqh, EPOLLIN);
++              wake_up_locked_poll(&ctx->wqh, EPOLLIN | mask);
+       current->in_eventfd = 0;
+       spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+       return n;
+ }
++
++/**
++ * eventfd_signal - Adds @n to the eventfd counter.
++ * @ctx: [in] Pointer to the eventfd context.
++ * @n: [in] Value of the counter to be added to the eventfd internal counter.
++ *          The value cannot be negative.
++ *
++ * This function is supposed to be called by the kernel in paths that do not
++ * allow sleeping. In this function we allow the counter to reach the ULLONG_MAX
++ * value, and we signal this as overflow condition by returning a EPOLLERR
++ * to poll(2).
++ *
++ * Returns the amount by which the counter was incremented.  This will be less
++ * than @n if the counter has overflowed.
++ */
++__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
++{
++      return eventfd_signal_mask(ctx, n, 0);
++}
+ EXPORT_SYMBOL_GPL(eventfd_signal);
+ static void eventfd_free_ctx(struct eventfd_ctx *ctx)
+diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
+index 3cd202d3eefb..36a486505b08 100644
+--- a/include/linux/eventfd.h
++++ b/include/linux/eventfd.h
+@@ -40,6 +40,7 @@ struct file *eventfd_fget(int fd);
+ struct eventfd_ctx *eventfd_ctx_fdget(int fd);
+ struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
+ __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
++__u64 eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n, unsigned mask);
+ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait,
+                                 __u64 *cnt);
+ void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt);
+@@ -66,6 +67,12 @@ static inline int eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
+       return -ENOSYS;
+ }
++static inline int eventfd_signal_mask(struct eventfd_ctx *ctx, __u64 n,
++                                    unsigned mask)
++{
++      return -ENOSYS;
++}
++
+ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx)
+ {
+-- 
+2.39.0
+
diff --git a/queue-5.15/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch b/queue-5.15/eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch
new file mode 100644 (file)
index 0000000..e89c674
--- /dev/null
@@ -0,0 +1,119 @@
+From 7ac2c34204d7579365219f9337decd8c82bc08b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 20 Nov 2022 10:10:53 -0700
+Subject: eventpoll: add EPOLL_URING_WAKE poll wakeup flag
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit caf1aeaffc3b09649a56769e559333ae2c4f1802 ]
+
+We can have dependencies between epoll and io_uring. Consider an epoll
+context, identified by the epfd file descriptor, and an io_uring file
+descriptor identified by iofd. If we add iofd to the epfd context, and
+arm a multishot poll request for epfd with iofd, then the multishot
+poll request will repeatedly trigger and generate events until terminated
+by CQ ring overflow. This isn't a desired behavior.
+
+Add EPOLL_URING_WAKE so that io_uring can pass it in as part of the poll wakeup
+key, and io_uring can check for that to detect a potential recursive
+invocation.
+
+Cc: stable@vger.kernel.org # 6.0
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c                 | 18 ++++++++++--------
+ include/uapi/linux/eventpoll.h |  6 ++++++
+ 2 files changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index cf326c53db0f..1ec197825544 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -484,7 +484,8 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
+  */
+ #ifdef CONFIG_DEBUG_LOCK_ALLOC
+-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
++                           unsigned pollflags)
+ {
+       struct eventpoll *ep_src;
+       unsigned long flags;
+@@ -515,16 +516,17 @@ static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
+       }
+       spin_lock_irqsave_nested(&ep->poll_wait.lock, flags, nests);
+       ep->nests = nests + 1;
+-      wake_up_locked_poll(&ep->poll_wait, EPOLLIN);
++      wake_up_locked_poll(&ep->poll_wait, EPOLLIN | pollflags);
+       ep->nests = 0;
+       spin_unlock_irqrestore(&ep->poll_wait.lock, flags);
+ }
+ #else
+-static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi)
++static void ep_poll_safewake(struct eventpoll *ep, struct epitem *epi,
++                           unsigned pollflags)
+ {
+-      wake_up_poll(&ep->poll_wait, EPOLLIN);
++      wake_up_poll(&ep->poll_wait, EPOLLIN | pollflags);
+ }
+ #endif
+@@ -735,7 +737,7 @@ static void ep_free(struct eventpoll *ep)
+       /* We need to release all tasks waiting for these file */
+       if (waitqueue_active(&ep->poll_wait))
+-              ep_poll_safewake(ep, NULL);
++              ep_poll_safewake(ep, NULL, 0);
+       /*
+        * We need to lock this because we could be hit by
+@@ -1201,7 +1203,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
+       /* We have to call this outside the lock */
+       if (pwake)
+-              ep_poll_safewake(ep, epi);
++              ep_poll_safewake(ep, epi, pollflags & EPOLL_URING_WAKE);
+       if (!(epi->event.events & EPOLLEXCLUSIVE))
+               ewake = 1;
+@@ -1546,7 +1548,7 @@ static int ep_insert(struct eventpoll *ep, const struct epoll_event *event,
+       /* We have to call this outside the lock */
+       if (pwake)
+-              ep_poll_safewake(ep, NULL);
++              ep_poll_safewake(ep, NULL, 0);
+       return 0;
+ }
+@@ -1622,7 +1624,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi,
+       /* We have to call this outside the lock */
+       if (pwake)
+-              ep_poll_safewake(ep, NULL);
++              ep_poll_safewake(ep, NULL, 0);
+       return 0;
+ }
+diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
+index 8a3432d0f0dc..e687658843b1 100644
+--- a/include/uapi/linux/eventpoll.h
++++ b/include/uapi/linux/eventpoll.h
+@@ -41,6 +41,12 @@
+ #define EPOLLMSG      (__force __poll_t)0x00000400
+ #define EPOLLRDHUP    (__force __poll_t)0x00002000
++/*
++ * Internal flag - wakeup generated by io_uring, used to detect recursion back
++ * into the io_uring poll handler.
++ */
++#define EPOLL_URING_WAKE      ((__force __poll_t)(1U << 27))
++
+ /* Set exclusive wakeup mode for the target file descriptor */
+ #define EPOLLEXCLUSIVE        ((__force __poll_t)(1U << 28))
+-- 
+2.39.0
+
diff --git a/queue-5.15/hugetlb-unshare-some-pmds-when-splitting-vmas.patch b/queue-5.15/hugetlb-unshare-some-pmds-when-splitting-vmas.patch
new file mode 100644 (file)
index 0000000..f902b68
--- /dev/null
@@ -0,0 +1,128 @@
+From 8b58f4839634ed814a53f71c0831a2e4fa0e9292 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 4 Jan 2023 23:19:10 +0000
+Subject: hugetlb: unshare some PMDs when splitting VMAs
+
+From: James Houghton <jthoughton@google.com>
+
+[ Upstream commit b30c14cd61025eeea2f2e8569606cd167ba9ad2d ]
+
+PMD sharing can only be done in PUD_SIZE-aligned pieces of VMAs; however,
+it is possible that HugeTLB VMAs are split without unsharing the PMDs
+first.
+
+Without this fix, it is possible to hit the uffd-wp-related WARN_ON_ONCE
+in hugetlb_change_protection [1].  The key there is that
+hugetlb_unshare_all_pmds will not attempt to unshare PMDs in
+non-PUD_SIZE-aligned sections of the VMA.
+
+It might seem ideal to unshare in hugetlb_vm_op_open, but we need to
+unshare in both the new and old VMAs, so unsharing in hugetlb_vm_op_split
+seems natural.
+
+[1]: https://lore.kernel.org/linux-mm/CADrL8HVeOkj0QH5VZZbRzybNE8CG-tEGFshnA+bG9nMgcWtBSg@mail.gmail.com/
+
+Link: https://lkml.kernel.org/r/20230104231910.1464197-1-jthoughton@google.com
+Fixes: 6dfeaff93be1 ("hugetlb/userfaultfd: unshare all pmds for hugetlbfs when register wp")
+Signed-off-by: James Houghton <jthoughton@google.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Acked-by: Peter Xu <peterx@redhat.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Muchun Song <songmuchun@bytedance.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/hugetlb.c | 44 +++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 35 insertions(+), 9 deletions(-)
+
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index e7bd42f23667..8599f16d4aa4 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -82,6 +82,8 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
+ /* Forward declaration */
+ static int hugetlb_acct_memory(struct hstate *h, long delta);
++static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
++              unsigned long start, unsigned long end);
+ static inline bool subpool_is_free(struct hugepage_subpool *spool)
+ {
+@@ -4164,6 +4166,25 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
+ {
+       if (addr & ~(huge_page_mask(hstate_vma(vma))))
+               return -EINVAL;
++
++      /*
++       * PMD sharing is only possible for PUD_SIZE-aligned address ranges
++       * in HugeTLB VMAs. If we will lose PUD_SIZE alignment due to this
++       * split, unshare PMDs in the PUD_SIZE interval surrounding addr now.
++       */
++      if (addr & ~PUD_MASK) {
++              /*
++               * hugetlb_vm_op_split is called right before we attempt to
++               * split the VMA. We will need to unshare PMDs in the old and
++               * new VMAs, so let's unshare before we split.
++               */
++              unsigned long floor = addr & PUD_MASK;
++              unsigned long ceil = floor + PUD_SIZE;
++
++              if (floor >= vma->vm_start && ceil <= vma->vm_end)
++                      hugetlb_unshare_pmds(vma, floor, ceil);
++      }
++
+       return 0;
+ }
+@@ -6349,26 +6370,21 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
+       }
+ }
+-/*
+- * This function will unconditionally remove all the shared pmd pgtable entries
+- * within the specific vma for a hugetlbfs memory range.
+- */
+-void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
++static void hugetlb_unshare_pmds(struct vm_area_struct *vma,
++                                 unsigned long start,
++                                 unsigned long end)
+ {
+       struct hstate *h = hstate_vma(vma);
+       unsigned long sz = huge_page_size(h);
+       struct mm_struct *mm = vma->vm_mm;
+       struct mmu_notifier_range range;
+-      unsigned long address, start, end;
++      unsigned long address;
+       spinlock_t *ptl;
+       pte_t *ptep;
+       if (!(vma->vm_flags & VM_MAYSHARE))
+               return;
+-      start = ALIGN(vma->vm_start, PUD_SIZE);
+-      end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
+-
+       if (start >= end)
+               return;
+@@ -6400,6 +6416,16 @@ void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+       mmu_notifier_invalidate_range_end(&range);
+ }
++/*
++ * This function will unconditionally remove all the shared pmd pgtable entries
++ * within the specific vma for a hugetlbfs memory range.
++ */
++void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
++{
++      hugetlb_unshare_pmds(vma, ALIGN(vma->vm_start, PUD_SIZE),
++                      ALIGN_DOWN(vma->vm_end, PUD_SIZE));
++}
++
+ #ifdef CONFIG_CMA
+ static bool cma_reserve_called __initdata;
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-add-flag-for-disabling-provided-buffer-recy.patch b/queue-5.15/io_uring-add-flag-for-disabling-provided-buffer-recy.patch
new file mode 100644 (file)
index 0000000..b1427bd
--- /dev/null
@@ -0,0 +1,60 @@
+From e9452c133eee9a693368f144cdbea07cfe5fe492 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Mar 2022 09:30:05 -0600
+Subject: io_uring: add flag for disabling provided buffer recycling
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 8a3e8ee56417f5e0e66580d93941ed9d6f4c8274 upstream.
+
+If we need to continue doing this IO, then we don't want a potentially
+selected buffer recycled. Add a flag for that.
+
+Set this for recv/recvmsg if they do partial IO.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 04441e981624..2350d43aa782 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -740,6 +740,7 @@ enum {
+       REQ_F_CREDS_BIT,
+       REQ_F_REFCOUNT_BIT,
+       REQ_F_ARM_LTIMEOUT_BIT,
++      REQ_F_PARTIAL_IO_BIT,
+       /* keep async read/write and isreg together and in order */
+       REQ_F_NOWAIT_READ_BIT,
+       REQ_F_NOWAIT_WRITE_BIT,
+@@ -795,6 +796,8 @@ enum {
+       REQ_F_REFCOUNT          = BIT(REQ_F_REFCOUNT_BIT),
+       /* there is a linked timeout that has to be armed */
+       REQ_F_ARM_LTIMEOUT      = BIT(REQ_F_ARM_LTIMEOUT_BIT),
++      /* request has already done partial IO */
++      REQ_F_PARTIAL_IO        = BIT(REQ_F_PARTIAL_IO_BIT),
+ };
+ struct async_poll {
+@@ -5123,6 +5126,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+                       ret = -EINTR;
+               if (ret > 0 && io_net_retry(sock, flags)) {
+                       sr->done_io += ret;
++                      req->flags |= REQ_F_PARTIAL_IO;
+                       return io_setup_async_msg(req, kmsg);
+               }
+               req_set_fail(req);
+@@ -5196,6 +5200,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
+                       sr->len -= ret;
+                       sr->buf += ret;
+                       sr->done_io += ret;
++                      req->flags |= REQ_F_PARTIAL_IO;
+                       return -EAGAIN;
+               }
+               req_set_fail(req);
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-allow-re-poll-if-we-made-progress.patch b/queue-5.15/io_uring-allow-re-poll-if-we-made-progress.patch
new file mode 100644 (file)
index 0000000..f543583
--- /dev/null
@@ -0,0 +1,53 @@
+From 5e575c1b387f1229b83e69da35f905ead24efd01 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 10:39:22 -0700
+Subject: io_uring: allow re-poll if we made progress
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 10c873334febaeea9aa0c25c10b5ac0951b77a5f upstream.
+
+We currently check REQ_F_POLLED before arming async poll for a
+notification to retry. If it's set, then we don't allow poll and will
+punt to io-wq instead. This is done to prevent a situation where a buggy
+driver will repeatedly return that there's space/data available yet we
+get -EAGAIN.
+
+However, if we already transferred data, then it should be safe to rely
+on poll again. Gate the check on whether or not REQ_F_PARTIAL_IO is
+also set.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 3fb76863fed4..997a7264e1d4 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -5853,7 +5853,7 @@ static int io_arm_poll_handler(struct io_kiocb *req)
+       if (!req->file || !file_can_poll(req->file))
+               return IO_APOLL_ABORTED;
+-      if (req->flags & REQ_F_POLLED)
++      if ((req->flags & (REQ_F_POLLED|REQ_F_PARTIAL_IO)) == REQ_F_POLLED)
+               return IO_APOLL_ABORTED;
+       if (!def->pollin && !def->pollout)
+               return IO_APOLL_ABORTED;
+@@ -5869,7 +5869,10 @@ static int io_arm_poll_handler(struct io_kiocb *req)
+               mask |= POLLOUT | POLLWRNORM;
+       }
+-      apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
++      if (req->flags & REQ_F_POLLED)
++              apoll = req->apoll;
++      else
++              apoll = kmalloc(sizeof(*apoll), GFP_ATOMIC);
+       if (unlikely(!apoll))
+               return IO_APOLL_ABORTED;
+       apoll->double_poll = NULL;
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-do-not-recalculate-ppos-unnecessarily.patch b/queue-5.15/io_uring-do-not-recalculate-ppos-unnecessarily.patch
new file mode 100644 (file)
index 0000000..a84f710
--- /dev/null
@@ -0,0 +1,100 @@
+From 7fb0202e019fb7add27e0d80961adf708996a5b9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Feb 2022 02:55:03 -0800
+Subject: io_uring: do not recalculate ppos unnecessarily
+
+From: Dylan Yudaken <dylany@fb.com>
+
+commit b4aec40015953b65f2f114641e7fd7714c8df8e6 upstream.
+
+There is a slight optimisation to be had by calculating the correct pos
+pointer inside io_kiocb_update_pos and then using that later.
+
+It seems code size drops by a bit:
+000000000000a1b0 0000000000000400 t io_read
+000000000000a5b0 0000000000000319 t io_write
+
+vs
+000000000000a1b0 00000000000003f6 t io_read
+000000000000a5b0 0000000000000310 t io_write
+
+Signed-off-by: Dylan Yudaken <dylany@fb.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index d9396cfaa4f3..73d261004c4a 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3003,18 +3003,22 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+       }
+ }
+-static inline void io_kiocb_update_pos(struct io_kiocb *req)
++static inline loff_t *io_kiocb_update_pos(struct io_kiocb *req)
+ {
+       struct kiocb *kiocb = &req->rw.kiocb;
++      bool is_stream = req->file->f_mode & FMODE_STREAM;
+       if (kiocb->ki_pos == -1) {
+-              if (!(req->file->f_mode & FMODE_STREAM)) {
++              if (!is_stream) {
+                       req->flags |= REQ_F_CUR_POS;
+                       kiocb->ki_pos = req->file->f_pos;
++                      return &kiocb->ki_pos;
+               } else {
+                       kiocb->ki_pos = 0;
++                      return NULL;
+               }
+       }
++      return is_stream ? NULL : &kiocb->ki_pos;
+ }
+ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
+@@ -3540,6 +3544,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+       struct iov_iter_state __state, *state;
+       ssize_t ret, ret2;
++      loff_t *ppos;
+       if (rw) {
+               iter = &rw->iter;
+@@ -3572,9 +3577,9 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
+               return ret ?: -EAGAIN;
+       }
+-      io_kiocb_update_pos(req);
++      ppos = io_kiocb_update_pos(req);
+-      ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
++      ret = rw_verify_area(READ, req->file, ppos, req->result);
+       if (unlikely(ret)) {
+               kfree(iovec);
+               return ret;
+@@ -3678,6 +3683,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+       struct iov_iter_state __state, *state;
+       ssize_t ret, ret2;
++      loff_t *ppos;
+       if (rw) {
+               iter = &rw->iter;
+@@ -3708,9 +3714,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
+           (req->flags & REQ_F_ISREG))
+               goto copy_iov;
+-      io_kiocb_update_pos(req);
++      ppos = io_kiocb_update_pos(req);
+-      ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
++      ret = rw_verify_area(WRITE, req->file, ppos, req->result);
+       if (unlikely(ret))
+               goto out_free;
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch b/queue-5.15/io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch
new file mode 100644 (file)
index 0000000..8cc7577
--- /dev/null
@@ -0,0 +1,47 @@
+From a1d18b218b3c9ad3c44f17686ed39780d552a243 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 Jan 2023 20:50:24 -0700
+Subject: io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 46a525e199e4037516f7e498c18f065b09df32ac upstream.
+
+This isn't a reliable mechanism to tell if we have task_work pending, we
+really should be looking at whether we have any items queued. This is
+problematic if forward progress is gated on running said task_work. One
+such example is reading from a pipe, where the write side has been closed
+right before the read is started. The fput() of the file queues TWA_RESUME
+task_work, and we need that task_work to be run before ->release() is
+called for the pipe. If ->release() isn't called, then the read will sit
+forever waiting on data that will never arise.
+
+Fix this by changing io_run_task_work() so it checks if we have task_work pending
+rather than rely on TIF_NOTIFY_SIGNAL for that. The latter obviously
+doesn't work for task_work that is queued without TWA_SIGNAL.
+
+Reported-by: Christiano Haesbaert <haesbaert@haesbaert.org>
+Cc: stable@vger.kernel.org
+Link: https://github.com/axboe/liburing/issues/665
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io-wq.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/io_uring/io-wq.c b/io_uring/io-wq.c
+index 87bc38b47103..81485c1a9879 100644
+--- a/io_uring/io-wq.c
++++ b/io_uring/io-wq.c
+@@ -513,7 +513,7 @@ static struct io_wq_work *io_get_next_work(struct io_wqe_acct *acct,
+ static bool io_flush_signals(void)
+ {
+-      if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) {
++      if (test_thread_flag(TIF_NOTIFY_SIGNAL) || current->task_works) {
+               __set_current_state(TASK_RUNNING);
+               tracehook_notify_signal();
+               return true;
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch b/queue-5.15/io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch
new file mode 100644 (file)
index 0000000..6c18140
--- /dev/null
@@ -0,0 +1,107 @@
+From 512090d1131532991640fe4a0a1bf5a72e9f451b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 10:21:22 -0700
+Subject: io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 7ba89d2af17aa879dda30f5d5d3f152e587fc551 upstream.
+
+We currently don't attempt to get the full asked for length even if
+MSG_WAITALL is set, if we get a partial receive. If we do see a partial
+receive, then just note how many bytes we did and return -EAGAIN to
+get it retried.
+
+The iov is advanced appropriately for the vector based case, and we
+manually bump the buffer and remainder for the non-vector case.
+
+Cc: stable@vger.kernel.org
+Reported-by: Constantine Gavrilov <constantine.gavrilov@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 28 ++++++++++++++++++++++++++++
+ 1 file changed, 28 insertions(+)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 93023562d548..04441e981624 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -578,6 +578,7 @@ struct io_sr_msg {
+       int                             msg_flags;
+       int                             bgid;
+       size_t                          len;
++      size_t                          done_io;
+       struct io_buffer                *kbuf;
+ };
+@@ -5063,12 +5064,21 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       if (req->ctx->compat)
+               sr->msg_flags |= MSG_CMSG_COMPAT;
+ #endif
++      sr->done_io = 0;
+       return 0;
+ }
++static bool io_net_retry(struct socket *sock, int flags)
++{
++      if (!(flags & MSG_WAITALL))
++              return false;
++      return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
++}
++
+ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       struct io_async_msghdr iomsg, *kmsg;
++      struct io_sr_msg *sr = &req->sr_msg;
+       struct socket *sock;
+       struct io_buffer *kbuf;
+       unsigned flags;
+@@ -5111,6 +5121,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+                       return io_setup_async_msg(req, kmsg);
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
++              if (ret > 0 && io_net_retry(sock, flags)) {
++                      sr->done_io += ret;
++                      return io_setup_async_msg(req, kmsg);
++              }
+               req_set_fail(req);
+       } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+               req_set_fail(req);
+@@ -5122,6 +5136,10 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
++      if (ret >= 0)
++              ret += sr->done_io;
++      else if (sr->done_io)
++              ret = sr->done_io;
+       __io_req_complete(req, issue_flags, ret, cflags);
+       return 0;
+ }
+@@ -5174,12 +5192,22 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
+                       return -EAGAIN;
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
++              if (ret > 0 && io_net_retry(sock, flags)) {
++                      sr->len -= ret;
++                      sr->buf += ret;
++                      sr->done_io += ret;
++                      return -EAGAIN;
++              }
+               req_set_fail(req);
+       } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
+               req_set_fail(req);
+       }
+       if (req->flags & REQ_F_BUFFER_SELECTED)
+               cflags = io_put_recv_kbuf(req);
++      if (ret >= 0)
++              ret += sr->done_io;
++      else if (sr->done_io)
++              ret = sr->done_io;
+       __io_req_complete(req, issue_flags, ret, cflags);
+       return 0;
+ }
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-ensure-that-cached-task-references-are-alwa.patch b/queue-5.15/io_uring-ensure-that-cached-task-references-are-alwa.patch
new file mode 100644 (file)
index 0000000..8c44856
--- /dev/null
@@ -0,0 +1,55 @@
+From dccf54bc7a377c5df8da5d2dcf86ac76bf255e61 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 12:36:08 -0700
+Subject: io_uring: ensure that cached task references are always put on exit
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit e775f93f2ab976a2cdb4a7b53063cbe890904f73 upstream.
+
+io_uring caches task references to avoid doing atomics for each of them
+per request. If a request is put from the same task that allocated it,
+then we can maintain a per-ctx cache of them. This obviously relies
+on io_uring always pruning caches in a reliable way, and there's
+currently a case off io_uring fd release where we can miss that.
+
+One example is a ring setup with IOPOLL, which relies on the task
+polling for completions, which will free them. However, if such a task
+submits a request and then exits or closes the ring without reaping
+the completion, then ring release will reap and put. If release happens
+from that very same task, the completed request task refs will get
+put back into the cache pool. This is problematic, as we're now beyond
+the point of pruning caches.
+
+Manually drop these caches after doing an IOPOLL reap. This releases
+references from the current task, which is enough. If another task
+happens to be doing the release, then the caching will not be
+triggered and there's no issue.
+
+Cc: stable@vger.kernel.org
+Fixes: e98e49b2bbf7 ("io_uring: extend task put optimisations")
+Reported-by: Homin Rhee <hominlab@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index e1e15d40d758..2caef6417260 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -9684,6 +9684,10 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
+       /* if we failed setting up the ctx, we might not have any rings */
+       io_iopoll_try_reap_events(ctx);
++      /* drop cached put refs after potentially doing completions */
++      if (current->io_uring)
++              io_uring_drop_tctx_refs(current);
++
+       INIT_WORK(&ctx->exit_work, io_ring_exit_work);
+       /*
+        * Use system_unbound_wq to avoid spawning tons of event kworkers
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-fix-async-accept-on-o_nonblock-sockets.patch b/queue-5.15/io_uring-fix-async-accept-on-o_nonblock-sockets.patch
new file mode 100644 (file)
index 0000000..bd6707c
--- /dev/null
@@ -0,0 +1,50 @@
+From ca760f9e0df16e3351879479cdb05b61ee1789fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 09:13:12 -0700
+Subject: io_uring: fix async accept on O_NONBLOCK sockets
+
+From: Dylan Yudaken <dylany@meta.com>
+
+commit a73825ba70c93e1eb39a845bb3d9885a787f8ffe upstream.
+
+Do not set REQ_F_NOWAIT if the socket is non blocking. When enabled this
+causes the accept to immediately post a CQE with EAGAIN, which means you
+cannot perform an accept SQE on a NONBLOCK socket asynchronously.
+
+By removing the flag if there is no pending accept then poll is armed as
+usual and when a connection comes in the CQE is posted.
+
+Signed-off-by: Dylan Yudaken <dylany@fb.com>
+Link: https://lore.kernel.org/r/20220324143435.2875844-1-dylany@fb.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 997a7264e1d4..e1e15d40d758 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -5272,9 +5272,6 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
+       struct file *file;
+       int ret, fd;
+-      if (req->file->f_flags & O_NONBLOCK)
+-              req->flags |= REQ_F_NOWAIT;
+-
+       if (!fixed) {
+               fd = __get_unused_fd_flags(accept->flags, accept->nofile);
+               if (unlikely(fd < 0))
+@@ -5286,6 +5283,8 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
+               if (!fixed)
+                       put_unused_fd(fd);
+               ret = PTR_ERR(file);
++              /* safe to retry */
++              req->flags |= REQ_F_PARTIAL_IO;
+               if (ret == -EAGAIN && force_nonblock)
+                       return -EAGAIN;
+               if (ret == -ERESTARTSYS)
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-improve-send-recv-error-handling.patch b/queue-5.15/io_uring-improve-send-recv-error-handling.patch
new file mode 100644 (file)
index 0000000..ee2387d
--- /dev/null
@@ -0,0 +1,126 @@
+From 7ec39780241bf364da91b8ac8da42370c0ae0a0d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 Nov 2021 00:07:47 +0000
+Subject: io_uring: improve send/recv error handling
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+commit 7297ce3d59449de49d3c9e1f64ae25488750a1fc upstream.
+
+Hide all error handling under a common if block, which removes two extra ifs on
+the success path and keeps the handling more condensed.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/5761545158a12968f3caf30f747eea65ed75dfc1.1637524285.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 55 +++++++++++++++++++++++++--------------------
+ 1 file changed, 31 insertions(+), 24 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index d855e668f37c..93023562d548 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -4866,17 +4866,18 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+       ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
+-      if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
+-              return io_setup_async_msg(req, kmsg);
+-      if (ret == -ERESTARTSYS)
+-              ret = -EINTR;
++      if (ret < min_ret) {
++              if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
++                      return io_setup_async_msg(req, kmsg);
++              if (ret == -ERESTARTSYS)
++                      ret = -EINTR;
++              req_set_fail(req);
++      }
+       /* fast path, check for non-NULL to avoid function call */
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+-      if (ret < min_ret)
+-              req_set_fail(req);
+       __io_req_complete(req, issue_flags, ret, 0);
+       return 0;
+ }
+@@ -4912,13 +4913,13 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
+       msg.msg_flags = flags;
+       ret = sock_sendmsg(sock, &msg);
+-      if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
+-              return -EAGAIN;
+-      if (ret == -ERESTARTSYS)
+-              ret = -EINTR;
+-
+-      if (ret < min_ret)
++      if (ret < min_ret) {
++              if (ret == -EAGAIN && (issue_flags & IO_URING_F_NONBLOCK))
++                      return -EAGAIN;
++              if (ret == -ERESTARTSYS)
++                      ret = -EINTR;
+               req_set_fail(req);
++      }
+       __io_req_complete(req, issue_flags, ret, 0);
+       return 0;
+ }
+@@ -5105,10 +5106,15 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+       ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
+                                       kmsg->uaddr, flags);
+-      if (force_nonblock && ret == -EAGAIN)
+-              return io_setup_async_msg(req, kmsg);
+-      if (ret == -ERESTARTSYS)
+-              ret = -EINTR;
++      if (ret < min_ret) {
++              if (ret == -EAGAIN && force_nonblock)
++                      return io_setup_async_msg(req, kmsg);
++              if (ret == -ERESTARTSYS)
++                      ret = -EINTR;
++              req_set_fail(req);
++      } else if ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
++              req_set_fail(req);
++      }
+       if (req->flags & REQ_F_BUFFER_SELECTED)
+               cflags = io_put_recv_kbuf(req);
+@@ -5116,8 +5122,6 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
+-      if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
+-              req_set_fail(req);
+       __io_req_complete(req, issue_flags, ret, cflags);
+       return 0;
+ }
+@@ -5164,15 +5168,18 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
+               min_ret = iov_iter_count(&msg.msg_iter);
+       ret = sock_recvmsg(sock, &msg, flags);
+-      if (force_nonblock && ret == -EAGAIN)
+-              return -EAGAIN;
+-      if (ret == -ERESTARTSYS)
+-              ret = -EINTR;
+ out_free:
++      if (ret < min_ret) {
++              if (ret == -EAGAIN && force_nonblock)
++                      return -EAGAIN;
++              if (ret == -ERESTARTSYS)
++                      ret = -EINTR;
++              req_set_fail(req);
++      } else if ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))) {
++              req_set_fail(req);
++      }
+       if (req->flags & REQ_F_BUFFER_SELECTED)
+               cflags = io_put_recv_kbuf(req);
+-      if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
+-              req_set_fail(req);
+       __io_req_complete(req, issue_flags, ret, cflags);
+       return 0;
+ }
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-pass-in-epoll_uring_wake-for-eventfd-signal.patch b/queue-5.15/io_uring-pass-in-epoll_uring_wake-for-eventfd-signal.patch
new file mode 100644 (file)
index 0000000..e6147b8
--- /dev/null
@@ -0,0 +1,87 @@
+From 9408d99dda879989d1b7d1084aeed040eac529fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 23 Dec 2022 07:04:49 -0700
+Subject: io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 4464853277d0ccdb9914608dd1332f0fa2f9846f ]
+
+Pass in EPOLL_URING_WAKE when signaling eventfd or doing poll related
+wakeups, so that we can check for a circular event dependency between
+eventfd and epoll. If this flag is set when our wakeup handlers are
+called, then we know we have a dependency that needs to terminate
+multishot requests.
+
+eventfd and epoll are the only such possible dependencies.
+
+Cc: stable@vger.kernel.org # 6.0
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 27 ++++++++++++++++++++-------
+ 1 file changed, 20 insertions(+), 7 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 9a01188ff45a..d855e668f37c 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -1629,13 +1629,15 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx)
+        * wake as many waiters as we need to.
+        */
+       if (wq_has_sleeper(&ctx->cq_wait))
+-              wake_up_all(&ctx->cq_wait);
++              __wake_up(&ctx->cq_wait, TASK_NORMAL, 0,
++                              poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
+       if (ctx->sq_data && waitqueue_active(&ctx->sq_data->wait))
+               wake_up(&ctx->sq_data->wait);
+       if (io_should_trigger_evfd(ctx))
+-              eventfd_signal(ctx->cq_ev_fd, 1);
++              eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE);
+       if (waitqueue_active(&ctx->poll_wait))
+-              wake_up_interruptible(&ctx->poll_wait);
++              __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0,
++                              poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
+ }
+ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
+@@ -1645,12 +1647,14 @@ static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
+       if (ctx->flags & IORING_SETUP_SQPOLL) {
+               if (waitqueue_active(&ctx->cq_wait))
+-                      wake_up_all(&ctx->cq_wait);
++                      __wake_up(&ctx->cq_wait, TASK_NORMAL, 0,
++                                poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
+       }
+       if (io_should_trigger_evfd(ctx))
+-              eventfd_signal(ctx->cq_ev_fd, 1);
++              eventfd_signal_mask(ctx->cq_ev_fd, 1, EPOLL_URING_WAKE);
+       if (waitqueue_active(&ctx->poll_wait))
+-              wake_up_interruptible(&ctx->poll_wait);
++              __wake_up(&ctx->poll_wait, TASK_INTERRUPTIBLE, 0,
++                              poll_to_key(EPOLL_URING_WAKE | EPOLLIN));
+ }
+ /* Returns true if there are no backlogged entries after the flush */
+@@ -5636,8 +5640,17 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+       if (mask && !(mask & poll->events))
+               return 0;
+-      if (io_poll_get_ownership(req))
++      if (io_poll_get_ownership(req)) {
++              /*
++               * If we trigger a multishot poll off our own wakeup path,
++               * disable multishot as there is a circular dependency between
++               * CQ posting and triggering the event.
++               */
++              if (mask & EPOLL_URING_WAKE)
++                      poll->events |= EPOLLONESHOT;
++
+               __io_poll_execute(req, mask);
++      }
+       return 1;
+ }
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch b/queue-5.15/io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch
new file mode 100644 (file)
index 0000000..e8a3c82
--- /dev/null
@@ -0,0 +1,65 @@
+From 014aa661545c102fdef146dc638677d98689a1e4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Feb 2022 02:55:01 -0800
+Subject: io_uring: remove duplicated calls to io_kiocb_ppos
+
+From: Dylan Yudaken <dylany@fb.com>
+
+commit af9c45ecebaf1b428306f41421f4bcffe439f735 upstream.
+
+io_kiocb_ppos is called in both branches, and it seems that the compiler
+does not fuse this. Fusing removes a few bytes from loop_rw_iter.
+
+Before:
+$ nm -S fs/io_uring.o | grep loop_rw_iter
+0000000000002430 0000000000000124 t loop_rw_iter
+
+After:
+$ nm -S fs/io_uring.o | grep loop_rw_iter
+0000000000002430 000000000000010d t loop_rw_iter
+
+Signed-off-by: Dylan Yudaken <dylany@fb.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 2caef6417260..14297add8485 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -3303,6 +3303,7 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
+       struct kiocb *kiocb = &req->rw.kiocb;
+       struct file *file = req->file;
+       ssize_t ret = 0;
++      loff_t *ppos;
+       /*
+        * Don't support polled IO through this interface, and we can't
+@@ -3314,6 +3315,8 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
+       if (kiocb->ki_flags & IOCB_NOWAIT)
+               return -EAGAIN;
++      ppos = io_kiocb_ppos(kiocb);
++
+       while (iov_iter_count(iter)) {
+               struct iovec iovec;
+               ssize_t nr;
+@@ -3327,10 +3330,10 @@ static ssize_t loop_rw_iter(int rw, struct io_kiocb *req, struct iov_iter *iter)
+               if (rw == READ) {
+                       nr = file->f_op->read(file, iovec.iov_base,
+-                                            iovec.iov_len, io_kiocb_ppos(kiocb));
++                                            iovec.iov_len, ppos);
+               } else {
+                       nr = file->f_op->write(file, iovec.iov_base,
+-                                             iovec.iov_len, io_kiocb_ppos(kiocb));
++                                             iovec.iov_len, ppos);
+               }
+               if (nr < 0) {
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-rw-defer-fsnotify-calls-to-task-context.patch b/queue-5.15/io_uring-rw-defer-fsnotify-calls-to-task-context.patch
new file mode 100644 (file)
index 0000000..f9854a3
--- /dev/null
@@ -0,0 +1,122 @@
+From d555c91cfc2f24961e823f0c717e2fcf92a8ed6a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 Jan 2023 13:38:51 -0700
+Subject: io_uring/rw: defer fsnotify calls to task context
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit b000145e9907809406d8164c3b2b8861d95aecd1 upstream.
+
+We can't call these off the kiocb completion as that might be off
+soft/hard irq context. Defer the calls to when we process the
+task_work for this request. That avoids valid complaints like:
+
+stack backtrace:
+CPU: 1 PID: 0 Comm: swapper/1 Not tainted 6.0.0-rc6-syzkaller-00321-g105a36f3694e #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/26/2022
+Call Trace:
+ <IRQ>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+ print_usage_bug kernel/locking/lockdep.c:3961 [inline]
+ valid_state kernel/locking/lockdep.c:3973 [inline]
+ mark_lock_irq kernel/locking/lockdep.c:4176 [inline]
+ mark_lock.part.0.cold+0x18/0xd8 kernel/locking/lockdep.c:4632
+ mark_lock kernel/locking/lockdep.c:4596 [inline]
+ mark_usage kernel/locking/lockdep.c:4527 [inline]
+ __lock_acquire+0x11d9/0x56d0 kernel/locking/lockdep.c:5007
+ lock_acquire kernel/locking/lockdep.c:5666 [inline]
+ lock_acquire+0x1ab/0x570 kernel/locking/lockdep.c:5631
+ __fs_reclaim_acquire mm/page_alloc.c:4674 [inline]
+ fs_reclaim_acquire+0x115/0x160 mm/page_alloc.c:4688
+ might_alloc include/linux/sched/mm.h:271 [inline]
+ slab_pre_alloc_hook mm/slab.h:700 [inline]
+ slab_alloc mm/slab.c:3278 [inline]
+ __kmem_cache_alloc_lru mm/slab.c:3471 [inline]
+ kmem_cache_alloc+0x39/0x520 mm/slab.c:3491
+ fanotify_alloc_fid_event fs/notify/fanotify/fanotify.c:580 [inline]
+ fanotify_alloc_event fs/notify/fanotify/fanotify.c:813 [inline]
+ fanotify_handle_event+0x1130/0x3f40 fs/notify/fanotify/fanotify.c:948
+ send_to_group fs/notify/fsnotify.c:360 [inline]
+ fsnotify+0xafb/0x1680 fs/notify/fsnotify.c:570
+ __fsnotify_parent+0x62f/0xa60 fs/notify/fsnotify.c:230
+ fsnotify_parent include/linux/fsnotify.h:77 [inline]
+ fsnotify_file include/linux/fsnotify.h:99 [inline]
+ fsnotify_access include/linux/fsnotify.h:309 [inline]
+ __io_complete_rw_common+0x485/0x720 io_uring/rw.c:195
+ io_complete_rw+0x1a/0x1f0 io_uring/rw.c:228
+ iomap_dio_complete_work fs/iomap/direct-io.c:144 [inline]
+ iomap_dio_bio_end_io+0x438/0x5e0 fs/iomap/direct-io.c:178
+ bio_endio+0x5f9/0x780 block/bio.c:1564
+ req_bio_endio block/blk-mq.c:695 [inline]
+ blk_update_request+0x3fc/0x1300 block/blk-mq.c:825
+ scsi_end_request+0x7a/0x9a0 drivers/scsi/scsi_lib.c:541
+ scsi_io_completion+0x173/0x1f70 drivers/scsi/scsi_lib.c:971
+ scsi_complete+0x122/0x3b0 drivers/scsi/scsi_lib.c:1438
+ blk_complete_reqs+0xad/0xe0 block/blk-mq.c:1022
+ __do_softirq+0x1d3/0x9c6 kernel/softirq.c:571
+ invoke_softirq kernel/softirq.c:445 [inline]
+ __irq_exit_rcu+0x123/0x180 kernel/softirq.c:650
+ irq_exit_rcu+0x5/0x20 kernel/softirq.c:662
+ common_interrupt+0xa9/0xc0 arch/x86/kernel/irq.c:240
+
+Fixes: f63cf5192fe3 ("io_uring: ensure that fsnotify is always called")
+Link: https://lore.kernel.org/all/20220929135627.ykivmdks2w5vzrwg@quack3/
+Reported-by: syzbot+dfcc5f4da15868df7d4d@syzkaller.appspotmail.com
+Reported-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 22 +++++++++++++++-------
+ 1 file changed, 15 insertions(+), 7 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 73d261004c4a..78ed38d778f8 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2705,12 +2705,6 @@ static bool io_rw_should_reissue(struct io_kiocb *req)
+ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
+ {
+-      if (req->rw.kiocb.ki_flags & IOCB_WRITE) {
+-              kiocb_end_write(req);
+-              fsnotify_modify(req->file);
+-      } else {
+-              fsnotify_access(req->file);
+-      }
+       if (res != req->result) {
+               if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
+                   io_rw_should_reissue(req)) {
+@@ -2763,6 +2757,20 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
+       __io_req_complete(req, issue_flags, io_fixup_rw_res(req, res), io_put_rw_kbuf(req));
+ }
++static void io_req_rw_complete(struct io_kiocb *req, bool *locked)
++{
++      struct io_rw *rw = &req->rw;
++
++      if (rw->kiocb.ki_flags & IOCB_WRITE) {
++              kiocb_end_write(req);
++              fsnotify_modify(req->file);
++      } else {
++              fsnotify_access(req->file);
++      }
++
++      io_req_task_complete(req, locked);
++}
++
+ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+ {
+       struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
+@@ -2770,7 +2778,7 @@ static void io_complete_rw(struct kiocb *kiocb, long res, long res2)
+       if (__io_complete_rw_common(req, res))
+               return;
+       req->result = io_fixup_rw_res(req, res);
+-      req->io_task_work.func = io_req_task_complete;
++      req->io_task_work.func = io_req_rw_complete;
+       io_req_task_work_add(req);
+ }
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch b/queue-5.15/io_uring-support-msg_waitall-for-ioring_op_send-msg.patch
new file mode 100644 (file)
index 0000000..3f188b8
--- /dev/null
@@ -0,0 +1,111 @@
+From 02dc72701a3f7be6535e276753836451b95360bc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Apr 2022 19:21:36 -0600
+Subject: io_uring: support MSG_WAITALL for IORING_OP_SEND(MSG)
+
+From: Jens Axboe <axboe@kernel.dk>
+
+commit 4c3c09439c08b03d9503df0ca4c7619c5842892e upstream.
+
+Like commit 7ba89d2af17a for recv/recvmsg, support MSG_WAITALL for the
+send side. If this flag is set and we do a short send, retry for a
+stream or seqpacket socket.
+
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 36 +++++++++++++++++++++++++++++-------
+ 1 file changed, 29 insertions(+), 7 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 2350d43aa782..3fb76863fed4 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -4777,6 +4777,13 @@ static int io_sync_file_range(struct io_kiocb *req, unsigned int issue_flags)
+ }
+ #if defined(CONFIG_NET)
++static bool io_net_retry(struct socket *sock, int flags)
++{
++      if (!(flags & MSG_WAITALL))
++              return false;
++      return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
++}
++
+ static int io_setup_async_msg(struct io_kiocb *req,
+                             struct io_async_msghdr *kmsg)
+ {
+@@ -4840,12 +4847,14 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       if (req->ctx->compat)
+               sr->msg_flags |= MSG_CMSG_COMPAT;
+ #endif
++      sr->done_io = 0;
+       return 0;
+ }
+ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       struct io_async_msghdr iomsg, *kmsg;
++      struct io_sr_msg *sr = &req->sr_msg;
+       struct socket *sock;
+       unsigned flags;
+       int min_ret = 0;
+@@ -4876,12 +4885,21 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
+                       return io_setup_async_msg(req, kmsg);
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
++              if (ret > 0 && io_net_retry(sock, flags)) {
++                      sr->done_io += ret;
++                      req->flags |= REQ_F_PARTIAL_IO;
++                      return io_setup_async_msg(req, kmsg);
++              }
+               req_set_fail(req);
+       }
+       /* fast path, check for non-NULL to avoid function call */
+       if (kmsg->free_iov)
+               kfree(kmsg->free_iov);
+       req->flags &= ~REQ_F_NEED_CLEANUP;
++      if (ret >= 0)
++              ret += sr->done_io;
++      else if (sr->done_io)
++              ret = sr->done_io;
+       __io_req_complete(req, issue_flags, ret, 0);
+       return 0;
+ }
+@@ -4922,8 +4940,19 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
+                       return -EAGAIN;
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
++              if (ret > 0 && io_net_retry(sock, flags)) {
++                      sr->len -= ret;
++                      sr->buf += ret;
++                      sr->done_io += ret;
++                      req->flags |= REQ_F_PARTIAL_IO;
++                      return -EAGAIN;
++              }
+               req_set_fail(req);
+       }
++      if (ret >= 0)
++              ret += sr->done_io;
++      else if (sr->done_io)
++              ret = sr->done_io;
+       __io_req_complete(req, issue_flags, ret, 0);
+       return 0;
+ }
+@@ -5071,13 +5100,6 @@ static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+       return 0;
+ }
+-static bool io_net_retry(struct socket *sock, int flags)
+-{
+-      if (!(flags & MSG_WAITALL))
+-              return false;
+-      return sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET;
+-}
+-
+ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       struct io_async_msghdr iomsg, *kmsg;
+-- 
+2.39.0
+
diff --git a/queue-5.15/io_uring-update-kiocb-ki_pos-at-execution-time.patch b/queue-5.15/io_uring-update-kiocb-ki_pos-at-execution-time.patch
new file mode 100644 (file)
index 0000000..3574360
--- /dev/null
@@ -0,0 +1,86 @@
+From 3fb298fa47c2bbf1201d03fd211c893c7c71643e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Feb 2022 02:55:02 -0800
+Subject: io_uring: update kiocb->ki_pos at execution time
+
+From: Dylan Yudaken <dylany@fb.com>
+
+commit d34e1e5b396a0dbaa4a29b7138df662cfb9d8e8e upstream.
+
+Update kiocb->ki_pos at execution time rather than in io_prep_rw().
+io_prep_rw() happens before the job is enqueued to a worker and so the
+offset might be read multiple times before being executed once.
+
+Ensures that the file position in a set of _linked_ SQEs will be only
+obtained after earlier SQEs have completed, and so will include their
+incremented file position.
+
+Signed-off-by: Dylan Yudaken <dylany@fb.com>
+Reviewed-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/io_uring.c | 26 ++++++++++++++++++--------
+ 1 file changed, 18 insertions(+), 8 deletions(-)
+
+diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
+index 14297add8485..d9396cfaa4f3 100644
+--- a/io_uring/io_uring.c
++++ b/io_uring/io_uring.c
+@@ -2922,14 +2922,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
+               req->flags |= REQ_F_ISREG;
+       kiocb->ki_pos = READ_ONCE(sqe->off);
+-      if (kiocb->ki_pos == -1) {
+-              if (!(file->f_mode & FMODE_STREAM)) {
+-                      req->flags |= REQ_F_CUR_POS;
+-                      kiocb->ki_pos = file->f_pos;
+-              } else {
+-                      kiocb->ki_pos = 0;
+-              }
+-      }
+       kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp));
+       kiocb->ki_flags = iocb_flags(kiocb->ki_filp);
+       ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
+@@ -3011,6 +3003,20 @@ static inline void io_rw_done(struct kiocb *kiocb, ssize_t ret)
+       }
+ }
++static inline void io_kiocb_update_pos(struct io_kiocb *req)
++{
++      struct kiocb *kiocb = &req->rw.kiocb;
++
++      if (kiocb->ki_pos == -1) {
++              if (!(req->file->f_mode & FMODE_STREAM)) {
++                      req->flags |= REQ_F_CUR_POS;
++                      kiocb->ki_pos = req->file->f_pos;
++              } else {
++                      kiocb->ki_pos = 0;
++              }
++      }
++}
++
+ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
+                      unsigned int issue_flags)
+ {
+@@ -3566,6 +3572,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
+               return ret ?: -EAGAIN;
+       }
++      io_kiocb_update_pos(req);
++
+       ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
+       if (unlikely(ret)) {
+               kfree(iovec);
+@@ -3700,6 +3708,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
+           (req->flags & REQ_F_ISREG))
+               goto copy_iov;
++      io_kiocb_update_pos(req);
++
+       ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
+       if (unlikely(ret))
+               goto out_free;
+-- 
+2.39.0
+
index 9b18c26d0594e849f235f65f9c42a03ba8ccab47..af7f23ebe12bf0798c687a2c5635e3915d32294b 100644 (file)
@@ -18,3 +18,24 @@ wifi-mac80211-sdata-can-be-null-during-ampdu-start.patch
 add-exception-protection-processing-for-vd-in-axi_chan_handle_err-function.patch
 zonefs-detect-append-writes-at-invalid-locations.patch
 nilfs2-fix-general-protection-fault-in-nilfs_btree_insert.patch
+efi-fix-userspace-infinite-retry-read-efivars-after-.patch
+alsa-hda-realtek-fix-mute-micmute-leds-for-a-hp-prob.patch
+alsa-hda-realtek-fix-mute-micmute-leds-don-t-work-fo.patch
+drm-amdgpu-disable-runtime-pm-on-several-sienna-cich.patch
+drm-amd-delay-removal-of-the-firmware-framebuffer.patch
+hugetlb-unshare-some-pmds-when-splitting-vmas.patch
+io_uring-don-t-gate-task_work-run-on-tif_notify_sign.patch
+eventpoll-add-epoll_uring_wake-poll-wakeup-flag.patch
+eventfd-provide-a-eventfd_signal_mask-helper.patch
+io_uring-pass-in-epoll_uring_wake-for-eventfd-signal.patch
+io_uring-improve-send-recv-error-handling.patch
+io_uring-ensure-recv-and-recvmsg-handle-msg_waitall-.patch
+io_uring-add-flag-for-disabling-provided-buffer-recy.patch
+io_uring-support-msg_waitall-for-ioring_op_send-msg.patch
+io_uring-allow-re-poll-if-we-made-progress.patch
+io_uring-fix-async-accept-on-o_nonblock-sockets.patch
+io_uring-ensure-that-cached-task-references-are-alwa.patch
+io_uring-remove-duplicated-calls-to-io_kiocb_ppos.patch
+io_uring-update-kiocb-ki_pos-at-execution-time.patch
+io_uring-do-not-recalculate-ppos-unnecessarily.patch
+io_uring-rw-defer-fsnotify-calls-to-task-context.patch