git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 1 Sep 2022 10:08:42 +0000 (12:08 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 1 Sep 2022 10:08:42 +0000 (12:08 +0200)
added patches:
acpi-thermal-drop-an-always-true-check.patch
arm64-errata-add-cortex-a510-to-the-repeat-tlbi-list.patch
crypto-lib-remove-unneeded-selection-of-xor_blocks.patch
drivers-hv-balloon-support-status-report-for-larger-page-sizes.patch
drm-bridge-add-stubs-for-devm_drm_of_get_bridge-when-of-is-disabled.patch
drm-vc4-hdmi-depends-on-config_pm.patch
drm-vc4-hdmi-rework-power-up.patch
firmware-tegra-bpmp-do-only-aligned-access-to-ipc-memory-area.patch
io_uring-bump-poll-refs-to-full-31-bits.patch
io_uring-clean-cqe-filling-functions.patch
io_uring-correct-fill-events-helpers-types.patch
io_uring-fail-links-when-poll-fails.patch
io_uring-fix-uaf-due-to-missing-pollfree-handling.patch
io_uring-fix-wrong-arm_poll-error-handling.patch
io_uring-inline-io_poll_complete.patch
io_uring-kill-poll-linking-optimisation.patch
io_uring-move-common-poll-bits.patch
io_uring-poll-rework.patch
io_uring-refactor-poll-update.patch
io_uring-remove-poll-entry-from-list-when-canceling-all.patch
io_uring-remove-unused-function-req_ref_put.patch
mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch

23 files changed:
queue-5.15/acpi-thermal-drop-an-always-true-check.patch [new file with mode: 0644]
queue-5.15/arm64-errata-add-cortex-a510-to-the-repeat-tlbi-list.patch [new file with mode: 0644]
queue-5.15/crypto-lib-remove-unneeded-selection-of-xor_blocks.patch [new file with mode: 0644]
queue-5.15/drivers-hv-balloon-support-status-report-for-larger-page-sizes.patch [new file with mode: 0644]
queue-5.15/drm-bridge-add-stubs-for-devm_drm_of_get_bridge-when-of-is-disabled.patch [new file with mode: 0644]
queue-5.15/drm-vc4-hdmi-depends-on-config_pm.patch [new file with mode: 0644]
queue-5.15/drm-vc4-hdmi-rework-power-up.patch [new file with mode: 0644]
queue-5.15/firmware-tegra-bpmp-do-only-aligned-access-to-ipc-memory-area.patch [new file with mode: 0644]
queue-5.15/io_uring-bump-poll-refs-to-full-31-bits.patch [new file with mode: 0644]
queue-5.15/io_uring-clean-cqe-filling-functions.patch [new file with mode: 0644]
queue-5.15/io_uring-correct-fill-events-helpers-types.patch [new file with mode: 0644]
queue-5.15/io_uring-fail-links-when-poll-fails.patch [new file with mode: 0644]
queue-5.15/io_uring-fix-uaf-due-to-missing-pollfree-handling.patch [new file with mode: 0644]
queue-5.15/io_uring-fix-wrong-arm_poll-error-handling.patch [new file with mode: 0644]
queue-5.15/io_uring-inline-io_poll_complete.patch [new file with mode: 0644]
queue-5.15/io_uring-kill-poll-linking-optimisation.patch [new file with mode: 0644]
queue-5.15/io_uring-move-common-poll-bits.patch [new file with mode: 0644]
queue-5.15/io_uring-poll-rework.patch [new file with mode: 0644]
queue-5.15/io_uring-refactor-poll-update.patch [new file with mode: 0644]
queue-5.15/io_uring-remove-poll-entry-from-list-when-canceling-all.patch [new file with mode: 0644]
queue-5.15/io_uring-remove-unused-function-req_ref_put.patch [new file with mode: 0644]
queue-5.15/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/acpi-thermal-drop-an-always-true-check.patch b/queue-5.15/acpi-thermal-drop-an-always-true-check.patch
new file mode 100644 (file)
index 0000000..28e2905
--- /dev/null
@@ -0,0 +1,31 @@
+From e5b5d25444e9ee3ae439720e62769517d331fa39 Mon Sep 17 00:00:00 2001
+From: Adam Borowski <kilobyte@angband.pl>
+Date: Mon, 15 Nov 2021 18:32:08 +0100
+Subject: ACPI: thermal: drop an always true check
+
+From: Adam Borowski <kilobyte@angband.pl>
+
+commit e5b5d25444e9ee3ae439720e62769517d331fa39 upstream.
+
+Address of a field inside a struct can't possibly be null; gcc-12 warns
+about this.
+
+Signed-off-by: Adam Borowski <kilobyte@angband.pl>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/acpi/thermal.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/acpi/thermal.c
++++ b/drivers/acpi/thermal.c
+@@ -1098,8 +1098,6 @@ static int acpi_thermal_resume(struct de
+               return -EINVAL;
+       for (i = 0; i < ACPI_THERMAL_MAX_ACTIVE; i++) {
+-              if (!(&tz->trips.active[i]))
+-                      break;
+               if (!tz->trips.active[i].flags.valid)
+                       break;
+               tz->trips.active[i].flags.enabled = 1;
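
A minimal standalone illustration of why the dropped check was dead code (plain userspace C with invented struct names, not the kernel's): the address of an array element embedded in a struct object can never be NULL, so the negated test can never break out of the loop, which is exactly what gcc-12's -Waddress warning flags.

    #include <stdio.h>

    struct trips   { int active[3]; };
    struct thermal { struct trips trips; };

    int main(void)
    {
        struct thermal tz = { 0 };

        for (int i = 0; i < 3; i++) {
            if (!(&tz.trips.active[i]))   /* always false: the address is never NULL */
                break;                    /* unreachable; gcc-12 warns about the test */
            printf("trip %d checked\n", i);
        }
        return 0;
    }
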
diff --git a/queue-5.15/arm64-errata-add-cortex-a510-to-the-repeat-tlbi-list.patch b/queue-5.15/arm64-errata-add-cortex-a510-to-the-repeat-tlbi-list.patch
new file mode 100644 (file)
index 0000000..ceef3b9
--- /dev/null
@@ -0,0 +1,89 @@
+From 39fdb65f52e9a53d32a6ba719f96669fd300ae78 Mon Sep 17 00:00:00 2001
+From: James Morse <james.morse@arm.com>
+Date: Mon, 4 Jul 2022 16:57:32 +0100
+Subject: arm64: errata: Add Cortex-A510 to the repeat tlbi list
+
+From: James Morse <james.morse@arm.com>
+
+commit 39fdb65f52e9a53d32a6ba719f96669fd300ae78 upstream.
+
+Cortex-A510 is affected by an erratum where in rare circumstances the
+CPUs may not handle a race between a break-before-make sequence on one
+CPU, and another CPU accessing the same page. This could allow a store
+to a page that has been unmapped.
+
+Work around this by adding the affected CPUs to the list that needs
+TLB sequences to be done twice.
+
+Signed-off-by: James Morse <james.morse@arm.com>
+Link: https://lore.kernel.org/r/20220704155732.21216-1-james.morse@arm.com
+Signed-off-by: Will Deacon <will@kernel.org>
+Signed-off-by: Lucas Wei <lucaswei@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ Documentation/arm64/silicon-errata.rst |    2 ++
+ arch/arm64/Kconfig                     |   17 +++++++++++++++++
+ arch/arm64/kernel/cpu_errata.c         |    8 +++++++-
+ 3 files changed, 26 insertions(+), 1 deletion(-)
+
+--- a/Documentation/arm64/silicon-errata.rst
++++ b/Documentation/arm64/silicon-errata.rst
+@@ -92,6 +92,8 @@ stable kernels.
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM            | Cortex-A77      | #1508412        | ARM64_ERRATUM_1508412       |
+ +----------------+-----------------+-----------------+-----------------------------+
++| ARM            | Cortex-A510     | #2441009        | ARM64_ERRATUM_2441009       |
+++----------------+-----------------+-----------------+-----------------------------+
+ | ARM            | Neoverse-N1     | #1188873,1418040| ARM64_ERRATUM_1418040       |
+ +----------------+-----------------+-----------------+-----------------------------+
+ | ARM            | Neoverse-N1     | #1349291        | N/A                         |
+--- a/arch/arm64/Kconfig
++++ b/arch/arm64/Kconfig
+@@ -666,6 +666,23 @@ config ARM64_ERRATUM_1508412
+         If unsure, say Y.
++config ARM64_ERRATUM_2441009
++      bool "Cortex-A510: Completion of affected memory accesses might not be guaranteed by completion of a TLBI"
++      default y
++      select ARM64_WORKAROUND_REPEAT_TLBI
++      help
++        This option adds a workaround for ARM Cortex-A510 erratum #2441009.
++
++        Under very rare circumstances, affected Cortex-A510 CPUs
++        may not handle a race between a break-before-make sequence on one
++        CPU, and another CPU accessing the same page. This could allow a
++        store to a page that has been unmapped.
++
++        Work around this by adding the affected CPUs to the list that needs
++        TLB sequences to be done twice.
++
++        If unsure, say Y.
++
+ config CAVIUM_ERRATUM_22375
+       bool "Cavium erratum 22375, 24313"
+       default y
+--- a/arch/arm64/kernel/cpu_errata.c
++++ b/arch/arm64/kernel/cpu_errata.c
+@@ -214,6 +214,12 @@ static const struct arm64_cpu_capabiliti
+               ERRATA_MIDR_RANGE(MIDR_QCOM_KRYO_4XX_GOLD, 0xc, 0xe, 0xf, 0xe),
+       },
+ #endif
++#ifdef CONFIG_ARM64_ERRATUM_2441009
++      {
++              /* Cortex-A510 r0p0 -> r1p1. Fixed in r1p2 */
++              ERRATA_MIDR_RANGE(MIDR_CORTEX_A510, 0, 0, 1, 1),
++      },
++#endif
+       {},
+ };
+ #endif
+@@ -429,7 +435,7 @@ const struct arm64_cpu_capabilities arm6
+ #endif
+ #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI
+       {
+-              .desc = "Qualcomm erratum 1009, or ARM erratum 1286807",
++              .desc = "Qualcomm erratum 1009, or ARM erratum 1286807, 2441009",
+               .capability = ARM64_WORKAROUND_REPEAT_TLBI,
+               .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM,
+               .matches = cpucap_multi_entry_cap_matches,
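
For context, "TLB sequences to be done twice" boils down to repeating the invalidate-plus-barrier pair on affected CPUs. The sketch below is conceptual only: issue_tlbi() and issue_dsb_ish() are hypothetical stand-ins, while the real arch/arm64 code emits the extra TLBI through the alternatives framework keyed on ARM64_WORKAROUND_REPEAT_TLBI.

    /* Conceptual sketch, not kernel code: the helpers are hypothetical. */
    void issue_tlbi(unsigned long addr);   /* would emit a TLBI for this address */
    void issue_dsb_ish(void);              /* would emit a DSB ISH barrier */

    static void flush_tlb_page_sketch(unsigned long addr, int on_repeat_tlbi_list)
    {
        issue_tlbi(addr);                  /* normal invalidate */
        issue_dsb_ish();                   /* wait for it to complete */
        if (on_repeat_tlbi_list) {         /* e.g. Cortex-A510 r0p0..r1p1 */
            issue_tlbi(addr);              /* repeat the whole sequence */
            issue_dsb_ish();
        }
    }
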
diff --git a/queue-5.15/crypto-lib-remove-unneeded-selection-of-xor_blocks.patch b/queue-5.15/crypto-lib-remove-unneeded-selection-of-xor_blocks.patch
new file mode 100644 (file)
index 0000000..743ef91
--- /dev/null
@@ -0,0 +1,37 @@
+From 874b301985ef2f89b8b592ad255e03fb6fbfe605 Mon Sep 17 00:00:00 2001
+From: Eric Biggers <ebiggers@google.com>
+Date: Thu, 25 Aug 2022 22:04:56 -0700
+Subject: crypto: lib - remove unneeded selection of XOR_BLOCKS
+
+From: Eric Biggers <ebiggers@google.com>
+
+commit 874b301985ef2f89b8b592ad255e03fb6fbfe605 upstream.
+
+CRYPTO_LIB_CHACHA_GENERIC doesn't need to select XOR_BLOCKS.  It perhaps
+was thought that it's needed for __crypto_xor, but that's not the case.
+
+Enabling XOR_BLOCKS is problematic because the XOR_BLOCKS code runs a
+benchmark when it is initialized.  That causes a boot time regression on
+systems that didn't have it enabled before.
+
+Therefore, remove this unnecessary and problematic selection.
+
+Fixes: e56e18985596 ("lib/crypto: add prompts back to crypto libraries")
+Cc: stable@vger.kernel.org
+Signed-off-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ lib/crypto/Kconfig |    1 -
+ 1 file changed, 1 deletion(-)
+
+--- a/lib/crypto/Kconfig
++++ b/lib/crypto/Kconfig
+@@ -33,7 +33,6 @@ config CRYPTO_ARCH_HAVE_LIB_CHACHA
+ config CRYPTO_LIB_CHACHA_GENERIC
+       tristate
+-      select XOR_BLOCKS
+       help
+         This symbol can be depended upon by arch implementations of the
+         ChaCha library interface that require the generic code as a
diff --git a/queue-5.15/drivers-hv-balloon-support-status-report-for-larger-page-sizes.patch b/queue-5.15/drivers-hv-balloon-support-status-report-for-larger-page-sizes.patch
new file mode 100644 (file)
index 0000000..ddee5a2
--- /dev/null
@@ -0,0 +1,70 @@
+From b3d6dd09ff00fdcf4f7c0cb54700ffd5dd343502 Mon Sep 17 00:00:00 2001
+From: Boqun Feng <boqun.feng@gmail.com>
+Date: Fri, 25 Mar 2022 10:32:11 +0800
+Subject: Drivers: hv: balloon: Support status report for larger page sizes
+
+From: Boqun Feng <boqun.feng@gmail.com>
+
+commit b3d6dd09ff00fdcf4f7c0cb54700ffd5dd343502 upstream.
+
+DM_STATUS_REPORT expects the numbers of pages in units of 4k pages
+(HV_HYP_PAGE) instead of guest pages, so to make it work when guest page
+sizes are larger than 4k, convert the numbers of guest pages into the
+numbers of HV_HYP_PAGEs.
+
+Note that the numbers of guest pages are still used for tracing because
+tracing is internal to the guest kernel.
+
+Reported-by: Vitaly Kuznetsov <vkuznets@redhat.com>
+Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
+Reviewed-by: Michael Kelley <mikelley@microsoft.com>
+Link: https://lore.kernel.org/r/20220325023212.1570049-2-boqun.feng@gmail.com
+Signed-off-by: Wei Liu <wei.liu@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/hv/hv_balloon.c |   13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+--- a/drivers/hv/hv_balloon.c
++++ b/drivers/hv/hv_balloon.c
+@@ -17,6 +17,7 @@
+ #include <linux/slab.h>
+ #include <linux/kthread.h>
+ #include <linux/completion.h>
++#include <linux/count_zeros.h>
+ #include <linux/memory_hotplug.h>
+ #include <linux/memory.h>
+ #include <linux/notifier.h>
+@@ -1130,6 +1131,7 @@ static void post_status(struct hv_dynmem
+       struct dm_status status;
+       unsigned long now = jiffies;
+       unsigned long last_post = last_post_time;
++      unsigned long num_pages_avail, num_pages_committed;
+       if (pressure_report_delay > 0) {
+               --pressure_report_delay;
+@@ -1154,16 +1156,21 @@ static void post_status(struct hv_dynmem
+        * num_pages_onlined) as committed to the host, otherwise it can try
+        * asking us to balloon them out.
+        */
+-      status.num_avail = si_mem_available();
+-      status.num_committed = vm_memory_committed() +
++      num_pages_avail = si_mem_available();
++      num_pages_committed = vm_memory_committed() +
+               dm->num_pages_ballooned +
+               (dm->num_pages_added > dm->num_pages_onlined ?
+                dm->num_pages_added - dm->num_pages_onlined : 0) +
+               compute_balloon_floor();
+-      trace_balloon_status(status.num_avail, status.num_committed,
++      trace_balloon_status(num_pages_avail, num_pages_committed,
+                            vm_memory_committed(), dm->num_pages_ballooned,
+                            dm->num_pages_added, dm->num_pages_onlined);
++
++      /* Convert numbers of pages into numbers of HV_HYP_PAGEs. */
++      status.num_avail = num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE;
++      status.num_committed = num_pages_committed * NR_HV_HYP_PAGES_IN_PAGE;
++
+       /*
+        * If our transaction ID is no longer current, just don't
+        * send the status. This can happen if we were interrupted
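
The conversion itself is simple arithmetic; the runnable userspace sketch below assumes a 64 KiB guest page size to show the effect (in the kernel, NR_HV_HYP_PAGES_IN_PAGE is derived from PAGE_SIZE and HV_HYP_PAGE_SIZE in the Hyper-V headers):

    #include <stdio.h>

    #define HV_HYP_PAGE_SIZE        4096UL
    #define GUEST_PAGE_SIZE         65536UL   /* e.g. a 64K-page guest */
    #define NR_HV_HYP_PAGES_IN_PAGE (GUEST_PAGE_SIZE / HV_HYP_PAGE_SIZE)

    int main(void)
    {
        unsigned long num_pages_avail = 1000;   /* counted in guest pages */

        /* tracing stays in guest pages ... */
        printf("traced (guest pages): %lu\n", num_pages_avail);
        /* ... but DM_STATUS_REPORT wants 4k HV_HYP_PAGE units */
        printf("reported (4k pages):  %lu\n",
               num_pages_avail * NR_HV_HYP_PAGES_IN_PAGE);   /* 16000 */
        return 0;
    }
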
diff --git a/queue-5.15/drm-bridge-add-stubs-for-devm_drm_of_get_bridge-when-of-is-disabled.patch b/queue-5.15/drm-bridge-add-stubs-for-devm_drm_of_get_bridge-when-of-is-disabled.patch
new file mode 100644 (file)
index 0000000..5d9178b
--- /dev/null
@@ -0,0 +1,47 @@
+From 59050d783848d9b62e9d8fb6ce0cd00771c2bf87 Mon Sep 17 00:00:00 2001
+From: Maxime Ripard <maxime@cerno.tech>
+Date: Tue, 28 Sep 2021 20:13:33 +0200
+Subject: drm/bridge: Add stubs for devm_drm_of_get_bridge when OF is disabled
+
+From: Maxime Ripard <maxime@cerno.tech>
+
+commit 59050d783848d9b62e9d8fb6ce0cd00771c2bf87 upstream.
+
+If CONFIG_OF is disabled, devm_drm_of_get_bridge won't be compiled in
+and drivers using that function will fail to build.
+
+Add an inline stub so that we can still build-test those cases.
+
+Reported-by: Randy Dunlap <rdunlap@infradead.org>
+Signed-off-by: Maxime Ripard <maxime@cerno.tech>
+Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
+Link: https://patchwork.freedesktop.org/patch/msgid/20210928181333.1176840-1-maxime@cerno.tech
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/drm/drm_bridge.h |   13 ++++++++++++-
+ 1 file changed, 12 insertions(+), 1 deletion(-)
+
+--- a/include/drm/drm_bridge.h
++++ b/include/drm/drm_bridge.h
+@@ -911,9 +911,20 @@ struct drm_bridge *devm_drm_panel_bridge
+ struct drm_bridge *devm_drm_panel_bridge_add_typed(struct device *dev,
+                                                  struct drm_panel *panel,
+                                                  u32 connector_type);
++struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge);
++#endif
++
++#if defined(CONFIG_OF) && defined(CONFIG_DRM_PANEL_BRIDGE)
+ struct drm_bridge *devm_drm_of_get_bridge(struct device *dev, struct device_node *node,
+                                         u32 port, u32 endpoint);
+-struct drm_connector *drm_panel_bridge_connector(struct drm_bridge *bridge);
++#else
++static inline struct drm_bridge *devm_drm_of_get_bridge(struct device *dev,
++                                                      struct device_node *node,
++                                                      u32 port,
++                                                      u32 endpoint)
++{
++      return ERR_PTR(-ENODEV);
++}
+ #endif
+ #endif
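
The stub follows the common compile-out pattern: callers still build when the option is off and simply get an error pointer at run time. A self-contained sketch of that pattern (ERR_PTR/IS_ERR/PTR_ERR are re-implemented here only so the example builds outside the kernel, and get_bridge() is an invented stand-in for devm_drm_of_get_bridge()):

    #include <stdio.h>
    #include <errno.h>

    #define MAX_ERRNO     4095
    #define ERR_PTR(err)  ((void *)(long)(err))
    #define PTR_ERR(ptr)  ((long)(ptr))
    #define IS_ERR(ptr)   ((unsigned long)(ptr) >= (unsigned long)-MAX_ERRNO)

    /* #define CONFIG_OF 1 */

    #ifdef CONFIG_OF
    void *get_bridge(void);                 /* real lookup, only built with OF */
    #else
    static inline void *get_bridge(void)    /* stub keeps callers compiling */
    {
        return ERR_PTR(-ENODEV);
    }
    #endif

    int main(void)
    {
        void *bridge = get_bridge();

        if (IS_ERR(bridge))
            printf("no bridge available: %ld\n", PTR_ERR(bridge));   /* -19 */
        return 0;
    }
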
diff --git a/queue-5.15/drm-vc4-hdmi-depends-on-config_pm.patch b/queue-5.15/drm-vc4-hdmi-depends-on-config_pm.patch
new file mode 100644 (file)
index 0000000..246af64
--- /dev/null
@@ -0,0 +1,47 @@
+From 72e2329e7c9bbe15e7a813670497ec9c6f919af3 Mon Sep 17 00:00:00 2001
+From: Maxime Ripard <maxime@cerno.tech>
+Date: Wed, 29 Jun 2022 14:34:36 +0200
+Subject: drm/vc4: hdmi: Depends on CONFIG_PM
+
+From: Maxime Ripard <maxime@cerno.tech>
+
+commit 72e2329e7c9bbe15e7a813670497ec9c6f919af3 upstream.
+
+We already depend on runtime PM to get the power domains and clocks for
+most of the devices supported by the vc4 driver, so let's just select it
+to make sure it's there.
+
+Link: https://lore.kernel.org/r/20220629123510.1915022-38-maxime@cerno.tech
+Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
+Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
+Signed-off-by: Maxime Ripard <maxime@cerno.tech>
+(cherry picked from commit f1bc386b319e93e56453ae27e9e83817bb1f6f95)
+Signed-off-by: Maxime Ripard <maxime@cerno.tech>
+Cc: "Sudip Mukherjee (Codethink)" <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/vc4/Kconfig    |    1 +
+ drivers/gpu/drm/vc4/vc4_hdmi.c |    2 +-
+ 2 files changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/vc4/Kconfig
++++ b/drivers/gpu/drm/vc4/Kconfig
+@@ -5,6 +5,7 @@ config DRM_VC4
+       depends on DRM
+       depends on SND && SND_SOC
+       depends on COMMON_CLK
++      depends on PM
+       select DRM_KMS_HELPER
+       select DRM_KMS_CMA_HELPER
+       select DRM_GEM_CMA_HELPER
+--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
+@@ -2122,7 +2122,7 @@ static int vc5_hdmi_init_resources(struc
+       return 0;
+ }
+-static int __maybe_unused vc4_hdmi_runtime_suspend(struct device *dev)
++static int vc4_hdmi_runtime_suspend(struct device *dev)
+ {
+       struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
diff --git a/queue-5.15/drm-vc4-hdmi-rework-power-up.patch b/queue-5.15/drm-vc4-hdmi-rework-power-up.patch
new file mode 100644 (file)
index 0000000..390c44c
--- /dev/null
@@ -0,0 +1,75 @@
+From 258e483a4d5e97a6a8caa74381ddc1f395ac1c71 Mon Sep 17 00:00:00 2001
+From: Maxime Ripard <maxime@cerno.tech>
+Date: Wed, 29 Jun 2022 14:34:37 +0200
+Subject: drm/vc4: hdmi: Rework power up
+
+From: Maxime Ripard <maxime@cerno.tech>
+
+commit 258e483a4d5e97a6a8caa74381ddc1f395ac1c71 upstream.
+
+The current code tries to handle the case where CONFIG_PM isn't selected
+by first calling our runtime_resume implementation and then properly
+report the power state to the runtime_pm core.
+
+This allows us to have a functioning device even if pm_runtime_get_*
+functions are nops.
+
+However, if CONFIG_PM is enabled, the device power state is
+RPM_SUSPENDED, and thus our vc4_hdmi_write() and vc4_hdmi_read() calls
+in the runtime_pm hooks will now report a warning since the device might
+not be properly powered.
+
+Even more so, we need CONFIG_PM enabled since the previous RaspberryPi
+generations have a power domain that needs to be powered up for the
+HDMI controller to be usable.
+
+The previous patch has created a dependency on CONFIG_PM, now we can
+just assume it's there and only call pm_runtime_resume_and_get() to make
+sure our device is powered in bind.
+
+Link: https://lore.kernel.org/r/20220629123510.1915022-39-maxime@cerno.tech
+Acked-by: Thomas Zimmermann <tzimmermann@suse.de>
+Tested-by: Stefan Wahren <stefan.wahren@i2se.com>
+Signed-off-by: Maxime Ripard <maxime@cerno.tech>
+(cherry picked from commit 53565c28e6af2cef6bbf438c34250135e3564459)
+Signed-off-by: Maxime Ripard <maxime@cerno.tech>
+Cc: "Sudip Mukherjee (Codethink)" <sudipm.mukherjee@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/vc4/vc4_hdmi.c |   15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
+@@ -2219,17 +2219,15 @@ static int vc4_hdmi_bind(struct device *
+       if (ret)
+               goto err_put_ddc;
++      pm_runtime_enable(dev);
++
+       /*
+-       * We need to have the device powered up at this point to call
+-       * our reset hook and for the CEC init.
++       *  We need to have the device powered up at this point to call
++       *  our reset hook and for the CEC init.
+        */
+-      ret = vc4_hdmi_runtime_resume(dev);
++      ret = pm_runtime_resume_and_get(dev);
+       if (ret)
+-              goto err_put_ddc;
+-
+-      pm_runtime_get_noresume(dev);
+-      pm_runtime_set_active(dev);
+-      pm_runtime_enable(dev);
++              goto err_disable_runtime_pm;
+       if (vc4_hdmi->variant->reset)
+               vc4_hdmi->variant->reset(vc4_hdmi);
+@@ -2278,6 +2276,7 @@ err_destroy_conn:
+ err_destroy_encoder:
+       drm_encoder_cleanup(encoder);
+       pm_runtime_put_sync(dev);
++err_disable_runtime_pm:
+       pm_runtime_disable(dev);
+ err_put_ddc:
+       put_device(&vc4_hdmi->ddc->dev);
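
Stripped of the diff markers, the bind-time ordering the patch moves to looks like the fragment below (a sketch only, not the complete function; the pm_runtime_* calls are the standard runtime-PM API and the error label is shortened for illustration):

    pm_runtime_enable(dev);

    /* resume the device and take a reference in one step; on failure the
     * reference is dropped for us and we only have to undo the enable */
    ret = pm_runtime_resume_and_get(dev);
    if (ret)
        goto err_disable_runtime_pm;

    /* ... reset hook, CEC init: registers are now safe to touch ... */

    err_disable_runtime_pm:
        pm_runtime_disable(dev);
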
diff --git a/queue-5.15/firmware-tegra-bpmp-do-only-aligned-access-to-ipc-memory-area.patch b/queue-5.15/firmware-tegra-bpmp-do-only-aligned-access-to-ipc-memory-area.patch
new file mode 100644 (file)
index 0000000..127de64
--- /dev/null
@@ -0,0 +1,53 @@
+From a4740b148a04dc60e14fe6a1dfe216d3bae214fd Mon Sep 17 00:00:00 2001
+From: Timo Alho <talho@nvidia.com>
+Date: Wed, 22 Jun 2022 16:22:59 +0300
+Subject: firmware: tegra: bpmp: Do only aligned access to IPC memory area
+
+From: Timo Alho <talho@nvidia.com>
+
+commit a4740b148a04dc60e14fe6a1dfe216d3bae214fd upstream.
+
+Use memcpy_toio and memcpy_fromio variants of memcpy to guarantee no
+unaligned access to IPC memory area. This is to allow the IPC memory to
+be mapped as Device memory to further suppress speculative reads from
+happening within the 64 kB memory area above the IPC memory when 64 kB
+memory pages are used.
+
+Signed-off-by: Timo Alho <talho@nvidia.com>
+Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+Cc: Jon Hunter <jonathanh@nvidia.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/firmware/tegra/bpmp.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/firmware/tegra/bpmp.c
++++ b/drivers/firmware/tegra/bpmp.c
+@@ -201,7 +201,7 @@ static ssize_t __tegra_bpmp_channel_read
+       int err;
+       if (data && size > 0)
+-              memcpy(data, channel->ib->data, size);
++              memcpy_fromio(data, channel->ib->data, size);
+       err = tegra_bpmp_ack_response(channel);
+       if (err < 0)
+@@ -245,7 +245,7 @@ static ssize_t __tegra_bpmp_channel_writ
+       channel->ob->flags = flags;
+       if (data && size > 0)
+-              memcpy(channel->ob->data, data, size);
++              memcpy_toio(channel->ob->data, data, size);
+       return tegra_bpmp_post_request(channel);
+ }
+@@ -420,7 +420,7 @@ void tegra_bpmp_mrq_return(struct tegra_
+       channel->ob->code = code;
+       if (data && size > 0)
+-              memcpy(channel->ob->data, data, size);
++              memcpy_toio(channel->ob->data, data, size);
+       err = tegra_bpmp_post_response(channel);
+       if (WARN_ON(err < 0))
diff --git a/queue-5.15/io_uring-bump-poll-refs-to-full-31-bits.patch b/queue-5.15/io_uring-bump-poll-refs-to-full-31-bits.patch
new file mode 100644 (file)
index 0000000..34135a1
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:21 +0100
+Subject: io_uring: bump poll refs to full 31-bits
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <a043031ab97528ca7bd9aeed262bf4b3580d7ef5.1661594698.git.asml.silence@gmail.com>
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ upstream commit e2c0cb7c0cc72939b61a7efee376206725796625 ]
+
+The previous commit:
+
+1bc84c40088 ("io_uring: remove poll entry from list when canceling all")
+
+removed a potential overflow condition for the poll references. They
+are currently limited to 20-bits, even if we have 31-bits available. The
+upper bit is used to mark for cancelation.
+
+Bump the poll ref space to 31-bits, making that kind of situation much
+harder to trigger in general. We'll separately add overflow checking
+and handling.
+
+Fixes: aa43477b0402 ("io_uring: poll rework")
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5314,7 +5314,7 @@ struct io_poll_table {
+ };
+ #define IO_POLL_CANCEL_FLAG   BIT(31)
+-#define IO_POLL_REF_MASK      ((1u << 20)-1)
++#define IO_POLL_REF_MASK      GENMASK(30, 0)
+ /*
+  * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
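
The layout change is easiest to see with the masks printed out; the short userspace program below re-implements BIT() and GENMASK() for a 32-bit field purely for illustration (the kernel provides its own definitions of both):

    #include <stdio.h>

    #define BIT(n)        (1u << (n))
    #define GENMASK(h, l) (((~0u) << (l)) & (~0u >> (31 - (h))))

    int main(void)
    {
        printf("cancel flag (BIT(31)):        0x%08x\n", BIT(31));        /* 0x80000000 */
        printf("old ref mask ((1u<<20)-1):    0x%08x\n", (1u << 20) - 1); /* 0x000fffff */
        printf("new ref mask (GENMASK(30,0)): 0x%08x\n", GENMASK(30, 0)); /* 0x7fffffff */
        return 0;
    }
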
diff --git a/queue-5.15/io_uring-clean-cqe-filling-functions.patch b/queue-5.15/io_uring-clean-cqe-filling-functions.patch
new file mode 100644 (file)
index 0000000..cca7c7e
--- /dev/null
@@ -0,0 +1,196 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:13 +0100
+Subject: io_uring: clean cqe filling functions
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <ca69eee269c72bb415d2863408355504a10ab8c8.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 913a571affedd17239c4d4ea90c8874b32fc2191 ]
+
+Split io_cqring_fill_event() into a couple of more targeted functions.
+The first on is io_fill_cqe_aux() for completions that are not
+associated with request completions and doing the ->cq_extra accounting.
+Examples are additional CQEs from multishot poll and rsrc notifications.
+
+The second is io_fill_cqe_req(), should be called when it's a normal
+request completion. Nothing more to it at the moment, will be used in
+later patches.
+
+The last one is the inlined __io_fill_cqe() for finer-grained control;
+it should be used with caution and only in the hottest places.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/59a9117a4a44fc9efcf04b3afa51e0d080f5943c.1636559119.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   57 +++++++++++++++++++++++++++++----------------------------
+ 1 file changed, 29 insertions(+), 28 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1079,8 +1079,8 @@ static void io_uring_try_cancel_requests
+                                        bool cancel_all);
+ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
+-static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+-                               s32 res, u32 cflags);
++static void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags);
++
+ static void io_put_req(struct io_kiocb *req);
+ static void io_put_req_deferred(struct io_kiocb *req);
+ static void io_dismantle_req(struct io_kiocb *req);
+@@ -1515,7 +1515,7 @@ static void io_kill_timeout(struct io_ki
+               atomic_set(&req->ctx->cq_timeouts,
+                       atomic_read(&req->ctx->cq_timeouts) + 1);
+               list_del_init(&req->timeout.list);
+-              io_cqring_fill_event(req->ctx, req->user_data, status, 0);
++              io_fill_cqe_req(req, status, 0);
+               io_put_req_deferred(req);
+       }
+ }
+@@ -1790,8 +1790,8 @@ static bool io_cqring_event_overflow(str
+       return true;
+ }
+-static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+-                                        s32 res, u32 cflags)
++static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
++                               s32 res, u32 cflags)
+ {
+       struct io_uring_cqe *cqe;
+@@ -1812,11 +1812,16 @@ static inline bool __io_cqring_fill_even
+       return io_cqring_event_overflow(ctx, user_data, res, cflags);
+ }
+-/* not as hot to bloat with inlining */
+-static noinline bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+-                                        s32 res, u32 cflags)
++static noinline void io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
++{
++      __io_fill_cqe(req->ctx, req->user_data, res, cflags);
++}
++
++static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
++                                   s32 res, u32 cflags)
+ {
+-      return __io_cqring_fill_event(ctx, user_data, res, cflags);
++      ctx->cq_extra++;
++      return __io_fill_cqe(ctx, user_data, res, cflags);
+ }
+ static void io_req_complete_post(struct io_kiocb *req, s32 res,
+@@ -1825,7 +1830,7 @@ static void io_req_complete_post(struct
+       struct io_ring_ctx *ctx = req->ctx;
+       spin_lock(&ctx->completion_lock);
+-      __io_cqring_fill_event(ctx, req->user_data, res, cflags);
++      __io_fill_cqe(ctx, req->user_data, res, cflags);
+       /*
+        * If we're the last reference to this request, add to our locked
+        * free_list cache.
+@@ -2051,8 +2056,7 @@ static bool io_kill_linked_timeout(struc
+               link->timeout.head = NULL;
+               if (hrtimer_try_to_cancel(&io->timer) != -1) {
+                       list_del(&link->timeout.list);
+-                      io_cqring_fill_event(link->ctx, link->user_data,
+-                                           -ECANCELED, 0);
++                      io_fill_cqe_req(link, -ECANCELED, 0);
+                       io_put_req_deferred(link);
+                       return true;
+               }
+@@ -2076,7 +2080,7 @@ static void io_fail_links(struct io_kioc
+               link->link = NULL;
+               trace_io_uring_fail_link(req, link);
+-              io_cqring_fill_event(link->ctx, link->user_data, res, 0);
++              io_fill_cqe_req(link, res, 0);
+               io_put_req_deferred(link);
+               link = nxt;
+       }
+@@ -2093,8 +2097,7 @@ static bool io_disarm_next(struct io_kio
+               req->flags &= ~REQ_F_ARM_LTIMEOUT;
+               if (link && link->opcode == IORING_OP_LINK_TIMEOUT) {
+                       io_remove_next_linked(req);
+-                      io_cqring_fill_event(link->ctx, link->user_data,
+-                                           -ECANCELED, 0);
++                      io_fill_cqe_req(link, -ECANCELED, 0);
+                       io_put_req_deferred(link);
+                       posted = true;
+               }
+@@ -2370,8 +2373,8 @@ static void io_submit_flush_completions(
+       for (i = 0; i < nr; i++) {
+               struct io_kiocb *req = state->compl_reqs[i];
+-              __io_cqring_fill_event(ctx, req->user_data, req->result,
+-                                      req->compl.cflags);
++              __io_fill_cqe(ctx, req->user_data, req->result,
++                            req->compl.cflags);
+       }
+       io_commit_cqring(ctx);
+       spin_unlock(&ctx->completion_lock);
+@@ -2482,8 +2485,7 @@ static void io_iopoll_complete(struct io
+               req = list_first_entry(done, struct io_kiocb, inflight_entry);
+               list_del(&req->inflight_entry);
+-              __io_cqring_fill_event(ctx, req->user_data, req->result,
+-                                      io_put_rw_kbuf(req));
++              io_fill_cqe_req(req, req->result, io_put_rw_kbuf(req));
+               (*nr_events)++;
+               if (req_ref_put_and_test(req))
+@@ -5413,13 +5415,13 @@ static bool __io_poll_complete(struct io
+       }
+       if (req->poll.events & EPOLLONESHOT)
+               flags = 0;
+-      if (!io_cqring_fill_event(ctx, req->user_data, error, flags)) {
++
++      if (!(flags & IORING_CQE_F_MORE)) {
++              io_fill_cqe_req(req, error, flags);
++      } else if (!io_fill_cqe_aux(ctx, req->user_data, error, flags)) {
+               req->poll.events |= EPOLLONESHOT;
+               flags = 0;
+       }
+-      if (flags & IORING_CQE_F_MORE)
+-              ctx->cq_extra++;
+-
+       return !(flags & IORING_CQE_F_MORE);
+ }
+@@ -5746,9 +5748,9 @@ static bool io_poll_remove_one(struct io
+       do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
+       if (do_complete) {
+-              io_cqring_fill_event(req->ctx, req->user_data, -ECANCELED, 0);
+-              io_commit_cqring(req->ctx);
+               req_set_fail(req);
++              io_fill_cqe_req(req, -ECANCELED, 0);
++              io_commit_cqring(req->ctx);
+               io_put_req_deferred(req);
+       }
+       return do_complete;
+@@ -6045,7 +6047,7 @@ static int io_timeout_cancel(struct io_r
+               return PTR_ERR(req);
+       req_set_fail(req);
+-      io_cqring_fill_event(ctx, req->user_data, -ECANCELED, 0);
++      io_fill_cqe_req(req, -ECANCELED, 0);
+       io_put_req_deferred(req);
+       return 0;
+ }
+@@ -8271,8 +8273,7 @@ static void __io_rsrc_put_work(struct io
+                       io_ring_submit_lock(ctx, lock_ring);
+                       spin_lock(&ctx->completion_lock);
+-                      io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
+-                      ctx->cq_extra++;
++                      io_fill_cqe_aux(ctx, prsrc->tag, 0, 0);
+                       io_commit_cqring(ctx);
+                       spin_unlock(&ctx->completion_lock);
+                       io_cqring_ev_posted(ctx);
diff --git a/queue-5.15/io_uring-correct-fill-events-helpers-types.patch b/queue-5.15/io_uring-correct-fill-events-helpers-types.patch
new file mode 100644 (file)
index 0000000..6d807dc
--- /dev/null
@@ -0,0 +1,116 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:12 +0100
+Subject: io_uring: correct fill events helpers types
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <284e9d099964bf88fd0a97ff6cda636d51858a30.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 54daa9b2d80ab35824464b35a99f716e1cdf2ccb ]
+
+CQE result is a 32-bit integer, so the functions generating CQEs are
+better off accepting ints rather than longs. Convert io_cqring_fill_event() and
+other helpers.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/7ca6f15255e9117eae28adcac272744cae29b113.1633373302.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   24 ++++++++++++------------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1080,7 +1080,7 @@ static void io_uring_try_cancel_requests
+ static void io_uring_cancel_generic(bool cancel_all, struct io_sq_data *sqd);
+ static bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+-                               long res, unsigned int cflags);
++                               s32 res, u32 cflags);
+ static void io_put_req(struct io_kiocb *req);
+ static void io_put_req_deferred(struct io_kiocb *req);
+ static void io_dismantle_req(struct io_kiocb *req);
+@@ -1763,7 +1763,7 @@ static __cold void io_uring_drop_tctx_re
+ }
+ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
+-                                   long res, unsigned int cflags)
++                                   s32 res, u32 cflags)
+ {
+       struct io_overflow_cqe *ocqe;
+@@ -1791,7 +1791,7 @@ static bool io_cqring_event_overflow(str
+ }
+ static inline bool __io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+-                                        long res, unsigned int cflags)
++                                        s32 res, u32 cflags)
+ {
+       struct io_uring_cqe *cqe;
+@@ -1814,13 +1814,13 @@ static inline bool __io_cqring_fill_even
+ /* not as hot to bloat with inlining */
+ static noinline bool io_cqring_fill_event(struct io_ring_ctx *ctx, u64 user_data,
+-                                        long res, unsigned int cflags)
++                                        s32 res, u32 cflags)
+ {
+       return __io_cqring_fill_event(ctx, user_data, res, cflags);
+ }
+-static void io_req_complete_post(struct io_kiocb *req, long res,
+-                               unsigned int cflags)
++static void io_req_complete_post(struct io_kiocb *req, s32 res,
++                               u32 cflags)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+@@ -1861,8 +1861,8 @@ static inline bool io_req_needs_clean(st
+       return req->flags & IO_REQ_CLEAN_FLAGS;
+ }
+-static void io_req_complete_state(struct io_kiocb *req, long res,
+-                                unsigned int cflags)
++static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
++                                       u32 cflags)
+ {
+       if (io_req_needs_clean(req))
+               io_clean_op(req);
+@@ -1872,7 +1872,7 @@ static void io_req_complete_state(struct
+ }
+ static inline void __io_req_complete(struct io_kiocb *req, unsigned issue_flags,
+-                                   long res, unsigned cflags)
++                                   s32 res, u32 cflags)
+ {
+       if (issue_flags & IO_URING_F_COMPLETE_DEFER)
+               io_req_complete_state(req, res, cflags);
+@@ -1880,12 +1880,12 @@ static inline void __io_req_complete(str
+               io_req_complete_post(req, res, cflags);
+ }
+-static inline void io_req_complete(struct io_kiocb *req, long res)
++static inline void io_req_complete(struct io_kiocb *req, s32 res)
+ {
+       __io_req_complete(req, 0, res, 0);
+ }
+-static void io_req_complete_failed(struct io_kiocb *req, long res)
++static void io_req_complete_failed(struct io_kiocb *req, s32 res)
+ {
+       req_set_fail(req);
+       io_req_complete_post(req, res, 0);
+@@ -2707,7 +2707,7 @@ static bool __io_complete_rw_common(stru
+ static void io_req_task_complete(struct io_kiocb *req, bool *locked)
+ {
+       unsigned int cflags = io_put_rw_kbuf(req);
+-      long res = req->result;
++      int res = req->result;
+       if (*locked) {
+               struct io_ring_ctx *ctx = req->ctx;
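
The reason the 32-bit types are the natural fit can be shown with a small userspace example; the struct below merely mirrors the width of the CQE res field and is not the real uapi definition:

    #include <stdio.h>
    #include <stdint.h>

    struct fake_cqe {
        uint64_t user_data;
        int32_t  res;       /* the CQE result field is 32 bits wide */
        uint32_t flags;
    };

    int main(void)
    {
        int64_t res = 5000000000LL;     /* a 'long'-sized value that can't fit */
        struct fake_cqe cqe = { .user_data = 1, .res = (int32_t)res, .flags = 0 };

        /* carrying the result as long only hides the narrowing until here */
        printf("stored res = %d\n", cqe.res);
        return 0;
    }
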
diff --git a/queue-5.15/io_uring-fail-links-when-poll-fails.patch b/queue-5.15/io_uring-fail-links-when-poll-fails.patch
new file mode 100644 (file)
index 0000000..21ae478
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:22 +0100
+Subject: io_uring: fail links when poll fails
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <e9e78439b420e0b502fe7db89f5424c717a7ddca.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit c487a5ad48831afa6784b368ec40d0ee50f2fe1b ]
+
+Don't forget to cancel all linked requests of poll request when
+__io_arm_poll_handler() failed.
+
+Fixes: aa43477b04025 ("io_uring: poll rework")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/a78aad962460f9fdfe4aa4c0b62425c88f9415bc.1655852245.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5844,6 +5844,8 @@ static int io_poll_add(struct io_kiocb *
+       ipt.pt._qproc = io_poll_queue_proc;
+       ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events);
++      if (!ret && ipt.error)
++              req_set_fail(req);
+       ret = ret ?: ipt.error;
+       if (ret)
+               __io_req_complete(req, issue_flags, ret, 0);
diff --git a/queue-5.15/io_uring-fix-uaf-due-to-missing-pollfree-handling.patch b/queue-5.15/io_uring-fix-uaf-due-to-missing-pollfree-handling.patch
new file mode 100644 (file)
index 0000000..898a93e
--- /dev/null
@@ -0,0 +1,119 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:24 +0100
+Subject: io_uring: fix UAF due to missing POLLFREE handling
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com, Eric Biggers <ebiggers@google.com>, syzbot+5426c7ed6868c705ca14@syzkaller.appspotmail.com
+Message-ID: <23cf800d9e5e1ec5d7d3776cf57b4db87e5758dc.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 791f3465c4afde02d7f16cf7424ca87070b69396 ]
+
+Fixes a problem described in 50252e4b5e989
+("aio: fix use-after-free due to missing POLLFREE handling")
+and copies the approach used there.
+
+In short, we have to forcibly eject a poll entry when we meet POLLFREE.
+We can't rely on io_poll_get_ownership() as we can't wait for potentially
+running tw handlers, so we use the fact that wqs are RCU freed. See
+Eric's patch and comments for more details.
+
+Reported-by: Eric Biggers <ebiggers@google.com>
+Link: https://lore.kernel.org/r/20211209010455.42744-6-ebiggers@kernel.org
+Reported-and-tested-by: syzbot+5426c7ed6868c705ca14@syzkaller.appspotmail.com
+Fixes: 221c5eb233823 ("io_uring: add support for IORING_OP_POLL")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/4ed56b6f548f7ea337603a82315750449412748a.1642161259.git.asml.silence@gmail.com
+[axboe: drop non-functional change from patch]
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   58 ++++++++++++++++++++++++++++++++++++++++++++++++++--------
+ 1 file changed, 50 insertions(+), 8 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5369,12 +5369,14 @@ static void io_init_poll_iocb(struct io_
+ static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
+ {
+-      struct wait_queue_head *head = poll->head;
++      struct wait_queue_head *head = smp_load_acquire(&poll->head);
+-      spin_lock_irq(&head->lock);
+-      list_del_init(&poll->wait.entry);
+-      poll->head = NULL;
+-      spin_unlock_irq(&head->lock);
++      if (head) {
++              spin_lock_irq(&head->lock);
++              list_del_init(&poll->wait.entry);
++              poll->head = NULL;
++              spin_unlock_irq(&head->lock);
++      }
+ }
+ static void io_poll_remove_entries(struct io_kiocb *req)
+@@ -5382,10 +5384,26 @@ static void io_poll_remove_entries(struc
+       struct io_poll_iocb *poll = io_poll_get_single(req);
+       struct io_poll_iocb *poll_double = io_poll_get_double(req);
+-      if (poll->head)
+-              io_poll_remove_entry(poll);
+-      if (poll_double && poll_double->head)
++      /*
++       * While we hold the waitqueue lock and the waitqueue is nonempty,
++       * wake_up_pollfree() will wait for us.  However, taking the waitqueue
++       * lock in the first place can race with the waitqueue being freed.
++       *
++       * We solve this as eventpoll does: by taking advantage of the fact that
++       * all users of wake_up_pollfree() will RCU-delay the actual free.  If
++       * we enter rcu_read_lock() and see that the pointer to the queue is
++       * non-NULL, we can then lock it without the memory being freed out from
++       * under us.
++       *
++       * Keep holding rcu_read_lock() as long as we hold the queue lock, in
++       * case the caller deletes the entry from the queue, leaving it empty.
++       * In that case, only RCU prevents the queue memory from being freed.
++       */
++      rcu_read_lock();
++      io_poll_remove_entry(poll);
++      if (poll_double)
+               io_poll_remove_entry(poll_double);
++      rcu_read_unlock();
+ }
+ /*
+@@ -5523,6 +5541,30 @@ static int io_poll_wake(struct wait_queu
+                                                wait);
+       __poll_t mask = key_to_poll(key);
++      if (unlikely(mask & POLLFREE)) {
++              io_poll_mark_cancelled(req);
++              /* we have to kick tw in case it's not already */
++              io_poll_execute(req, 0);
++
++              /*
++               * If the waitqueue is being freed early but someone is already
++               * holds ownership over it, we have to tear down the request as
++               * best we can. That means immediately removing the request from
++               * its waitqueue and preventing all further accesses to the
++               * waitqueue via the request.
++               */
++              list_del_init(&poll->wait.entry);
++
++              /*
++               * Careful: this *must* be the last step, since as soon
++               * as req->head is NULL'ed out, the request can be
++               * completed and freed, since aio_poll_complete_work()
++               * will no longer need to take the waitqueue lock.
++               */
++              smp_store_release(&poll->head, NULL);
++              return 1;
++      }
++
+       /* for instances that support it check for an event match first */
+       if (mask && !(mask & poll->events))
+               return 0;
diff --git a/queue-5.15/io_uring-fix-wrong-arm_poll-error-handling.patch b/queue-5.15/io_uring-fix-wrong-arm_poll-error-handling.patch
new file mode 100644 (file)
index 0000000..725bfe8
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:23 +0100
+Subject: io_uring: fix wrong arm_poll error handling
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <cd4e6937d06fd243778b5a0eece6e37b9d058900.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 9d2ad2947a53abf5e5e6527a9eeed50a3a4cbc72 ]
+
+Leaving ip.error set when a request was punted to task_work execution is
+problematic, don't forget to clear it.
+
+Fixes: aa43477b04025 ("io_uring: poll rework")
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/a6c84ef4182c6962380aebe11b35bdcb25b0ccfb.1655852245.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5627,8 +5627,10 @@ static int __io_arm_poll_handler(struct
+       if (mask) {
+               /* can't multishot if failed, just queue the event we've got */
+-              if (unlikely(ipt->error || !ipt->nr_entries))
++              if (unlikely(ipt->error || !ipt->nr_entries)) {
+                       poll->events |= EPOLLONESHOT;
++                      ipt->error = 0;
++              }
+               __io_poll_execute(req, mask);
+               return 0;
+       }
diff --git a/queue-5.15/io_uring-inline-io_poll_complete.patch b/queue-5.15/io_uring-inline-io_poll_complete.patch
new file mode 100644 (file)
index 0000000..fda897c
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:17 +0100
+Subject: io_uring: inline io_poll_complete
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <acf073d1684c8bb956f5d26a5744430411863ad9.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit eb6e6f0690c846f7de46181bab3954c12c96e11e ]
+
+Inline io_poll_complete(), it's simple and doesn't have any particular
+purpose.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/933d7ee3e4450749a2d892235462c8f18d030293.1633373302.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   13 ++-----------
+ 1 file changed, 2 insertions(+), 11 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5447,16 +5447,6 @@ static bool __io_poll_complete(struct io
+       return !(flags & IORING_CQE_F_MORE);
+ }
+-static inline bool io_poll_complete(struct io_kiocb *req, __poll_t mask)
+-      __must_hold(&req->ctx->completion_lock)
+-{
+-      bool done;
+-
+-      done = __io_poll_complete(req, mask);
+-      io_commit_cqring(req->ctx);
+-      return done;
+-}
+-
+ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+@@ -5910,7 +5900,8 @@ static int io_poll_add(struct io_kiocb *
+       if (mask) { /* no async, we'd stolen it */
+               ipt.error = 0;
+-              done = io_poll_complete(req, mask);
++              done = __io_poll_complete(req, mask);
++              io_commit_cqring(req->ctx);
+       }
+       spin_unlock(&ctx->completion_lock);
diff --git a/queue-5.15/io_uring-kill-poll-linking-optimisation.patch b/queue-5.15/io_uring-kill-poll-linking-optimisation.patch
new file mode 100644 (file)
index 0000000..a04ce17
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:16 +0100
+Subject: io_uring: kill poll linking optimisation
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <46e0b53479f36ef3f9a0330b41ed0ccdb27a52bf.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit ab1dab960b8352cee082db0f8a54dc92a948bfd7 ]
+
+With IORING_FEAT_FAST_POLL in place, io_put_req_find_next() for poll
+requests doesn't make much sense, and in any case re-adding it
+shouldn't be a problem considering batching in tctx_task_work(). We can
+remove it.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/15699682bf81610ec901d4e79d6da64baa9f70be.1639605189.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5460,7 +5460,6 @@ static inline bool io_poll_complete(stru
+ static void io_poll_task_func(struct io_kiocb *req, bool *locked)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+-      struct io_kiocb *nxt;
+       if (io_poll_rewait(req, &req->poll)) {
+               spin_unlock(&ctx->completion_lock);
+@@ -5484,11 +5483,8 @@ static void io_poll_task_func(struct io_
+               spin_unlock(&ctx->completion_lock);
+               io_cqring_ev_posted(ctx);
+-              if (done) {
+-                      nxt = io_put_req_find_next(req);
+-                      if (nxt)
+-                              io_req_task_submit(nxt, locked);
+-              }
++              if (done)
++                      io_put_req(req);
+       }
+ }
diff --git a/queue-5.15/io_uring-move-common-poll-bits.patch b/queue-5.15/io_uring-move-common-poll-bits.patch
new file mode 100644 (file)
index 0000000..6eb671b
--- /dev/null
@@ -0,0 +1,128 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:15 +0100
+Subject: io_uring: move common poll bits
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <6d53d9981de4757a8624d97917f3a2bed7c145ad.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 5641897a5e8fb8abeb07e89c71a788d3db3ec75e ]
+
+Move some poll helpers/etc. up; we'll need them there shortly.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/6c5c3dba24c86aad5cd389a54a8c7412e6a0621d.1639605189.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   74 +++++++++++++++++++++++++++++-----------------------------
+ 1 file changed, 37 insertions(+), 37 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5318,6 +5318,43 @@ struct io_poll_table {
+       int error;
+ };
++static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
++{
++      /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
++      if (req->opcode == IORING_OP_POLL_ADD)
++              return req->async_data;
++      return req->apoll->double_poll;
++}
++
++static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req)
++{
++      if (req->opcode == IORING_OP_POLL_ADD)
++              return &req->poll;
++      return &req->apoll->poll;
++}
++
++static void io_poll_req_insert(struct io_kiocb *req)
++{
++      struct io_ring_ctx *ctx = req->ctx;
++      struct hlist_head *list;
++
++      list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
++      hlist_add_head(&req->hash_node, list);
++}
++
++static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
++                            wait_queue_func_t wake_func)
++{
++      poll->head = NULL;
++      poll->done = false;
++      poll->canceled = false;
++#define IO_POLL_UNMASK        (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
++      /* mask in events that we always want/need */
++      poll->events = events | IO_POLL_UNMASK;
++      INIT_LIST_HEAD(&poll->wait.entry);
++      init_waitqueue_func_entry(&poll->wait, wake_func);
++}
++
+ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+                          __poll_t mask, io_req_tw_func_t func)
+ {
+@@ -5366,21 +5403,6 @@ static bool io_poll_rewait(struct io_kio
+       return false;
+ }
+-static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
+-{
+-      /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
+-      if (req->opcode == IORING_OP_POLL_ADD)
+-              return req->async_data;
+-      return req->apoll->double_poll;
+-}
+-
+-static struct io_poll_iocb *io_poll_get_single(struct io_kiocb *req)
+-{
+-      if (req->opcode == IORING_OP_POLL_ADD)
+-              return &req->poll;
+-      return &req->apoll->poll;
+-}
+-
+ static void io_poll_remove_double(struct io_kiocb *req)
+       __must_hold(&req->ctx->completion_lock)
+ {
+@@ -5505,19 +5527,6 @@ static int io_poll_double_wake(struct wa
+       return 1;
+ }
+-static void io_init_poll_iocb(struct io_poll_iocb *poll, __poll_t events,
+-                            wait_queue_func_t wake_func)
+-{
+-      poll->head = NULL;
+-      poll->done = false;
+-      poll->canceled = false;
+-#define IO_POLL_UNMASK        (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
+-      /* mask in events that we always want/need */
+-      poll->events = events | IO_POLL_UNMASK;
+-      INIT_LIST_HEAD(&poll->wait.entry);
+-      init_waitqueue_func_entry(&poll->wait, wake_func);
+-}
+-
+ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
+                           struct wait_queue_head *head,
+                           struct io_poll_iocb **poll_ptr)
+@@ -5612,15 +5621,6 @@ static int io_async_wake(struct wait_que
+       return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
+ }
+-static void io_poll_req_insert(struct io_kiocb *req)
+-{
+-      struct io_ring_ctx *ctx = req->ctx;
+-      struct hlist_head *list;
+-
+-      list = &ctx->cancel_hash[hash_long(req->user_data, ctx->cancel_hash_bits)];
+-      hlist_add_head(&req->hash_node, list);
+-}
+-
+ static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
+                                     struct io_poll_iocb *poll,
+                                     struct io_poll_table *ipt, __poll_t mask,
diff --git a/queue-5.15/io_uring-poll-rework.patch b/queue-5.15/io_uring-poll-rework.patch
new file mode 100644 (file)
index 0000000..974a250
--- /dev/null
@@ -0,0 +1,803 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:18 +0100
+Subject: io_uring: poll rework
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <283c34b7ec28ab3ef4439cf6817106cedac69a3e.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit aa43477b040251f451db0d844073ac00a8ab66ee ]
+
+It's not possible to go forward with the current state of io_uring
+polling; we need a more straightforward and easier synchronisation.
+There are a lot of problems with how it is at the moment, including
+missing events on rewait.
+
+The main idea here is to introduce a notion of request ownership while
+polling: no one but the owner can modify any part of struct io_kiocb
+except ->poll_refs, which grants us protection against all sorts of
+races.
+
+The main users of such exclusivity are the poll task_work handlers, so
+before queueing a tw one should have/acquire ownership, which will be
+handed off to the tw handler.
+The other user is __io_arm_poll_handler(), doing the initial poll
+arming. It starts by taking ownership, so tw handlers won't be run
+until it's released later in the function after vfs_poll(). Note: this
+also prevents races in __io_queue_proc().
+Poll wake/etc. may not be able to get ownership; in that case they
+increase the poll refcount, and the task_work notices it and retries
+if necessary, see io_poll_check_events().
+There is also an IO_POLL_CANCEL_FLAG flag to notify that we want to
+kill the request.
+
+It makes cancellations more reliable, enables double multishot polling,
+fixes double poll rewait, fixes missing poll events and fixes another
+bunch of races.
+
+Even though it adds some overhead for the new refcounting, there are a
+couple of nice performance wins:
+- no req->refs refcounting for poll requests anymore
+- if the data is already there (once measured for some test to be 1-2%
+  of all apoll requests), it doesn't add atomics and removes the
+  spin_lock/unlock pair.
+- works well with multishots: we don't do remove from queue / add to
+  queue for each new poll event.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/6b652927c77ed9580ea4330ac5612f0e0848c946.1639605189.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |  527 +++++++++++++++++++++++++---------------------------------
+ 1 file changed, 228 insertions(+), 299 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -486,8 +486,6 @@ struct io_poll_iocb {
+       struct file                     *file;
+       struct wait_queue_head          *head;
+       __poll_t                        events;
+-      bool                            done;
+-      bool                            canceled;
+       struct wait_queue_entry         wait;
+ };
+@@ -885,6 +883,9 @@ struct io_kiocb {
+       /* store used ubuf, so we can prevent reloading */
+       struct io_mapped_ubuf           *imu;
++      /* stores selected buf, valid IFF REQ_F_BUFFER_SELECTED is set */
++      struct io_buffer                *kbuf;
++      atomic_t                        poll_refs;
+ };
+ struct io_tctx_node {
+@@ -5318,6 +5319,25 @@ struct io_poll_table {
+       int error;
+ };
++#define IO_POLL_CANCEL_FLAG   BIT(31)
++#define IO_POLL_REF_MASK      ((1u << 20)-1)
++
++/*
++ * If refs part of ->poll_refs (see IO_POLL_REF_MASK) is 0, it's free. We can
++ * bump it and acquire ownership. It's disallowed to modify requests while not
++ * owning it, that prevents from races for enqueueing task_work's and b/w
++ * arming poll and wakeups.
++ */
++static inline bool io_poll_get_ownership(struct io_kiocb *req)
++{
++      return !(atomic_fetch_inc(&req->poll_refs) & IO_POLL_REF_MASK);
++}
++
++static void io_poll_mark_cancelled(struct io_kiocb *req)
++{
++      atomic_or(IO_POLL_CANCEL_FLAG, &req->poll_refs);
++}
++
+ static struct io_poll_iocb *io_poll_get_double(struct io_kiocb *req)
+ {
+       /* pure poll stashes this in ->async_data, poll driven retry elsewhere */
+@@ -5346,8 +5366,6 @@ static void io_init_poll_iocb(struct io_
+                             wait_queue_func_t wake_func)
+ {
+       poll->head = NULL;
+-      poll->done = false;
+-      poll->canceled = false;
+ #define IO_POLL_UNMASK        (EPOLLERR|EPOLLHUP|EPOLLNVAL|EPOLLRDHUP)
+       /* mask in events that we always want/need */
+       poll->events = events | IO_POLL_UNMASK;
+@@ -5355,161 +5373,168 @@ static void io_init_poll_iocb(struct io_
+       init_waitqueue_func_entry(&poll->wait, wake_func);
+ }
+-static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
+-                         __poll_t mask, io_req_tw_func_t func)
++static inline void io_poll_remove_entry(struct io_poll_iocb *poll)
+ {
+-      /* for instances that support it check for an event match first: */
+-      if (mask && !(mask & poll->events))
+-              return 0;
+-
+-      trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
++      struct wait_queue_head *head = poll->head;
++      spin_lock_irq(&head->lock);
+       list_del_init(&poll->wait.entry);
++      poll->head = NULL;
++      spin_unlock_irq(&head->lock);
++}
+-      req->result = mask;
+-      req->io_task_work.func = func;
++static void io_poll_remove_entries(struct io_kiocb *req)
++{
++      struct io_poll_iocb *poll = io_poll_get_single(req);
++      struct io_poll_iocb *poll_double = io_poll_get_double(req);
+-      /*
+-       * If this fails, then the task is exiting. When a task exits, the
+-       * work gets canceled, so just cancel this request as well instead
+-       * of executing it. We can't safely execute it anyway, as we may not
+-       * have the needed state needed for it anyway.
+-       */
+-      io_req_task_work_add(req);
+-      return 1;
++      if (poll->head)
++              io_poll_remove_entry(poll);
++      if (poll_double && poll_double->head)
++              io_poll_remove_entry(poll_double);
+ }
+-static bool io_poll_rewait(struct io_kiocb *req, struct io_poll_iocb *poll)
+-      __acquires(&req->ctx->completion_lock)
++/*
++ * All poll tw should go through this. Checks for poll events, manages
++ * references, does rewait, etc.
++ *
++ * Returns a negative error on failure. >0 when no action require, which is
++ * either spurious wakeup or multishot CQE is served. 0 when it's done with
++ * the request, then the mask is stored in req->result.
++ */
++static int io_poll_check_events(struct io_kiocb *req)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
++      struct io_poll_iocb *poll = io_poll_get_single(req);
++      int v;
+       /* req->task == current here, checking PF_EXITING is safe */
+       if (unlikely(req->task->flags & PF_EXITING))
+-              WRITE_ONCE(poll->canceled, true);
+-
+-      if (!req->result && !READ_ONCE(poll->canceled)) {
+-              struct poll_table_struct pt = { ._key = poll->events };
++              io_poll_mark_cancelled(req);
+-              req->result = vfs_poll(req->file, &pt) & poll->events;
+-      }
++      do {
++              v = atomic_read(&req->poll_refs);
+-      spin_lock(&ctx->completion_lock);
+-      if (!req->result && !READ_ONCE(poll->canceled)) {
+-              add_wait_queue(poll->head, &poll->wait);
+-              return true;
+-      }
++              /* tw handler should be the owner, and so have some references */
++              if (WARN_ON_ONCE(!(v & IO_POLL_REF_MASK)))
++                      return 0;
++              if (v & IO_POLL_CANCEL_FLAG)
++                      return -ECANCELED;
+-      return false;
+-}
++              if (!req->result) {
++                      struct poll_table_struct pt = { ._key = poll->events };
+-static void io_poll_remove_double(struct io_kiocb *req)
+-      __must_hold(&req->ctx->completion_lock)
+-{
+-      struct io_poll_iocb *poll = io_poll_get_double(req);
++                      req->result = vfs_poll(req->file, &pt) & poll->events;
++              }
+-      lockdep_assert_held(&req->ctx->completion_lock);
++              /* multishot, just fill an CQE and proceed */
++              if (req->result && !(poll->events & EPOLLONESHOT)) {
++                      __poll_t mask = mangle_poll(req->result & poll->events);
++                      bool filled;
++
++                      spin_lock(&ctx->completion_lock);
++                      filled = io_fill_cqe_aux(ctx, req->user_data, mask,
++                                               IORING_CQE_F_MORE);
++                      io_commit_cqring(ctx);
++                      spin_unlock(&ctx->completion_lock);
++                      if (unlikely(!filled))
++                              return -ECANCELED;
++                      io_cqring_ev_posted(ctx);
++              } else if (req->result) {
++                      return 0;
++              }
+-      if (poll && poll->head) {
+-              struct wait_queue_head *head = poll->head;
++              /*
++               * Release all references, retry if someone tried to restart
++               * task_work while we were executing it.
++               */
++      } while (atomic_sub_return(v & IO_POLL_REF_MASK, &req->poll_refs));
+-              spin_lock_irq(&head->lock);
+-              list_del_init(&poll->wait.entry);
+-              if (poll->wait.private)
+-                      req_ref_put(req);
+-              poll->head = NULL;
+-              spin_unlock_irq(&head->lock);
+-      }
++      return 1;
+ }
+-static bool __io_poll_complete(struct io_kiocb *req, __poll_t mask)
+-      __must_hold(&req->ctx->completion_lock)
++static void io_poll_task_func(struct io_kiocb *req, bool *locked)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+-      unsigned flags = IORING_CQE_F_MORE;
+-      int error;
++      int ret;
++
++      ret = io_poll_check_events(req);
++      if (ret > 0)
++              return;
+-      if (READ_ONCE(req->poll.canceled)) {
+-              error = -ECANCELED;
+-              req->poll.events |= EPOLLONESHOT;
++      if (!ret) {
++              req->result = mangle_poll(req->result & req->poll.events);
+       } else {
+-              error = mangle_poll(mask);
++              req->result = ret;
++              req_set_fail(req);
+       }
+-      if (req->poll.events & EPOLLONESHOT)
+-              flags = 0;
+-      if (!(flags & IORING_CQE_F_MORE)) {
+-              io_fill_cqe_req(req, error, flags);
+-      } else if (!io_fill_cqe_aux(ctx, req->user_data, error, flags)) {
+-              req->poll.events |= EPOLLONESHOT;
+-              flags = 0;
+-      }
+-      return !(flags & IORING_CQE_F_MORE);
++      io_poll_remove_entries(req);
++      spin_lock(&ctx->completion_lock);
++      hash_del(&req->hash_node);
++      spin_unlock(&ctx->completion_lock);
++      io_req_complete_post(req, req->result, 0);
+ }
+-static void io_poll_task_func(struct io_kiocb *req, bool *locked)
++static void io_apoll_task_func(struct io_kiocb *req, bool *locked)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
++      int ret;
+-      if (io_poll_rewait(req, &req->poll)) {
+-              spin_unlock(&ctx->completion_lock);
+-      } else {
+-              bool done;
++      ret = io_poll_check_events(req);
++      if (ret > 0)
++              return;
+-              if (req->poll.done) {
+-                      spin_unlock(&ctx->completion_lock);
+-                      return;
+-              }
+-              done = __io_poll_complete(req, req->result);
+-              if (done) {
+-                      io_poll_remove_double(req);
+-                      hash_del(&req->hash_node);
+-                      req->poll.done = true;
+-              } else {
+-                      req->result = 0;
+-                      add_wait_queue(req->poll.head, &req->poll.wait);
+-              }
+-              io_commit_cqring(ctx);
+-              spin_unlock(&ctx->completion_lock);
+-              io_cqring_ev_posted(ctx);
++      io_poll_remove_entries(req);
++      spin_lock(&ctx->completion_lock);
++      hash_del(&req->hash_node);
++      spin_unlock(&ctx->completion_lock);
+-              if (done)
+-                      io_put_req(req);
+-      }
++      if (!ret)
++              io_req_task_submit(req, locked);
++      else
++              io_req_complete_failed(req, ret);
++}
++
++static void __io_poll_execute(struct io_kiocb *req, int mask)
++{
++      req->result = mask;
++      if (req->opcode == IORING_OP_POLL_ADD)
++              req->io_task_work.func = io_poll_task_func;
++      else
++              req->io_task_work.func = io_apoll_task_func;
++
++      trace_io_uring_task_add(req->ctx, req->opcode, req->user_data, mask);
++      io_req_task_work_add(req);
++}
++
++static inline void io_poll_execute(struct io_kiocb *req, int res)
++{
++      if (io_poll_get_ownership(req))
++              __io_poll_execute(req, res);
+ }
+-static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
+-                             int sync, void *key)
++static void io_poll_cancel_req(struct io_kiocb *req)
++{
++      io_poll_mark_cancelled(req);
++      /* kick tw, which should complete the request */
++      io_poll_execute(req, 0);
++}
++
++static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
++                      void *key)
+ {
+       struct io_kiocb *req = wait->private;
+-      struct io_poll_iocb *poll = io_poll_get_single(req);
++      struct io_poll_iocb *poll = container_of(wait, struct io_poll_iocb,
++                                               wait);
+       __poll_t mask = key_to_poll(key);
+-      unsigned long flags;
+-      /* for instances that support it check for an event match first: */
++      /* for instances that support it check for an event match first */
+       if (mask && !(mask & poll->events))
+               return 0;
+-      if (!(poll->events & EPOLLONESHOT))
+-              return poll->wait.func(&poll->wait, mode, sync, key);
+-      list_del_init(&wait->entry);
+-
+-      if (poll->head) {
+-              bool done;
+-
+-              spin_lock_irqsave(&poll->head->lock, flags);
+-              done = list_empty(&poll->wait.entry);
+-              if (!done)
+-                      list_del_init(&poll->wait.entry);
+-              /* make sure double remove sees this as being gone */
+-              wait->private = NULL;
+-              spin_unlock_irqrestore(&poll->head->lock, flags);
+-              if (!done) {
+-                      /* use wait func handler, so it matches the rq type */
+-                      poll->wait.func(&poll->wait, mode, sync, key);
+-              }
+-      }
+-      req_ref_put(req);
++      if (io_poll_get_ownership(req))
++              __io_poll_execute(req, mask);
+       return 1;
+ }
+@@ -5525,10 +5550,10 @@ static void __io_queue_proc(struct io_po
+        * if this happens.
+        */
+       if (unlikely(pt->nr_entries)) {
+-              struct io_poll_iocb *poll_one = poll;
++              struct io_poll_iocb *first = poll;
+               /* double add on the same waitqueue head, ignore */
+-              if (poll_one->head == head)
++              if (first->head == head)
+                       return;
+               /* already have a 2nd entry, fail a third attempt */
+               if (*poll_ptr) {
+@@ -5537,25 +5562,19 @@ static void __io_queue_proc(struct io_po
+                       pt->error = -EINVAL;
+                       return;
+               }
+-              /*
+-               * Can't handle multishot for double wait for now, turn it
+-               * into one-shot mode.
+-               */
+-              if (!(poll_one->events & EPOLLONESHOT))
+-                      poll_one->events |= EPOLLONESHOT;
++
+               poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
+               if (!poll) {
+                       pt->error = -ENOMEM;
+                       return;
+               }
+-              io_init_poll_iocb(poll, poll_one->events, io_poll_double_wake);
+-              req_ref_get(req);
+-              poll->wait.private = req;
++              io_init_poll_iocb(poll, first->events, first->wait.func);
+               *poll_ptr = poll;
+       }
+       pt->nr_entries++;
+       poll->head = head;
++      poll->wait.private = req;
+       if (poll->events & EPOLLEXCLUSIVE)
+               add_wait_queue_exclusive(head, &poll->wait);
+@@ -5563,61 +5582,24 @@ static void __io_queue_proc(struct io_po
+               add_wait_queue(head, &poll->wait);
+ }
+-static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
++static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+                              struct poll_table_struct *p)
+ {
+       struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+-      struct async_poll *apoll = pt->req->apoll;
+-
+-      __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
+-}
+-
+-static void io_async_task_func(struct io_kiocb *req, bool *locked)
+-{
+-      struct async_poll *apoll = req->apoll;
+-      struct io_ring_ctx *ctx = req->ctx;
+-
+-      trace_io_uring_task_run(req->ctx, req, req->opcode, req->user_data);
+-
+-      if (io_poll_rewait(req, &apoll->poll)) {
+-              spin_unlock(&ctx->completion_lock);
+-              return;
+-      }
+-
+-      hash_del(&req->hash_node);
+-      io_poll_remove_double(req);
+-      apoll->poll.done = true;
+-      spin_unlock(&ctx->completion_lock);
+-
+-      if (!READ_ONCE(apoll->poll.canceled))
+-              io_req_task_submit(req, locked);
+-      else
+-              io_req_complete_failed(req, -ECANCELED);
+-}
+-
+-static int io_async_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+-                      void *key)
+-{
+-      struct io_kiocb *req = wait->private;
+-      struct io_poll_iocb *poll = &req->apoll->poll;
+-      trace_io_uring_poll_wake(req->ctx, req->opcode, req->user_data,
+-                                      key_to_poll(key));
+-
+-      return __io_async_wake(req, poll, key_to_poll(key), io_async_task_func);
++      __io_queue_proc(&pt->req->poll, pt, head,
++                      (struct io_poll_iocb **) &pt->req->async_data);
+ }
+-static __poll_t __io_arm_poll_handler(struct io_kiocb *req,
+-                                    struct io_poll_iocb *poll,
+-                                    struct io_poll_table *ipt, __poll_t mask,
+-                                    wait_queue_func_t wake_func)
+-      __acquires(&ctx->completion_lock)
++static int __io_arm_poll_handler(struct io_kiocb *req,
++                               struct io_poll_iocb *poll,
++                               struct io_poll_table *ipt, __poll_t mask)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+-      bool cancel = false;
++      int v;
+       INIT_HLIST_NODE(&req->hash_node);
+-      io_init_poll_iocb(poll, mask, wake_func);
++      io_init_poll_iocb(poll, mask, io_poll_wake);
+       poll->file = req->file;
+       poll->wait.private = req;
+@@ -5626,31 +5608,54 @@ static __poll_t __io_arm_poll_handler(st
+       ipt->error = 0;
+       ipt->nr_entries = 0;
++      /*
++       * Take the ownership to delay any tw execution up until we're done
++       * with poll arming. see io_poll_get_ownership().
++       */
++      atomic_set(&req->poll_refs, 1);
+       mask = vfs_poll(req->file, &ipt->pt) & poll->events;
+-      if (unlikely(!ipt->nr_entries) && !ipt->error)
+-              ipt->error = -EINVAL;
++
++      if (mask && (poll->events & EPOLLONESHOT)) {
++              io_poll_remove_entries(req);
++              /* no one else has access to the req, forget about the ref */
++              return mask;
++      }
++      if (!mask && unlikely(ipt->error || !ipt->nr_entries)) {
++              io_poll_remove_entries(req);
++              if (!ipt->error)
++                      ipt->error = -EINVAL;
++              return 0;
++      }
+       spin_lock(&ctx->completion_lock);
+-      if (ipt->error || (mask && (poll->events & EPOLLONESHOT)))
+-              io_poll_remove_double(req);
+-      if (likely(poll->head)) {
+-              spin_lock_irq(&poll->head->lock);
+-              if (unlikely(list_empty(&poll->wait.entry))) {
+-                      if (ipt->error)
+-                              cancel = true;
+-                      ipt->error = 0;
+-                      mask = 0;
+-              }
+-              if ((mask && (poll->events & EPOLLONESHOT)) || ipt->error)
+-                      list_del_init(&poll->wait.entry);
+-              else if (cancel)
+-                      WRITE_ONCE(poll->canceled, true);
+-              else if (!poll->done) /* actually waiting for an event */
+-                      io_poll_req_insert(req);
+-              spin_unlock_irq(&poll->head->lock);
++      io_poll_req_insert(req);
++      spin_unlock(&ctx->completion_lock);
++
++      if (mask) {
++              /* can't multishot if failed, just queue the event we've got */
++              if (unlikely(ipt->error || !ipt->nr_entries))
++                      poll->events |= EPOLLONESHOT;
++              __io_poll_execute(req, mask);
++              return 0;
+       }
+-      return mask;
++      /*
++       * Release ownership. If someone tried to queue a tw while it was
++       * locked, kick it off for them.
++       */
++      v = atomic_dec_return(&req->poll_refs);
++      if (unlikely(v & IO_POLL_REF_MASK))
++              __io_poll_execute(req, 0);
++      return 0;
++}
++
++static void io_async_queue_proc(struct file *file, struct wait_queue_head *head,
++                             struct poll_table_struct *p)
++{
++      struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
++      struct async_poll *apoll = pt->req->apoll;
++
++      __io_queue_proc(&apoll->poll, pt, head, &apoll->double_poll);
+ }
+ enum {
+@@ -5665,7 +5670,8 @@ static int io_arm_poll_handler(struct io
+       struct io_ring_ctx *ctx = req->ctx;
+       struct async_poll *apoll;
+       struct io_poll_table ipt;
+-      __poll_t ret, mask = EPOLLONESHOT | POLLERR | POLLPRI;
++      __poll_t mask = EPOLLONESHOT | POLLERR | POLLPRI;
++      int ret;
+       if (!req->file || !file_can_poll(req->file))
+               return IO_APOLL_ABORTED;
+@@ -5692,11 +5698,8 @@ static int io_arm_poll_handler(struct io
+       req->apoll = apoll;
+       req->flags |= REQ_F_POLLED;
+       ipt.pt._qproc = io_async_queue_proc;
+-      io_req_set_refcount(req);
+-      ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask,
+-                                      io_async_wake);
+-      spin_unlock(&ctx->completion_lock);
++      ret = __io_arm_poll_handler(req, &apoll->poll, &ipt, mask);
+       if (ret || ipt.error)
+               return ret ? IO_APOLL_READY : IO_APOLL_ABORTED;
+@@ -5705,43 +5708,6 @@ static int io_arm_poll_handler(struct io
+       return IO_APOLL_OK;
+ }
+-static bool __io_poll_remove_one(struct io_kiocb *req,
+-                               struct io_poll_iocb *poll, bool do_cancel)
+-      __must_hold(&req->ctx->completion_lock)
+-{
+-      bool do_complete = false;
+-
+-      if (!poll->head)
+-              return false;
+-      spin_lock_irq(&poll->head->lock);
+-      if (do_cancel)
+-              WRITE_ONCE(poll->canceled, true);
+-      if (!list_empty(&poll->wait.entry)) {
+-              list_del_init(&poll->wait.entry);
+-              do_complete = true;
+-      }
+-      spin_unlock_irq(&poll->head->lock);
+-      hash_del(&req->hash_node);
+-      return do_complete;
+-}
+-
+-static bool io_poll_remove_one(struct io_kiocb *req)
+-      __must_hold(&req->ctx->completion_lock)
+-{
+-      bool do_complete;
+-
+-      io_poll_remove_double(req);
+-      do_complete = __io_poll_remove_one(req, io_poll_get_single(req), true);
+-
+-      if (do_complete) {
+-              req_set_fail(req);
+-              io_fill_cqe_req(req, -ECANCELED, 0);
+-              io_commit_cqring(req->ctx);
+-              io_put_req_deferred(req);
+-      }
+-      return do_complete;
+-}
+-
+ /*
+  * Returns true if we found and killed one or more poll requests
+  */
+@@ -5750,7 +5716,8 @@ static bool io_poll_remove_all(struct io
+ {
+       struct hlist_node *tmp;
+       struct io_kiocb *req;
+-      int posted = 0, i;
++      bool found = false;
++      int i;
+       spin_lock(&ctx->completion_lock);
+       for (i = 0; i < (1U << ctx->cancel_hash_bits); i++) {
+@@ -5758,16 +5725,14 @@ static bool io_poll_remove_all(struct io
+               list = &ctx->cancel_hash[i];
+               hlist_for_each_entry_safe(req, tmp, list, hash_node) {
+-                      if (io_match_task_safe(req, tsk, cancel_all))
+-                              posted += io_poll_remove_one(req);
++                      if (io_match_task_safe(req, tsk, cancel_all)) {
++                              io_poll_cancel_req(req);
++                              found = true;
++                      }
+               }
+       }
+       spin_unlock(&ctx->completion_lock);
+-
+-      if (posted)
+-              io_cqring_ev_posted(ctx);
+-
+-      return posted != 0;
++      return found;
+ }
+ static struct io_kiocb *io_poll_find(struct io_ring_ctx *ctx, __u64 sqe_addr,
+@@ -5788,19 +5753,26 @@ static struct io_kiocb *io_poll_find(str
+       return NULL;
+ }
++static bool io_poll_disarm(struct io_kiocb *req)
++      __must_hold(&ctx->completion_lock)
++{
++      if (!io_poll_get_ownership(req))
++              return false;
++      io_poll_remove_entries(req);
++      hash_del(&req->hash_node);
++      return true;
++}
++
+ static int io_poll_cancel(struct io_ring_ctx *ctx, __u64 sqe_addr,
+                         bool poll_only)
+       __must_hold(&ctx->completion_lock)
+ {
+-      struct io_kiocb *req;
++      struct io_kiocb *req = io_poll_find(ctx, sqe_addr, poll_only);
+-      req = io_poll_find(ctx, sqe_addr, poll_only);
+       if (!req)
+               return -ENOENT;
+-      if (io_poll_remove_one(req))
+-              return 0;
+-
+-      return -EALREADY;
++      io_poll_cancel_req(req);
++      return 0;
+ }
+ static __poll_t io_poll_parse_events(const struct io_uring_sqe *sqe,
+@@ -5850,23 +5822,6 @@ static int io_poll_update_prep(struct io
+       return 0;
+ }
+-static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+-                      void *key)
+-{
+-      struct io_kiocb *req = wait->private;
+-      struct io_poll_iocb *poll = &req->poll;
+-
+-      return __io_async_wake(req, poll, key_to_poll(key), io_poll_task_func);
+-}
+-
+-static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
+-                             struct poll_table_struct *p)
+-{
+-      struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
+-
+-      __io_queue_proc(&pt->req->poll, pt, head, (struct io_poll_iocb **) &pt->req->async_data);
+-}
+-
+ static int io_poll_add_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+ {
+       struct io_poll_iocb *poll = &req->poll;
+@@ -5888,57 +5843,31 @@ static int io_poll_add_prep(struct io_ki
+ static int io_poll_add(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       struct io_poll_iocb *poll = &req->poll;
+-      struct io_ring_ctx *ctx = req->ctx;
+       struct io_poll_table ipt;
+-      __poll_t mask;
+-      bool done;
++      int ret;
+       ipt.pt._qproc = io_poll_queue_proc;
+-      mask = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events,
+-                                      io_poll_wake);
+-
+-      if (mask) { /* no async, we'd stolen it */
+-              ipt.error = 0;
+-              done = __io_poll_complete(req, mask);
+-              io_commit_cqring(req->ctx);
+-      }
+-      spin_unlock(&ctx->completion_lock);
+-
+-      if (mask) {
+-              io_cqring_ev_posted(ctx);
+-              if (done)
+-                      io_put_req(req);
+-      }
+-      return ipt.error;
++      ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events);
++      ret = ret ?: ipt.error;
++      if (ret)
++              __io_req_complete(req, issue_flags, ret, 0);
++      return 0;
+ }
+ static int io_poll_update(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_kiocb *preq;
+-      bool completing;
+       int ret2, ret = 0;
+       spin_lock(&ctx->completion_lock);
+       preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
+-      if (!preq) {
+-              ret = -ENOENT;
+-fail:
++      if (!preq || !io_poll_disarm(preq)) {
+               spin_unlock(&ctx->completion_lock);
++              ret = preq ? -EALREADY : -ENOENT;
+               goto out;
+       }
+-      io_poll_remove_double(preq);
+-      /*
+-       * Don't allow racy completion with singleshot, as we cannot safely
+-       * update those. For multishot, if we're racing with completion, just
+-       * let completion re-add it.
+-       */
+-      completing = !__io_poll_remove_one(preq, &preq->poll, false);
+-      if (completing && (preq->poll.events & EPOLLONESHOT)) {
+-              ret = -EALREADY;
+-              goto fail;
+-      }
+       spin_unlock(&ctx->completion_lock);
+       if (req->poll_update.update_events || req->poll_update.update_user_data) {
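
The ownership scheme described in the poll-rework commit message above boils
down to one atomic word: the low bits of ->poll_refs count pending wakeups,
bit 31 marks cancellation, and whoever bumps the count from zero owns the
request. Below is a minimal stand-alone sketch of that pattern using C11
atomics; it is illustrative only, with simplified placeholder names
(fake_req, queue_tw, handle), not kernel code.

/* Sketch of the poll_refs ownership pattern; not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>

#define POLL_CANCEL_FLAG (1u << 31)
#define POLL_REF_MASK    ((1u << 20) - 1)

struct fake_req {
	atomic_uint poll_refs;          /* stands in for io_kiocb->poll_refs */
};

/* Whoever bumps the refcount from zero becomes the owner. */
static bool poll_get_ownership(struct fake_req *req)
{
	return !(atomic_fetch_add(&req->poll_refs, 1) & POLL_REF_MASK);
}

static void poll_mark_cancelled(struct fake_req *req)
{
	atomic_fetch_or(&req->poll_refs, POLL_CANCEL_FLAG);
}

/* Wakeup path: only the winner queues task_work; losers merely leave a
 * reference behind, which tells the owner to loop once more. */
static void poll_wake(struct fake_req *req, void (*queue_tw)(struct fake_req *))
{
	if (poll_get_ownership(req))
		queue_tw(req);
}

/* task_work side: handle events, then drop every reference seen so far.
 * If more wakeups raced in meanwhile, the count stays non-zero and we
 * retry instead of losing an event. */
static int poll_check_events(struct fake_req *req, void (*handle)(struct fake_req *))
{
	unsigned int v;

	do {
		v = atomic_load(&req->poll_refs);
		if (v & POLL_CANCEL_FLAG)
			return -1;      /* request is being killed */
		handle(req);
	} while (atomic_fetch_sub(&req->poll_refs, v & POLL_REF_MASK)
		 != (v & POLL_REF_MASK));
	return 0;
}

In the actual patch, __io_arm_poll_handler() additionally pre-sets the count
to 1 while arming, so wakeups arriving mid-arming only leave a reference and
the arming code kicks the task_work itself when it drops that initial
reference.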
diff --git a/queue-5.15/io_uring-refactor-poll-update.patch b/queue-5.15/io_uring-refactor-poll-update.patch
new file mode 100644 (file)
index 0000000..744fd38
--- /dev/null
@@ -0,0 +1,115 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:14 +0100
+Subject: io_uring: refactor poll update
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com
+Message-ID: <decb6e4ea41a43485fcdd1853cca4af0bd3ed526.1661594698.git.asml.silence@gmail.com>
+
+From: Pavel Begunkov <asml.silence@gmail.com>
+
+[ upstream commit 2bbb146d96f4b45e17d6aeede300796bc1a96d68 ]
+
+Clean up io_poll_update() and unify cancellation paths for remove and
+update.
+
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Link: https://lore.kernel.org/r/5937138b6265a1285220e2fab1b28132c1d73ce3.1639605189.git.asml.silence@gmail.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |   62 ++++++++++++++++++++++++----------------------------------
+ 1 file changed, 26 insertions(+), 36 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5931,61 +5931,51 @@ static int io_poll_update(struct io_kioc
+       struct io_ring_ctx *ctx = req->ctx;
+       struct io_kiocb *preq;
+       bool completing;
+-      int ret;
++      int ret2, ret = 0;
+       spin_lock(&ctx->completion_lock);
+       preq = io_poll_find(ctx, req->poll_update.old_user_data, true);
+       if (!preq) {
+               ret = -ENOENT;
+-              goto err;
+-      }
+-
+-      if (!req->poll_update.update_events && !req->poll_update.update_user_data) {
+-              completing = true;
+-              ret = io_poll_remove_one(preq) ? 0 : -EALREADY;
+-              goto err;
++fail:
++              spin_unlock(&ctx->completion_lock);
++              goto out;
+       }
+-
++      io_poll_remove_double(preq);
+       /*
+        * Don't allow racy completion with singleshot, as we cannot safely
+        * update those. For multishot, if we're racing with completion, just
+        * let completion re-add it.
+        */
+-      io_poll_remove_double(preq);
+       completing = !__io_poll_remove_one(preq, &preq->poll, false);
+       if (completing && (preq->poll.events & EPOLLONESHOT)) {
+               ret = -EALREADY;
+-              goto err;
+-      }
+-      /* we now have a detached poll request. reissue. */
+-      ret = 0;
+-err:
+-      if (ret < 0) {
+-              spin_unlock(&ctx->completion_lock);
+-              req_set_fail(req);
+-              io_req_complete(req, ret);
+-              return 0;
+-      }
+-      /* only mask one event flags, keep behavior flags */
+-      if (req->poll_update.update_events) {
+-              preq->poll.events &= ~0xffff;
+-              preq->poll.events |= req->poll_update.events & 0xffff;
+-              preq->poll.events |= IO_POLL_UNMASK;
++              goto fail;
+       }
+-      if (req->poll_update.update_user_data)
+-              preq->user_data = req->poll_update.new_user_data;
+       spin_unlock(&ctx->completion_lock);
++      if (req->poll_update.update_events || req->poll_update.update_user_data) {
++              /* only mask one event flags, keep behavior flags */
++              if (req->poll_update.update_events) {
++                      preq->poll.events &= ~0xffff;
++                      preq->poll.events |= req->poll_update.events & 0xffff;
++                      preq->poll.events |= IO_POLL_UNMASK;
++              }
++              if (req->poll_update.update_user_data)
++                      preq->user_data = req->poll_update.new_user_data;
++
++              ret2 = io_poll_add(preq, issue_flags);
++              /* successfully updated, don't complete poll request */
++              if (!ret2)
++                      goto out;
++      }
++      req_set_fail(preq);
++      io_req_complete(preq, -ECANCELED);
++out:
++      if (ret < 0)
++              req_set_fail(req);
+       /* complete update request, we're done with it */
+       io_req_complete(req, ret);
+-
+-      if (!completing) {
+-              ret = io_poll_add(preq, issue_flags);
+-              if (ret < 0) {
+-                      req_set_fail(preq);
+-                      io_req_complete(preq, ret);
+-              }
+-      }
+       return 0;
+ }
diff --git a/queue-5.15/io_uring-remove-poll-entry-from-list-when-canceling-all.patch b/queue-5.15/io_uring-remove-poll-entry-from-list-when-canceling-all.patch
new file mode 100644 (file)
index 0000000..bdefcbc
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:20 +0100
+Subject: io_uring: remove poll entry from list when canceling all
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com, Dylan Yudaken <dylany@fb.com>
+Message-ID: <83da9963edf6789d744c4e6db4a3943e58ce8c0b.1661594698.git.asml.silence@gmail.com>
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ upstream commit 61bc84c4008812d784c398cfb54118c1ba396dfc ]
+
+When the ring is exiting, as part of the shutdown, poll requests are
+removed. But io_poll_remove_all() does not remove entries when finding
+them, and since completions are done out-of-band, we can find and remove
+the same entry multiple times.
+
+We do guard the poll execution by poll ownership, but that does not
+exclude us from reissuing a new one once the previous removal ownership
+goes away.
+
+This can race with poll execution as well, where we then end up seeing
+req->apoll be NULL because a previous task_work requeue finished the
+request.
+
+Remove the poll entry when we find it and get ownership of it. This
+prevents multiple invocations from finding it.
+
+Fixes: aa43477b0402 ("io_uring: poll rework")
+Reported-by: Dylan Yudaken <dylany@fb.com>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -5720,6 +5720,7 @@ static bool io_poll_remove_all(struct io
+               list = &ctx->cancel_hash[i];
+               hlist_for_each_entry_safe(req, tmp, list, hash_node) {
+                       if (io_match_task_safe(req, tsk, cancel_all)) {
++                              hlist_del_init(&req->hash_node);
+                               io_poll_cancel_req(req);
+                               found = true;
+                       }
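
To make the one-line fix above easier to follow, here is a paraphrase of the
exit-time sweep in io_poll_remove_all() with explanatory comments; it is not
an exact copy of the 5.15 code, just the loop from the hunk above annotated.

/* Paraphrased from io_poll_remove_all(); runs under ctx->completion_lock. */
hlist_for_each_entry_safe(req, tmp, list, hash_node) {
	if (io_match_task_safe(req, tsk, cancel_all)) {
		/* Unhash immediately: completion runs out-of-band via
		 * task_work, so without this the same request could be
		 * found and cancelled again - e.g. after a task_work
		 * requeue has already freed req->apoll. */
		hlist_del_init(&req->hash_node);
		/* Sets IO_POLL_CANCEL_FLAG and, if ownership is won,
		 * kicks the task_work that completes the request. */
		io_poll_cancel_req(req);
		found = true;
	}
}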
diff --git a/queue-5.15/io_uring-remove-unused-function-req_ref_put.patch b/queue-5.15/io_uring-remove-unused-function-req_ref_put.patch
new file mode 100644 (file)
index 0000000..988caaf
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Thu Sep  1 11:59:31 AM CEST 2022
+From: Pavel Begunkov <asml.silence@gmail.com>
+Date: Mon, 29 Aug 2022 14:30:19 +0100
+Subject: io_uring: Remove unused function req_ref_put
+To: stable@vger.kernel.org
+Cc: Jens Axboe <axboe@kernel.dk>, asml.silence@gmail.com, Jiapeng Chong <jiapeng.chong@linux.alibaba.com>, Abaci Robot <abaci@linux.alibaba.com>
+Message-ID: <462047f63638cd5489cf368f90cd86115a57d983.1661594698.git.asml.silence@gmail.com>
+
+From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+
+[ upstream commit c84b8a3fef663933007e885535591b9d30bdc860 ]
+
+Fix the following clang warning:
+
+fs/io_uring.c:1195:20: warning: unused function 'req_ref_put'
+[-Wunused-function].
+
+Fixes: aa43477b0402 ("io_uring: poll rework")
+Reported-by: Abaci Robot <abaci@linux.alibaba.com>
+Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20220113162005.3011-1-jiapeng.chong@linux.alibaba.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+[pavel: backport]
+Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/io_uring.c |    6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/fs/io_uring.c
++++ b/fs/io_uring.c
+@@ -1155,12 +1155,6 @@ static inline bool req_ref_put_and_test(
+       return atomic_dec_and_test(&req->refs);
+ }
+-static inline void req_ref_put(struct io_kiocb *req)
+-{
+-      WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
+-      WARN_ON_ONCE(req_ref_put_and_test(req));
+-}
+-
+ static inline void req_ref_get(struct io_kiocb *req)
+ {
+       WARN_ON_ONCE(!(req->flags & REQ_F_REFCOUNT));
diff --git a/queue-5.15/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch b/queue-5.15/mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch
new file mode 100644 (file)
index 0000000..42ce428
--- /dev/null
@@ -0,0 +1,39 @@
+From ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 Mon Sep 17 00:00:00 2001
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Tue, 12 Jul 2022 21:05:42 +0800
+Subject: mm/hugetlb: avoid corrupting page->mapping in hugetlb_mcopy_atomic_pte
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+commit ab74ef708dc51df7cf2b8a890b9c6990fac5c0c6 upstream.
+
+In the MCOPY_ATOMIC_CONTINUE case with a non-shared VMA, pages in the page
+cache are installed in the ptes.  But hugepage_add_new_anon_rmap is
+mistakenly called for them because they're not vm_shared.  This will
+corrupt the page->mapping used by the page cache code.
+
+Link: https://lkml.kernel.org/r/20220712130542.18836-1-linmiaohe@huawei.com
+Fixes: f619147104c8 ("userfaultfd: add UFFDIO_CONTINUE ioctl")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: Peter Xu <peterx@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5371,7 +5371,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
+       if (!huge_pte_none(huge_ptep_get(dst_pte)))
+               goto out_release_unlock;
+-      if (vm_shared) {
++      if (page_in_pagecache) {
+               page_dup_rmap(page, true);
+       } else {
+               ClearHPageRestoreReserve(page);
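
The single changed line above is clearer with the whole branch it sits in.
The following is a paraphrase of the rmap decision in
hugetlb_mcopy_atomic_pte() after the fix; the else-branch call and its
arguments are reconstructed from memory and may not match 5.15 line-for-line.

if (page_in_pagecache) {
	/* The page already lives in the page cache (the
	 * MCOPY_ATOMIC_CONTINUE path), even when the VMA is private:
	 * only take another mapcount reference, so page->mapping keeps
	 * pointing at the file's address_space. */
	page_dup_rmap(page, true);
} else {
	/* A freshly allocated page becoming anonymous memory: set up a
	 * new anon rmap, which rewrites page->mapping - exactly what
	 * must never happen to a page-cache page. */
	ClearHPageRestoreReserve(page);
	hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
}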
diff --git a/queue-5.15/series b/queue-5.15/series
index bfcfae48adfcc40a9b90a547c48088c000b4107f..42105febf2ca7cda2af8262da430bfae03c45b8a 100644 (file)
--- a/queue-5.15/series
@@ -1 +1,23 @@
 mm-force-tlb-flush-for-pfnmap-mappings-before-unlink_file_vma.patch
+drm-bridge-add-stubs-for-devm_drm_of_get_bridge-when-of-is-disabled.patch
+acpi-thermal-drop-an-always-true-check.patch
+drm-vc4-hdmi-rework-power-up.patch
+drm-vc4-hdmi-depends-on-config_pm.patch
+firmware-tegra-bpmp-do-only-aligned-access-to-ipc-memory-area.patch
+crypto-lib-remove-unneeded-selection-of-xor_blocks.patch
+drivers-hv-balloon-support-status-report-for-larger-page-sizes.patch
+mm-hugetlb-avoid-corrupting-page-mapping-in-hugetlb_mcopy_atomic_pte.patch
+arm64-errata-add-cortex-a510-to-the-repeat-tlbi-list.patch
+io_uring-correct-fill-events-helpers-types.patch
+io_uring-clean-cqe-filling-functions.patch
+io_uring-refactor-poll-update.patch
+io_uring-move-common-poll-bits.patch
+io_uring-kill-poll-linking-optimisation.patch
+io_uring-inline-io_poll_complete.patch
+io_uring-poll-rework.patch
+io_uring-remove-unused-function-req_ref_put.patch
+io_uring-remove-poll-entry-from-list-when-canceling-all.patch
+io_uring-bump-poll-refs-to-full-31-bits.patch
+io_uring-fail-links-when-poll-fails.patch
+io_uring-fix-wrong-arm_poll-error-handling.patch
+io_uring-fix-uaf-due-to-missing-pollfree-handling.patch