]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.18-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 30 Jul 2022 14:52:41 +0000 (16:52 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 30 Jul 2022 14:52:41 +0000 (16:52 +0200)
added patches:
arm-dts-lan966x-fix-sys_clk-frequency.patch
arm-pxa2xx-fix-gpio-descriptor-tables.patch
asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch
bluetooth-always-set-event-mask-on-suspend.patch
bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch
bridge-do-not-send-empty-ifla_af_spec-attribute.patch
drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch
fs-sendfile-handles-o_nonblock-of-out_fd.patch
hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch
intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch
mm-fix-missing-wake-up-event-for-fsdax-pages.patch
mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch
mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch
nouveau-svm-fix-to-migrate-all-requested-pages.patch
ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch
revert-ocfs2-mount-shared-volume-without-ha-stack.patch
s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch
secretmem-fix-unhandled-fault-in-truncate.patch
tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch
tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch
tcp-fix-a-data-race-around-sysctl_tcp_frto.patch
tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch
tcp-fix-data-races-around-sysctl_tcp_dsack.patch
tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch
userfaultfd-provide-properly-masked-address-for-huge-pages.patch
watch_queue-fix-missing-locking-in-add_watch_to_object.patch
watch_queue-fix-missing-rcu-annotation.patch

28 files changed:
queue-5.18/arm-dts-lan966x-fix-sys_clk-frequency.patch [new file with mode: 0644]
queue-5.18/arm-pxa2xx-fix-gpio-descriptor-tables.patch [new file with mode: 0644]
queue-5.18/asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch [new file with mode: 0644]
queue-5.18/bluetooth-always-set-event-mask-on-suspend.patch [new file with mode: 0644]
queue-5.18/bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch [new file with mode: 0644]
queue-5.18/bridge-do-not-send-empty-ifla_af_spec-attribute.patch [new file with mode: 0644]
queue-5.18/drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch [new file with mode: 0644]
queue-5.18/fs-sendfile-handles-o_nonblock-of-out_fd.patch [new file with mode: 0644]
queue-5.18/hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch [new file with mode: 0644]
queue-5.18/intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch [new file with mode: 0644]
queue-5.18/mm-fix-missing-wake-up-event-for-fsdax-pages.patch [new file with mode: 0644]
queue-5.18/mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch [new file with mode: 0644]
queue-5.18/mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch [new file with mode: 0644]
queue-5.18/nouveau-svm-fix-to-migrate-all-requested-pages.patch [new file with mode: 0644]
queue-5.18/ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch [new file with mode: 0644]
queue-5.18/revert-ocfs2-mount-shared-volume-without-ha-stack.patch [new file with mode: 0644]
queue-5.18/s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch [new file with mode: 0644]
queue-5.18/secretmem-fix-unhandled-fault-in-truncate.patch [new file with mode: 0644]
queue-5.18/series [new file with mode: 0644]
queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch [new file with mode: 0644]
queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch [new file with mode: 0644]
queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch [new file with mode: 0644]
queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch [new file with mode: 0644]
queue-5.18/tcp-fix-data-races-around-sysctl_tcp_dsack.patch [new file with mode: 0644]
queue-5.18/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch [new file with mode: 0644]
queue-5.18/userfaultfd-provide-properly-masked-address-for-huge-pages.patch [new file with mode: 0644]
queue-5.18/watch_queue-fix-missing-locking-in-add_watch_to_object.patch [new file with mode: 0644]
queue-5.18/watch_queue-fix-missing-rcu-annotation.patch [new file with mode: 0644]

diff --git a/queue-5.18/arm-dts-lan966x-fix-sys_clk-frequency.patch b/queue-5.18/arm-dts-lan966x-fix-sys_clk-frequency.patch
new file mode 100644 (file)
index 0000000..c5e9ef8
--- /dev/null
@@ -0,0 +1,62 @@
+From ef0324b6415db6742bd632dc0dfbb8fbc111473b Mon Sep 17 00:00:00 2001
+From: Michael Walle <michael@walle.cc>
+Date: Sat, 26 Mar 2022 20:40:28 +0100
+Subject: ARM: dts: lan966x: fix sys_clk frequency
+
+From: Michael Walle <michael@walle.cc>
+
+commit ef0324b6415db6742bd632dc0dfbb8fbc111473b upstream.
+
+The sys_clk frequency is 165.625MHz. The register reference of the
+Generic Clock controller lists the CPU clock as 600MHz, the DDR clock as
+300MHz and the SYS clock as 162.5MHz. This is wrong. It was first
+noticed during the fan driver development and it was measured and
+verified via the CLK_MON output of the SoC which can be configured to
+output sys_clk/64.
+
+The core PLL settings (which drives the SYS clock) seems to be as
+follows:
+  DIVF = 52
+  DIVQ = 3
+  DIVR = 1
+
+With a reference clock of 25MHz, this means we have a post divider clock
+  Fpfd = Fref / (DIVR + 1) = 25MHz / (1 + 1) = 12.5MHz
+
+The resulting VCO frequency is then
+  Fvco = Fpfd * (DIVF + 1) * 2 = 12.5MHz * (52 + 1) * 2 = 1325MHz
+
+And the output frequency is
+  Fout = Fvco / 2^DIVQ = 1325MHz / 2^3 = 165.625MHz
+
+This all adds up to the constraints of the PLL:
+    10MHz <= Fpfd <= 200MHz
+    20MHz <= Fout <= 1000MHz
+  1000MHz <= Fvco <= 2000MHz
+
+Fixes: 290deaa10c50 ("ARM: dts: add DT for lan966 SoC and 2-port board pcb8291")
+Signed-off-by: Michael Walle <michael@walle.cc>
+Reviewed-by: Kavyasree Kotagiri <kavyasree.kotagiri@microchip.com>
+Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Link: https://lore.kernel.org/r/20220326194028.2945985-1-michael@walle.cc
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/boot/dts/lan966x.dtsi | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm/boot/dts/lan966x.dtsi b/arch/arm/boot/dts/lan966x.dtsi
+index 3cb02fffe716..38e90a31d2dd 100644
+--- a/arch/arm/boot/dts/lan966x.dtsi
++++ b/arch/arm/boot/dts/lan966x.dtsi
+@@ -38,7 +38,7 @@ clocks {
+               sys_clk: sys_clk {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+-                      clock-frequency = <162500000>;
++                      clock-frequency = <165625000>;
+               };
+               cpu_clk: cpu_clk {
+-- 
+2.37.1
+
diff --git a/queue-5.18/arm-pxa2xx-fix-gpio-descriptor-tables.patch b/queue-5.18/arm-pxa2xx-fix-gpio-descriptor-tables.patch
new file mode 100644 (file)
index 0000000..a616783
--- /dev/null
@@ -0,0 +1,142 @@
+From c5cdb9286913aa5a5ebb81bcca0c17df3b0e2c79 Mon Sep 17 00:00:00 2001
+From: Linus Walleij <linus.walleij@linaro.org>
+Date: Fri, 22 Jul 2022 13:46:11 +0200
+Subject: ARM: pxa2xx: Fix GPIO descriptor tables
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+commit c5cdb9286913aa5a5ebb81bcca0c17df3b0e2c79 upstream.
+
+Laurence reports:
+
+"Kernel >5.18 on Zaurus has a bug where the power management code can't
+talk to devices, emitting the following errors:
+
+sharpsl-pm sharpsl-pm: Error: AC check failed: voltage -22.
+sharpsl-pm sharpsl-pm: Charging Error!
+sharpsl-pm sharpsl-pm: Warning: Cannot read main battery!
+
+Looking at the recent changes, I found that commit 31455bbda208 ("spi:
+pxa2xx_spi: Convert to use GPIO descriptors") replaced the deprecated
+SPI chip select platform device code with a gpiod lookup table. However,
+this didn't seem to work until I changed the `dev_id` member from the
+device name to the bus id. I'm not entirely sure why this is necessary,
+but I suspect it is related to the fact that in sysfs SPI devices are
+attached under /sys/devices/.../dev_name/spi_master/spiB/spiB.C, rather
+than directly to the device."
+
+After reviewing the change I conclude that the same fix is needed
+for all affected boards.
+
+Fixes: 31455bbda208 ("spi: pxa2xx_spi: Convert to use GPIO descriptors")
+Reported-by: Laurence de Bruxelles <lfdebrux@gmail.com>
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20220722114611.1517414-1-linus.walleij@linaro.org
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/arm/mach-pxa/corgi.c     |    2 +-
+ arch/arm/mach-pxa/hx4700.c    |    2 +-
+ arch/arm/mach-pxa/icontrol.c  |    4 ++--
+ arch/arm/mach-pxa/littleton.c |    2 +-
+ arch/arm/mach-pxa/magician.c  |    2 +-
+ arch/arm/mach-pxa/spitz.c     |    2 +-
+ arch/arm/mach-pxa/z2.c        |    4 ++--
+ 7 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/arch/arm/mach-pxa/corgi.c
++++ b/arch/arm/mach-pxa/corgi.c
+@@ -531,7 +531,7 @@ static struct pxa2xx_spi_controller corg
+ };
+ static struct gpiod_lookup_table corgi_spi_gpio_table = {
+-      .dev_id = "pxa2xx-spi.1",
++      .dev_id = "spi1",
+       .table = {
+               GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
+--- a/arch/arm/mach-pxa/hx4700.c
++++ b/arch/arm/mach-pxa/hx4700.c
+@@ -635,7 +635,7 @@ static struct pxa2xx_spi_controller pxa_
+ };
+ static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
+-      .dev_id = "pxa2xx-spi.2",
++      .dev_id = "spi2",
+       .table = {
+               GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
+               { },
+--- a/arch/arm/mach-pxa/icontrol.c
++++ b/arch/arm/mach-pxa/icontrol.c
+@@ -140,7 +140,7 @@ struct platform_device pxa_spi_ssp4 = {
+ };
+ static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
+-      .dev_id = "pxa2xx-spi.3",
++      .dev_id = "spi3",
+       .table = {
+               GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW),
+@@ -149,7 +149,7 @@ static struct gpiod_lookup_table pxa_ssp
+ };
+ static struct gpiod_lookup_table pxa_ssp4_gpio_table = {
+-      .dev_id = "pxa2xx-spi.4",
++      .dev_id = "spi4",
+       .table = {
+               GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW),
+--- a/arch/arm/mach-pxa/littleton.c
++++ b/arch/arm/mach-pxa/littleton.c
+@@ -208,7 +208,7 @@ static struct spi_board_info littleton_s
+ };
+ static struct gpiod_lookup_table littleton_spi_gpio_table = {
+-      .dev_id = "pxa2xx-spi.2",
++      .dev_id = "spi2",
+       .table = {
+               GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
+               { },
+--- a/arch/arm/mach-pxa/magician.c
++++ b/arch/arm/mach-pxa/magician.c
+@@ -946,7 +946,7 @@ static struct pxa2xx_spi_controller magi
+ };
+ static struct gpiod_lookup_table magician_spi_gpio_table = {
+-      .dev_id = "pxa2xx-spi.2",
++      .dev_id = "spi2",
+       .table = {
+               /* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */
+               GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
+--- a/arch/arm/mach-pxa/spitz.c
++++ b/arch/arm/mach-pxa/spitz.c
+@@ -578,7 +578,7 @@ static struct pxa2xx_spi_controller spit
+ };
+ static struct gpiod_lookup_table spitz_spi_gpio_table = {
+-      .dev_id = "pxa2xx-spi.2",
++      .dev_id = "spi2",
+       .table = {
+               GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
+               GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
+--- a/arch/arm/mach-pxa/z2.c
++++ b/arch/arm/mach-pxa/z2.c
+@@ -623,7 +623,7 @@ static struct pxa2xx_spi_controller pxa_
+ };
+ static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
+-      .dev_id = "pxa2xx-spi.1",
++      .dev_id = "spi1",
+       .table = {
+               GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW),
+               { },
+@@ -631,7 +631,7 @@ static struct gpiod_lookup_table pxa_ssp
+ };
+ static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
+-      .dev_id = "pxa2xx-spi.2",
++      .dev_id = "spi2",
+       .table = {
+               GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
+               { },
diff --git a/queue-5.18/asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch b/queue-5.18/asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch
new file mode 100644 (file)
index 0000000..da5989b
--- /dev/null
@@ -0,0 +1,51 @@
+From e2a619ca0b38f2114347b7078b8a67d72d457a3d Mon Sep 17 00:00:00 2001
+From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Date: Fri, 22 Jul 2022 13:07:11 +0200
+Subject: asm-generic: remove a broken and needless ifdef conditional
+
+From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+
+commit e2a619ca0b38f2114347b7078b8a67d72d457a3d upstream.
+
+Commit 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()")
+introduces the config symbol GENERIC_LIB_DEVMEM_IS_ALLOWED, but then
+falsely refers to CONFIG_GENERIC_DEVMEM_IS_ALLOWED (note the missing LIB
+in the reference) in ./include/asm-generic/io.h.
+
+Luckily, ./scripts/checkkconfigsymbols.py warns on non-existing configs:
+
+GENERIC_DEVMEM_IS_ALLOWED
+Referencing files: include/asm-generic/io.h
+
+The actual fix, though, is simply not to make this function declaration
+dependent on any kernel config. For architectures that intend to use
+the generic version, the arch's 'select GENERIC_LIB_DEVMEM_IS_ALLOWED' will
+lead to picking the function definition, and for other architectures, this
+function is simply defined elsewhere.
+
+The wrong '#ifndef' on a non-existing config symbol also always had the
+same effect (although more by mistake than by intent). So, there is no
+functional change.
+
+Remove this broken and needless ifdef conditional.
+
+Fixes: 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()")
+Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/asm-generic/io.h |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/include/asm-generic/io.h
++++ b/include/asm-generic/io.h
+@@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile
+ }
+ #endif
+-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
+ extern int devmem_is_allowed(unsigned long pfn);
+-#endif
+ #endif /* __KERNEL__ */
diff --git a/queue-5.18/bluetooth-always-set-event-mask-on-suspend.patch b/queue-5.18/bluetooth-always-set-event-mask-on-suspend.patch
new file mode 100644 (file)
index 0000000..e59e49b
--- /dev/null
@@ -0,0 +1,44 @@
+From ef61b6ea154464fefd8a6712d7a3b43b445c3d4a Mon Sep 17 00:00:00 2001
+From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
+Date: Mon, 25 Jul 2022 15:34:21 -0700
+Subject: Bluetooth: Always set event mask on suspend
+
+From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
+
+commit ef61b6ea154464fefd8a6712d7a3b43b445c3d4a upstream.
+
+When suspending, always set the event mask once disconnects are
+successful. Otherwise, if wakeup is disallowed, the event mask is not
+set before suspend continues and can result in an early wakeup.
+
+Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier")
+Cc: stable@vger.kernel.org
+Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bluetooth/hci_sync.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/bluetooth/hci_sync.c
++++ b/net/bluetooth/hci_sync.c
+@@ -4942,6 +4942,9 @@ int hci_suspend_sync(struct hci_dev *hde
+               return err;
+       }
++      /* Update event mask so only the allowed event can wakeup the host */
++      hci_set_event_mask_sync(hdev);
++
+       /* Only configure accept list if disconnect succeeded and wake
+        * isn't being prevented.
+        */
+@@ -4953,9 +4956,6 @@ int hci_suspend_sync(struct hci_dev *hde
+       /* Unpause to take care of updating scanning params */
+       hdev->scanning_paused = false;
+-      /* Update event mask so only the allowed event can wakeup the host */
+-      hci_set_event_mask_sync(hdev);
+-
+       /* Enable event filter for paired devices */
+       hci_update_event_filter_sync(hdev);
diff --git a/queue-5.18/bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch b/queue-5.18/bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch
new file mode 100644 (file)
index 0000000..71ad8f9
--- /dev/null
@@ -0,0 +1,264 @@
+From d0be8347c623e0ac4202a1d4e0373882821f56b0 Mon Sep 17 00:00:00 2001
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Date: Thu, 21 Jul 2022 09:10:50 -0700
+Subject: Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put
+
+From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+
+commit d0be8347c623e0ac4202a1d4e0373882821f56b0 upstream.
+
+This fixes the following trace which is caused by hci_rx_work starting up
+*after* the final channel reference has been put() during sock_close() but
+*before* the references to the channel have been destroyed, so instead
+the code now relies on kref_get_unless_zero/l2cap_chan_hold_unless_zero to
+prevent referencing a channel that is about to be destroyed.
+
+  refcount_t: increment on 0; use-after-free.
+  BUG: KASAN: use-after-free in refcount_dec_and_test+0x20/0xd0
+  Read of size 4 at addr ffffffc114f5bf18 by task kworker/u17:14/705
+
+  CPU: 4 PID: 705 Comm: kworker/u17:14 Tainted: G S      W
+  4.14.234-00003-g1fb6d0bd49a4-dirty #28
+  Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150
+  Google Inc. MSM sm8150 Flame DVT (DT)
+  Workqueue: hci0 hci_rx_work
+  Call trace:
+   dump_backtrace+0x0/0x378
+   show_stack+0x20/0x2c
+   dump_stack+0x124/0x148
+   print_address_description+0x80/0x2e8
+   __kasan_report+0x168/0x188
+   kasan_report+0x10/0x18
+   __asan_load4+0x84/0x8c
+   refcount_dec_and_test+0x20/0xd0
+   l2cap_chan_put+0x48/0x12c
+   l2cap_recv_frame+0x4770/0x6550
+   l2cap_recv_acldata+0x44c/0x7a4
+   hci_acldata_packet+0x100/0x188
+   hci_rx_work+0x178/0x23c
+   process_one_work+0x35c/0x95c
+   worker_thread+0x4cc/0x960
+   kthread+0x1a8/0x1c4
+   ret_from_fork+0x10/0x18
+
+Cc: stable@kernel.org
+Reported-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Tested-by: Lee Jones <lee.jones@linaro.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/bluetooth/l2cap.h |    1 
+ net/bluetooth/l2cap_core.c    |   61 +++++++++++++++++++++++++++++++++---------
+ 2 files changed, 49 insertions(+), 13 deletions(-)
+
+--- a/include/net/bluetooth/l2cap.h
++++ b/include/net/bluetooth/l2cap.h
+@@ -847,6 +847,7 @@ enum {
+ };
+ void l2cap_chan_hold(struct l2cap_chan *c);
++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
+ void l2cap_chan_put(struct l2cap_chan *c);
+ static inline void l2cap_chan_lock(struct l2cap_chan *chan)
+--- a/net/bluetooth/l2cap_core.c
++++ b/net/bluetooth/l2cap_core.c
+@@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_ch
+ }
+ /* Find channel with given SCID.
+- * Returns locked channel. */
++ * Returns a reference locked channel.
++ */
+ static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
+                                                u16 cid)
+ {
+@@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan
+       mutex_lock(&conn->chan_lock);
+       c = __l2cap_get_chan_by_scid(conn, cid);
+-      if (c)
+-              l2cap_chan_lock(c);
++      if (c) {
++              /* Only lock if chan reference is not 0 */
++              c = l2cap_chan_hold_unless_zero(c);
++              if (c)
++                      l2cap_chan_lock(c);
++      }
+       mutex_unlock(&conn->chan_lock);
+       return c;
+ }
+ /* Find channel with given DCID.
+- * Returns locked channel.
++ * Returns a reference locked channel.
+  */
+ static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
+                                                u16 cid)
+@@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan
+       mutex_lock(&conn->chan_lock);
+       c = __l2cap_get_chan_by_dcid(conn, cid);
+-      if (c)
+-              l2cap_chan_lock(c);
++      if (c) {
++              /* Only lock if chan reference is not 0 */
++              c = l2cap_chan_hold_unless_zero(c);
++              if (c)
++                      l2cap_chan_lock(c);
++      }
+       mutex_unlock(&conn->chan_lock);
+       return c;
+@@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan
+       mutex_lock(&conn->chan_lock);
+       c = __l2cap_get_chan_by_ident(conn, ident);
+-      if (c)
+-              l2cap_chan_lock(c);
++      if (c) {
++              /* Only lock if chan reference is not 0 */
++              c = l2cap_chan_hold_unless_zero(c);
++              if (c)
++                      l2cap_chan_lock(c);
++      }
+       mutex_unlock(&conn->chan_lock);
+       return c;
+@@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan *
+       kref_get(&c->kref);
+ }
++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
++{
++      BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
++
++      if (!kref_get_unless_zero(&c->kref))
++              return NULL;
++
++      return c;
++}
++
+ void l2cap_chan_put(struct l2cap_chan *c)
+ {
+       BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
+@@ -1968,7 +1991,10 @@ static struct l2cap_chan *l2cap_global_c
+                       src_match = !bacmp(&c->src, src);
+                       dst_match = !bacmp(&c->dst, dst);
+                       if (src_match && dst_match) {
+-                              l2cap_chan_hold(c);
++                              c = l2cap_chan_hold_unless_zero(c);
++                              if (!c)
++                                      continue;
++
+                               read_unlock(&chan_list_lock);
+                               return c;
+                       }
+@@ -1983,7 +2009,7 @@ static struct l2cap_chan *l2cap_global_c
+       }
+       if (c1)
+-              l2cap_chan_hold(c1);
++              c1 = l2cap_chan_hold_unless_zero(c1);
+       read_unlock(&chan_list_lock);
+@@ -4463,6 +4489,7 @@ static inline int l2cap_config_req(struc
+ unlock:
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+       return err;
+ }
+@@ -4577,6 +4604,7 @@ static inline int l2cap_config_rsp(struc
+ done:
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+       return err;
+ }
+@@ -5304,6 +5332,7 @@ send_move_response:
+       l2cap_send_move_chan_rsp(chan, result);
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+       return 0;
+ }
+@@ -5396,6 +5425,7 @@ static void l2cap_move_continue(struct l
+       }
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ }
+ static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
+@@ -5425,6 +5455,7 @@ static void l2cap_move_fail(struct l2cap
+       l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ }
+ static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
+@@ -5488,6 +5519,7 @@ static int l2cap_move_channel_confirm(st
+       l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+       return 0;
+ }
+@@ -5523,6 +5555,7 @@ static inline int l2cap_move_channel_con
+       }
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+       return 0;
+ }
+@@ -5895,12 +5928,11 @@ static inline int l2cap_le_credits(struc
+       if (credits > max_credits) {
+               BT_ERR("LE credits overflow");
+               l2cap_send_disconn_req(chan, ECONNRESET);
+-              l2cap_chan_unlock(chan);
+               /* Return 0 so that we don't trigger an unnecessary
+                * command reject packet.
+                */
+-              return 0;
++              goto unlock;
+       }
+       chan->tx_credits += credits;
+@@ -5911,7 +5943,9 @@ static inline int l2cap_le_credits(struc
+       if (chan->tx_credits)
+               chan->ops->resume(chan);
++unlock:
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+       return 0;
+ }
+@@ -7597,6 +7631,7 @@ drop:
+ done:
+       l2cap_chan_unlock(chan);
++      l2cap_chan_put(chan);
+ }
+ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
+@@ -8085,7 +8120,7 @@ static struct l2cap_chan *l2cap_global_f
+               if (src_type != c->src_type)
+                       continue;
+-              l2cap_chan_hold(c);
++              c = l2cap_chan_hold_unless_zero(c);
+               read_unlock(&chan_list_lock);
+               return c;
+       }
diff --git a/queue-5.18/bridge-do-not-send-empty-ifla_af_spec-attribute.patch b/queue-5.18/bridge-do-not-send-empty-ifla_af_spec-attribute.patch
new file mode 100644 (file)
index 0000000..2937a2c
--- /dev/null
@@ -0,0 +1,51 @@
+From 9b134b1694ec8926926ba6b7b80884ea829245a0 Mon Sep 17 00:00:00 2001
+From: Benjamin Poirier <bpoirier@nvidia.com>
+Date: Mon, 25 Jul 2022 09:12:36 +0900
+Subject: bridge: Do not send empty IFLA_AF_SPEC attribute
+
+From: Benjamin Poirier <bpoirier@nvidia.com>
+
+commit 9b134b1694ec8926926ba6b7b80884ea829245a0 upstream.
+
+After commit b6c02ef54913 ("bridge: Netlink interface fix."),
+br_fill_ifinfo() started to send an empty IFLA_AF_SPEC attribute when a
+bridge vlan dump is requested but an interface does not have any vlans
+configured.
+
+iproute2 ignores such an empty attribute since commit b262a9becbcb
+("bridge: Fix output with empty vlan lists") but older iproute2 versions as
+well as other utilities have their output changed by the cited kernel
+commit, resulting in failed test cases. Regardless, emitting an empty
+attribute is pointless and inefficient.
+
+Avoid this change by canceling the attribute if no AF_SPEC data was added.
+
+Fixes: b6c02ef54913 ("bridge: Netlink interface fix.")
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Link: https://lore.kernel.org/r/20220725001236.95062-1-bpoirier@nvidia.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_netlink.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -589,9 +589,13 @@ static int br_fill_ifinfo(struct sk_buff
+       }
+ done:
++      if (af) {
++              if (nlmsg_get_pos(skb) - (void *)af > nla_attr_size(0))
++                      nla_nest_end(skb, af);
++              else
++                      nla_nest_cancel(skb, af);
++      }
+-      if (af)
+-              nla_nest_end(skb, af);
+       nlmsg_end(skb, nlh);
+       return 0;
diff --git a/queue-5.18/drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch b/queue-5.18/drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch
new file mode 100644 (file)
index 0000000..96244b4
--- /dev/null
@@ -0,0 +1,55 @@
+From 0c09bc33aa8e9dc867300acaadc318c2f0d85a1e Mon Sep 17 00:00:00 2001
+From: Nathan Chancellor <nathan@kernel.org>
+Date: Mon, 25 Jul 2022 16:36:29 -0700
+Subject: drm/simpledrm: Fix return type of simpledrm_simple_display_pipe_mode_valid()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Nathan Chancellor <nathan@kernel.org>
+
+commit 0c09bc33aa8e9dc867300acaadc318c2f0d85a1e upstream.
+
+When booting a kernel compiled with clang's CFI protection
+(CONFIG_CFI_CLANG), there is a CFI failure in
+drm_simple_kms_crtc_mode_valid() when trying to call
+simpledrm_simple_display_pipe_mode_valid() through ->mode_valid():
+
+[    0.322802] CFI failure (target: simpledrm_simple_display_pipe_mode_valid+0x0/0x8):
+...
+[    0.324928] Call trace:
+[    0.324969]  __ubsan_handle_cfi_check_fail+0x58/0x60
+[    0.325053]  __cfi_check_fail+0x3c/0x44
+[    0.325120]  __cfi_slowpath_diag+0x178/0x200
+[    0.325192]  drm_simple_kms_crtc_mode_valid+0x58/0x80
+[    0.325279]  __drm_helper_update_and_validate+0x31c/0x464
+...
+
+The ->mode_valid() member in 'struct drm_simple_display_pipe_funcs'
+expects a return type of 'enum drm_mode_status', not 'int'. Correct it
+to fix the CFI failure.
+
+Cc: stable@vger.kernel.org
+Fixes: 11e8f5fd223b ("drm: Add simpledrm driver")
+Link: https://github.com/ClangBuiltLinux/linux/issues/1647
+Reported-by: Tomasz Paweł Gajc <tpgxyz@gmail.com>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220725233629.223223-1-nathan@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/tiny/simpledrm.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/tiny/simpledrm.c
++++ b/drivers/gpu/drm/tiny/simpledrm.c
+@@ -627,7 +627,7 @@ static const struct drm_connector_funcs
+       .atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+ };
+-static int
++static enum drm_mode_status
+ simpledrm_simple_display_pipe_mode_valid(struct drm_simple_display_pipe *pipe,
+                                   const struct drm_display_mode *mode)
+ {
diff --git a/queue-5.18/fs-sendfile-handles-o_nonblock-of-out_fd.patch b/queue-5.18/fs-sendfile-handles-o_nonblock-of-out_fd.patch
new file mode 100644 (file)
index 0000000..b34b9ce
--- /dev/null
@@ -0,0 +1,119 @@
+From bdeb77bc2c405fa9f954c20269db175a0bd2793f Mon Sep 17 00:00:00 2001
+From: Andrei Vagin <avagin@gmail.com>
+Date: Sat, 16 Jul 2022 21:37:10 -0700
+Subject: fs: sendfile handles O_NONBLOCK of out_fd
+
+From: Andrei Vagin <avagin@gmail.com>
+
+commit bdeb77bc2c405fa9f954c20269db175a0bd2793f upstream.
+
+sendfile has to return EAGAIN if out_fd is nonblocking and the write into
+it would block.
+
+Here is a small reproducer for the problem:
+
+#define _GNU_SOURCE /* See feature_test_macros(7) */
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/sendfile.h>
+
+
+#define FILE_SIZE (1UL << 30)
+int main(int argc, char **argv) {
+        int p[2], fd;
+
+        if (pipe2(p, O_NONBLOCK))
+                return 1;
+
+        fd = open(argv[1], O_RDWR | O_TMPFILE, 0666);
+        if (fd < 0)
+                return 1;
+        ftruncate(fd, FILE_SIZE);
+
+        if (sendfile(p[1], fd, 0, FILE_SIZE) == -1) {
+                fprintf(stderr, "FAIL\n");
+        }
+        if (sendfile(p[1], fd, 0, FILE_SIZE) != -1 || errno != EAGAIN) {
+                fprintf(stderr, "FAIL\n");
+        }
+        return 0;
+}
+
+It worked before b964bf53e540, it is stuck after b964bf53e540, and it
+works again with this fix.
+
+This regression occurred because do_splice_direct() calls pipe_write
+that handles O_NONBLOCK.  Here is a trace log from the reproducer:
+
+ 1)               |  __x64_sys_sendfile64() {
+ 1)               |    do_sendfile() {
+ 1)               |      __fdget()
+ 1)               |      rw_verify_area()
+ 1)               |      __fdget()
+ 1)               |      rw_verify_area()
+ 1)               |      do_splice_direct() {
+ 1)               |        rw_verify_area()
+ 1)               |        splice_direct_to_actor() {
+ 1)               |          do_splice_to() {
+ 1)               |            rw_verify_area()
+ 1)               |            generic_file_splice_read()
+ 1) + 74.153 us   |          }
+ 1)               |          direct_splice_actor() {
+ 1)               |            iter_file_splice_write() {
+ 1)               |              __kmalloc()
+ 1)   0.148 us    |              pipe_lock();
+ 1)   0.153 us    |              splice_from_pipe_next.part.0();
+ 1)   0.162 us    |              page_cache_pipe_buf_confirm();
+... 16 times
+ 1)   0.159 us    |              page_cache_pipe_buf_confirm();
+ 1)               |              vfs_iter_write() {
+ 1)               |                do_iter_write() {
+ 1)               |                  rw_verify_area()
+ 1)               |                  do_iter_readv_writev() {
+ 1)               |                    pipe_write() {
+ 1)               |                      mutex_lock()
+ 1)   0.153 us    |                      mutex_unlock();
+ 1)   1.368 us    |                    }
+ 1)   1.686 us    |                  }
+ 1)   5.798 us    |                }
+ 1)   6.084 us    |              }
+ 1)   0.174 us    |              kfree();
+ 1)   0.152 us    |              pipe_unlock();
+ 1) + 14.461 us   |            }
+ 1) + 14.783 us   |          }
+ 1)   0.164 us    |          page_cache_pipe_buf_release();
+... 16 times
+ 1)   0.161 us    |          page_cache_pipe_buf_release();
+ 1)               |          touch_atime()
+ 1) + 95.854 us   |        }
+ 1) + 99.784 us   |      }
+ 1) ! 107.393 us  |    }
+ 1) ! 107.699 us  |  }
+
+Link: https://lkml.kernel.org/r/20220415005015.525191-1-avagin@gmail.com
+Fixes: b964bf53e540 ("teach sendfile(2) to handle send-to-pipe directly")
+Signed-off-by: Andrei Vagin <avagin@gmail.com>
+Cc: Al Viro <viro@zeniv.linux.org.uk>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/read_write.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/fs/read_write.c
++++ b/fs/read_write.c
+@@ -1247,6 +1247,9 @@ static ssize_t do_sendfile(int out_fd, i
+                                         count, fl);
+               file_end_write(out.file);
+       } else {
++              if (out.file->f_flags & O_NONBLOCK)
++                      fl |= SPLICE_F_NONBLOCK;
++
+               retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl);
+       }
diff --git a/queue-5.18/hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch b/queue-5.18/hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch
new file mode 100644 (file)
index 0000000..1b28d07
--- /dev/null
@@ -0,0 +1,36 @@
+From da9a298f5fad0dc615079a340da42928bc5b138e Mon Sep 17 00:00:00 2001
+From: Miaohe Lin <linmiaohe@huawei.com>
+Date: Sat, 9 Jul 2022 17:26:29 +0800
+Subject: hugetlb: fix memoryleak in hugetlb_mcopy_atomic_pte
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+commit da9a298f5fad0dc615079a340da42928bc5b138e upstream.
+
+When alloc_huge_page fails, *pagep is set to NULL without put_page first.
+So the hugepage indicated by *pagep is leaked.
+
+Link: https://lkml.kernel.org/r/20220709092629.54291-1-linmiaohe@huawei.com
+Fixes: 8cc5fcbb5be8 ("mm, hugetlb: fix racy resv_huge_pages underflow on UFFDIO_COPY")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Acked-by: Muchun Song <songmuchun@bytedance.com>
+Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
+Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -5827,6 +5827,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s
+               page = alloc_huge_page(dst_vma, dst_addr, 0);
+               if (IS_ERR(page)) {
++                      put_page(*pagep);
+                       ret = -ENOMEM;
+                       *pagep = NULL;
+                       goto out;
diff --git a/queue-5.18/intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch b/queue-5.18/intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch
new file mode 100644 (file)
index 0000000..e048ad1
--- /dev/null
@@ -0,0 +1,53 @@
+From d295ad34f236c3518634fb6403d4c0160456e470 Mon Sep 17 00:00:00 2001
+From: Waiman Long <longman@redhat.com>
+Date: Sat, 23 Jul 2022 15:59:32 -0400
+Subject: intel_idle: Fix false positive RCU splats due to incorrect hardirqs state
+
+From: Waiman Long <longman@redhat.com>
+
+commit d295ad34f236c3518634fb6403d4c0160456e470 upstream.
+
+Commit 32d4fd5751ea ("cpuidle,intel_idle: Fix CPUIDLE_FLAG_IRQ_ENABLE")
+uses raw_local_irq_enable/local_irq_disable() around call to
+__intel_idle() in intel_idle_irq().
+
+With interrupt enabled, timer tick interrupt can happen and a
+subsequently call to __do_softirq() may change the lockdep hardirqs state
+of a debug kernel back to 'on'. This will result in a mismatch between
+the cpu hardirqs state (off) and the lockdep hardirqs state (on) causing
+a number of false positive "WARNING: suspicious RCU usage" splats.
+
+Fix that by using local_irq_disable() to disable interrupt in
+intel_idle_irq().
+
+Fixes: 32d4fd5751ea ("cpuidle,intel_idle: Fix CPUIDLE_FLAG_IRQ_ENABLE")
+Signed-off-by: Waiman Long <longman@redhat.com>
+Cc: 5.16+ <stable@vger.kernel.org> # 5.16+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/idle/intel_idle.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
+index f5c6802aa6c3..907700d1e78e 100644
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -162,7 +162,13 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
+       raw_local_irq_enable();
+       ret = __intel_idle(dev, drv, index);
+-      raw_local_irq_disable();
++
++      /*
++       * The lockdep hardirqs state may be changed to 'on' with timer
++       * tick interrupt followed by __do_softirq(). Use local_irq_disable()
++       * to keep the hardirqs state correct.
++       */
++      local_irq_disable();
+       return ret;
+ }
+-- 
+2.37.1
+
diff --git a/queue-5.18/mm-fix-missing-wake-up-event-for-fsdax-pages.patch b/queue-5.18/mm-fix-missing-wake-up-event-for-fsdax-pages.patch
new file mode 100644 (file)
index 0000000..e291fa4
--- /dev/null
@@ -0,0 +1,118 @@
+From f4f451a16dd1f478fdb966bcbb612c1e4ce6b962 Mon Sep 17 00:00:00 2001
+From: Muchun Song <songmuchun@bytedance.com>
+Date: Tue, 5 Jul 2022 20:35:32 +0800
+Subject: mm: fix missing wake-up event for FSDAX pages
+
+From: Muchun Song <songmuchun@bytedance.com>
+
+commit f4f451a16dd1f478fdb966bcbb612c1e4ce6b962 upstream.
+
+FSDAX page refcounts are 1-based, rather than 0-based: if refcount is
+1, then the page is freed.  The FSDAX pages can be pinned through GUP,
+then they will be unpinned via unpin_user_page() using a folio variant
+to put the page, however, folio variants did not consider this special
+case, the result will be to miss a wakeup event (like the user of
+__fuse_dax_break_layouts()).  This results in a task being permanently
+stuck in TASK_INTERRUPTIBLE state.
+
+Since FSDAX pages are only possibly obtained by GUP users, so fix GUP
+instead of folio_put() to lower overhead.
+
+Link: https://lkml.kernel.org/r/20220705123532.283-1-songmuchun@bytedance.com
+Fixes: d8ddc099c6b3 ("mm/gup: Add gup_put_folio()")
+Signed-off-by: Muchun Song <songmuchun@bytedance.com>
+Suggested-by: Matthew Wilcox <willy@infradead.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: John Hubbard <jhubbard@nvidia.com>
+Cc: William Kucharski <william.kucharski@oracle.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/mm.h |   14 +++++++++-----
+ mm/gup.c           |    6 ++++--
+ mm/memremap.c      |    6 +++---
+ 3 files changed, 16 insertions(+), 10 deletions(-)
+
+--- a/include/linux/mm.h
++++ b/include/linux/mm.h
+@@ -1130,23 +1130,27 @@ static inline bool is_zone_movable_page(
+ #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
+ DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
+-bool __put_devmap_managed_page(struct page *page);
+-static inline bool put_devmap_managed_page(struct page *page)
++bool __put_devmap_managed_page_refs(struct page *page, int refs);
++static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
+ {
+       if (!static_branch_unlikely(&devmap_managed_key))
+               return false;
+       if (!is_zone_device_page(page))
+               return false;
+-      return __put_devmap_managed_page(page);
++      return __put_devmap_managed_page_refs(page, refs);
+ }
+-
+ #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
+-static inline bool put_devmap_managed_page(struct page *page)
++static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
+ {
+       return false;
+ }
+ #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
++static inline bool put_devmap_managed_page(struct page *page)
++{
++      return put_devmap_managed_page_refs(page, 1);
++}
++
+ /* 127: arbitrary random number, small enough to assemble well */
+ #define folio_ref_zero_or_close_to_overflow(folio) \
+       ((unsigned int) folio_ref_count(folio) + 127u <= 127u)
+--- a/mm/gup.c
++++ b/mm/gup.c
+@@ -54,7 +54,8 @@ retry:
+        * belongs to this folio.
+        */
+       if (unlikely(page_folio(page) != folio)) {
+-              folio_put_refs(folio, refs);
++              if (!put_devmap_managed_page_refs(&folio->page, refs))
++                      folio_put_refs(folio, refs);
+               goto retry;
+       }
+@@ -143,7 +144,8 @@ static void gup_put_folio(struct folio *
+                       refs *= GUP_PIN_COUNTING_BIAS;
+       }
+-      folio_put_refs(folio, refs);
++      if (!put_devmap_managed_page_refs(&folio->page, refs))
++              folio_put_refs(folio, refs);
+ }
+ /**
+--- a/mm/memremap.c
++++ b/mm/memremap.c
+@@ -489,7 +489,7 @@ void free_zone_device_page(struct page *
+ }
+ #ifdef CONFIG_FS_DAX
+-bool __put_devmap_managed_page(struct page *page)
++bool __put_devmap_managed_page_refs(struct page *page, int refs)
+ {
+       if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
+               return false;
+@@ -499,9 +499,9 @@ bool __put_devmap_managed_page(struct pa
+        * refcount is 1, then the page is free and the refcount is
+        * stable because nobody holds a reference on the page.
+        */
+-      if (page_ref_dec_return(page) == 1)
++      if (page_ref_sub_return(page, refs) == 1)
+               wake_up_var(&page->_refcount);
+       return true;
+ }
+-EXPORT_SYMBOL(__put_devmap_managed_page);
++EXPORT_SYMBOL(__put_devmap_managed_page_refs);
+ #endif /* CONFIG_FS_DAX */
diff --git a/queue-5.18/mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch b/queue-5.18/mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch
new file mode 100644 (file)
index 0000000..9ed53bc
--- /dev/null
@@ -0,0 +1,72 @@
+From 3fe2895cfecd03ac74977f32102b966b6589f481 Mon Sep 17 00:00:00 2001
+From: Josef Bacik <josef@toxicpanda.com>
+Date: Tue, 5 Jul 2022 16:00:36 -0400
+Subject: mm: fix page leak with multiple threads mapping the same page
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+commit 3fe2895cfecd03ac74977f32102b966b6589f481 upstream.
+
+We have an application with a lot of threads that use a shared mmap backed
+by tmpfs mounted with -o huge=within_size.  This application started
+leaking loads of huge pages when we upgraded to a recent kernel.
+
+Using the page ref tracepoints and a BPF program written by Tejun Heo we
+were able to determine that these pages would have multiple refcounts from
+the page fault path, but when it came to unmap time we wouldn't drop the
+number of refs we had added from the faults.
+
+I wrote a reproducer that mmap'ed a file backed by tmpfs with -o
+huge=always, and then spawned 20 threads all looping faulting random
+offsets in this map, while using madvise(MADV_DONTNEED) randomly for huge
+page aligned ranges.  This very quickly reproduced the problem.
+
+The problem here is that we check for the case that we have multiple
+threads faulting in a range that was previously unmapped.  One thread maps
+the PMD, the other thread loses the race and then returns 0.  However at
+this point we already have the page, and we are no longer putting this
+page into the processes address space, and so we leak the page.  We
+actually did the correct thing prior to f9ce0be71d1f, however it looks
+like Kirill copied what we do in the anonymous page case.  In the
+anonymous page case we don't yet have a page, so we don't have to drop a
+reference on anything.  Previously we did the correct thing for file based
+faults by returning VM_FAULT_NOPAGE so we correctly drop the reference on
+the page we faulted in.
+
+Fix this by returning VM_FAULT_NOPAGE in the pmd_devmap_trans_unstable()
+case, this makes us drop the ref on the page properly, and now my
+reproducer no longer leaks the huge pages.
+
+[josef@toxicpanda.com: v2]
+  Link: https://lkml.kernel.org/r/e90c8f0dbae836632b669c2afc434006a00d4a67.1657721478.git.josef@toxicpanda.com
+Link: https://lkml.kernel.org/r/2b798acfd95c9ab9395fe85e8d5a835e2e10a920.1657051137.git.josef@toxicpanda.com
+Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths")
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Rik van Riel <riel@surriel.com>
+Signed-off-by: Chris Mason <clm@fb.com>
+Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
+Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -4108,9 +4108,12 @@ vm_fault_t finish_fault(struct vm_fault
+                       return VM_FAULT_OOM;
+       }
+-      /* See comment in handle_pte_fault() */
++      /*
++       * See comment in handle_pte_fault() for how this scenario happens, we
++       * need to return NOPAGE so that we drop this page.
++       */
+       if (pmd_devmap_trans_unstable(vmf->pmd))
+-              return 0;
++              return VM_FAULT_NOPAGE;
+       vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
+                                     vmf->address, &vmf->ptl);
diff --git a/queue-5.18/mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch b/queue-5.18/mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch
new file mode 100644 (file)
index 0000000..c4cf789
--- /dev/null
@@ -0,0 +1,56 @@
+From c2cb0dcce9dd8b748b6ca8bb8d4a389f2e232307 Mon Sep 17 00:00:00 2001
+From: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Date: Mon, 4 Jul 2022 10:33:05 +0900
+Subject: mm/hugetlb: separate path for hwpoison entry in copy_hugetlb_page_range()
+
+From: Naoya Horiguchi <naoya.horiguchi@nec.com>
+
+commit c2cb0dcce9dd8b748b6ca8bb8d4a389f2e232307 upstream.
+
+Originally copy_hugetlb_page_range() handles migration entries and
+hwpoisoned entries in similar manner.  But recently the related code path
+has more code for migration entries, and when
+is_writable_migration_entry() was converted to
+!is_readable_migration_entry(), hwpoison entries on source processes got
+to be unexpectedly updated (which is legitimate for migration entries, but
+not for hwpoison entries).  This results in unexpected serious issues like
+kernel panic when forking processes with hwpoison entries in pmd.
+
+Separate the if branch into one for hwpoison entries and one for migration
+entries.
+
+Link: https://lkml.kernel.org/r/20220704013312.2415700-3-naoya.horiguchi@linux.dev
+Fixes: 6c287605fd56 ("mm: remember exclusively mapped anonymous pages with PG_anon_exclusive")
+Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
+Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
+Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Cc: <stable@vger.kernel.org>   [5.18]
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Liu Shixin <liushixin2@huawei.com>
+Cc: Oscar Salvador <osalvador@suse.de>
+Cc: Yang Shi <shy828301@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/hugetlb.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -4764,8 +4764,13 @@ again:
+                        * sharing with another vma.
+                        */
+                       ;
+-              } else if (unlikely(is_hugetlb_entry_migration(entry) ||
+-                                  is_hugetlb_entry_hwpoisoned(entry))) {
++              } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
++                      bool uffd_wp = huge_pte_uffd_wp(entry);
++
++                      if (!userfaultfd_wp(dst_vma) && uffd_wp)
++                              entry = huge_pte_clear_uffd_wp(entry);
++                      set_huge_pte_at(dst, addr, dst_pte, entry);
++              } else if (unlikely(is_hugetlb_entry_migration(entry))) {
+                       swp_entry_t swp_entry = pte_to_swp_entry(entry);
+                       if (is_writable_migration_entry(swp_entry) && cow) {
diff --git a/queue-5.18/nouveau-svm-fix-to-migrate-all-requested-pages.patch b/queue-5.18/nouveau-svm-fix-to-migrate-all-requested-pages.patch
new file mode 100644 (file)
index 0000000..7504791
--- /dev/null
@@ -0,0 +1,46 @@
+From 66cee9097e2b74ff3c8cc040ce5717c521a0c3fa Mon Sep 17 00:00:00 2001
+From: Alistair Popple <apopple@nvidia.com>
+Date: Wed, 20 Jul 2022 16:27:45 +1000
+Subject: nouveau/svm: Fix to migrate all requested pages
+
+From: Alistair Popple <apopple@nvidia.com>
+
+commit 66cee9097e2b74ff3c8cc040ce5717c521a0c3fa upstream.
+
+Users may request that pages from an OpenCL SVM allocation be migrated
+to the GPU with clEnqueueSVMMigrateMem(). In Nouveau this will call into
+nouveau_dmem_migrate_vma() to do the migration. If the total range to be
+migrated exceeds SG_MAX_SINGLE_ALLOC the pages will be migrated in
+chunks of size SG_MAX_SINGLE_ALLOC. However a typo in updating the
+starting address means that only the first chunk will get migrated.
+
+Fix the calculation so that the entire range will get migrated if
+possible.
+
+Signed-off-by: Alistair Popple <apopple@nvidia.com>
+Fixes: e3d8b0890469 ("drm/nouveau/svm: map pages after migration")
+Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Signed-off-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220720062745.960701-1-apopple@nvidia.com
+Cc: <stable@vger.kernel.org> # v5.8+
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_dmem.c |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
+@@ -680,7 +680,11 @@ nouveau_dmem_migrate_vma(struct nouveau_
+               goto out_free_dma;
+       for (i = 0; i < npages; i += max) {
+-              args.end = start + (max << PAGE_SHIFT);
++              if (args.start + (max << PAGE_SHIFT) > end)
++                      args.end = end;
++              else
++                      args.end = args.start + (max << PAGE_SHIFT);
++
+               ret = migrate_vma_setup(&args);
+               if (ret)
+                       goto out_free_pfns;
diff --git a/queue-5.18/ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch b/queue-5.18/ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch
new file mode 100644 (file)
index 0000000..ad4c907
--- /dev/null
@@ -0,0 +1,107 @@
+From 38c9c22a85aeed28d0831f230136e9cf6fa2ed44 Mon Sep 17 00:00:00 2001
+From: ChenXiaoSong <chenxiaosong2@huawei.com>
+Date: Thu, 7 Jul 2022 18:53:29 +0800
+Subject: ntfs: fix use-after-free in ntfs_ucsncmp()
+
+From: ChenXiaoSong <chenxiaosong2@huawei.com>
+
+commit 38c9c22a85aeed28d0831f230136e9cf6fa2ed44 upstream.
+
+Syzkaller reported use-after-free bug as follows:
+
+==================================================================
+BUG: KASAN: use-after-free in ntfs_ucsncmp+0x123/0x130
+Read of size 2 at addr ffff8880751acee8 by task a.out/879
+
+CPU: 7 PID: 879 Comm: a.out Not tainted 5.19.0-rc4-next-20220630-00001-gcc5218c8bd2c-dirty #7
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x1c0/0x2b0
+ print_address_description.constprop.0.cold+0xd4/0x484
+ print_report.cold+0x55/0x232
+ kasan_report+0xbf/0xf0
+ ntfs_ucsncmp+0x123/0x130
+ ntfs_are_names_equal.cold+0x2b/0x41
+ ntfs_attr_find+0x43b/0xb90
+ ntfs_attr_lookup+0x16d/0x1e0
+ ntfs_read_locked_attr_inode+0x4aa/0x2360
+ ntfs_attr_iget+0x1af/0x220
+ ntfs_read_locked_inode+0x246c/0x5120
+ ntfs_iget+0x132/0x180
+ load_system_files+0x1cc6/0x3480
+ ntfs_fill_super+0xa66/0x1cf0
+ mount_bdev+0x38d/0x460
+ legacy_get_tree+0x10d/0x220
+ vfs_get_tree+0x93/0x300
+ do_new_mount+0x2da/0x6d0
+ path_mount+0x496/0x19d0
+ __x64_sys_mount+0x284/0x300
+ do_syscall_64+0x3b/0xc0
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+RIP: 0033:0x7f3f2118d9ea
+Code: 48 8b 0d a9 f4 0b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 76 f4 0b 00 f7 d8 64 89 01 48
+RSP: 002b:00007ffc269deac8 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3f2118d9ea
+RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffc269dec00
+RBP: 00007ffc269dec80 R08: 00007ffc269deb00 R09: 00007ffc269dec44
+R10: 0000000000000000 R11: 0000000000000202 R12: 000055f81ab1d220
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+ </TASK>
+
+The buggy address belongs to the physical page:
+page:0000000085430378 refcount:1 mapcount:1 mapping:0000000000000000 index:0x555c6a81d pfn:0x751ac
+memcg:ffff888101f7e180
+anon flags: 0xfffffc00a0014(uptodate|lru|mappedtodisk|swapbacked|node=0|zone=1|lastcpupid=0x1fffff)
+raw: 000fffffc00a0014 ffffea0001bf2988 ffffea0001de2448 ffff88801712e201
+raw: 0000000555c6a81d 0000000000000000 0000000100000000 ffff888101f7e180
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff8880751acd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ffff8880751ace00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+>ffff8880751ace80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+                                                          ^
+ ffff8880751acf00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ffff8880751acf80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+==================================================================
+
+The reason is that struct ATTR_RECORD->name_offset is 6485, end address of
+name string is out of bounds.
+
+Fix this by adding sanity check on end address of attribute name string.
+
+[akpm@linux-foundation.org: coding-style cleanups]
+[chenxiaosong2@huawei.com: cleanup suggested by Hawkins Jiawei]
+  Link: https://lkml.kernel.org/r/20220709064511.3304299-1-chenxiaosong2@huawei.com
+Link: https://lkml.kernel.org/r/20220707105329.4020708-1-chenxiaosong2@huawei.com
+Signed-off-by: ChenXiaoSong <chenxiaosong2@huawei.com>
+Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
+Cc: Anton Altaparmakov <anton@tuxera.com>
+Cc: ChenXiaoSong <chenxiaosong2@huawei.com>
+Cc: Yongqiang Liu <liuyongqiang13@huawei.com>
+Cc: Zhang Yi <yi.zhang@huawei.com>
+Cc: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ntfs/attrib.c |    8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+--- a/fs/ntfs/attrib.c
++++ b/fs/ntfs/attrib.c
+@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYP
+               a = (ATTR_RECORD*)((u8*)ctx->attr +
+                               le32_to_cpu(ctx->attr->length));
+       for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
+-              if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
+-                              le32_to_cpu(ctx->mrec->bytes_allocated))
++              u8 *mrec_end = (u8 *)ctx->mrec +
++                             le32_to_cpu(ctx->mrec->bytes_allocated);
++              u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
++                             a->name_length * sizeof(ntfschar);
++              if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
++                  name_end > mrec_end)
+                       break;
+               ctx->attr = a;
+               if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
diff --git a/queue-5.18/revert-ocfs2-mount-shared-volume-without-ha-stack.patch b/queue-5.18/revert-ocfs2-mount-shared-volume-without-ha-stack.patch
new file mode 100644 (file)
index 0000000..59e065c
--- /dev/null
@@ -0,0 +1,243 @@
+From c80af0c250c8f8a3c978aa5aafbe9c39b336b813 Mon Sep 17 00:00:00 2001
+From: Junxiao Bi <ocfs2-devel@oss.oracle.com>
+Date: Fri, 3 Jun 2022 15:28:01 -0700
+Subject: Revert "ocfs2: mount shared volume without ha stack"
+
+From: Junxiao Bi <ocfs2-devel@oss.oracle.com>
+
+commit c80af0c250c8f8a3c978aa5aafbe9c39b336b813 upstream.
+
+This reverts commit 912f655d78c5d4ad05eac287f23a435924df7144.
+
+This commit introduced a regression that can cause mount to hang.  The
+changes in __ocfs2_find_empty_slot allow any node with a non-zero node
+number to grab the slot that was already taken by node 0, so node 1 will
+access the same journal as node 0.  When it tries to grab the journal
+cluster lock, it hangs because the lock was already acquired by node 0.
+It's very easy to reproduce this: in one cluster, mount node 0 first, then
+node 1, and you will see the following call trace from node 1.
+
+[13148.735424] INFO: task mount.ocfs2:53045 blocked for more than 122 seconds.
+[13148.739691]       Not tainted 5.15.0-2148.0.4.el8uek.mountracev2.x86_64 #2
+[13148.742560] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+[13148.745846] task:mount.ocfs2     state:D stack:    0 pid:53045 ppid: 53044 flags:0x00004000
+[13148.749354] Call Trace:
+[13148.750718]  <TASK>
+[13148.752019]  ? usleep_range+0x90/0x89
+[13148.753882]  __schedule+0x210/0x567
+[13148.755684]  schedule+0x44/0xa8
+[13148.757270]  schedule_timeout+0x106/0x13c
+[13148.759273]  ? __prepare_to_swait+0x53/0x78
+[13148.761218]  __wait_for_common+0xae/0x163
+[13148.763144]  __ocfs2_cluster_lock.constprop.0+0x1d6/0x870 [ocfs2]
+[13148.765780]  ? ocfs2_inode_lock_full_nested+0x18d/0x398 [ocfs2]
+[13148.768312]  ocfs2_inode_lock_full_nested+0x18d/0x398 [ocfs2]
+[13148.770968]  ocfs2_journal_init+0x91/0x340 [ocfs2]
+[13148.773202]  ocfs2_check_volume+0x39/0x461 [ocfs2]
+[13148.775401]  ? iput+0x69/0xba
+[13148.777047]  ocfs2_mount_volume.isra.0.cold+0x40/0x1f5 [ocfs2]
+[13148.779646]  ocfs2_fill_super+0x54b/0x853 [ocfs2]
+[13148.781756]  mount_bdev+0x190/0x1b7
+[13148.783443]  ? ocfs2_remount+0x440/0x440 [ocfs2]
+[13148.785634]  legacy_get_tree+0x27/0x48
+[13148.787466]  vfs_get_tree+0x25/0xd0
+[13148.789270]  do_new_mount+0x18c/0x2d9
+[13148.791046]  __x64_sys_mount+0x10e/0x142
+[13148.792911]  do_syscall_64+0x3b/0x89
+[13148.794667]  entry_SYSCALL_64_after_hwframe+0x170/0x0
+[13148.797051] RIP: 0033:0x7f2309f6e26e
+[13148.798784] RSP: 002b:00007ffdcee7d408 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5
+[13148.801974] RAX: ffffffffffffffda RBX: 00007ffdcee7d4a0 RCX: 00007f2309f6e26e
+[13148.804815] RDX: 0000559aa762a8ae RSI: 0000559aa939d340 RDI: 0000559aa93a22b0
+[13148.807719] RBP: 00007ffdcee7d5b0 R08: 0000559aa93a2290 R09: 00007f230a0b4820
+[13148.810659] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffdcee7d420
+[13148.813609] R13: 0000000000000000 R14: 0000559aa939f000 R15: 0000000000000000
+[13148.816564]  </TASK>
+
+To fix it, we could just fix __ocfs2_find_empty_slot.  But the original
+commit introduced the feature to mount ocfs2 locally even if it is cluster
+based, which is very dangerous: it can easily cause serious data corruption,
+as there is no way to stop other nodes mounting the fs and corrupting it.
+Setting up ha or another cluster-aware stack is just the cost that we have
+to pay to avoid corruption; otherwise we would have to do it in the kernel.
+
+Link: https://lkml.kernel.org/r/20220603222801.42488-1-junxiao.bi@oracle.com
+Fixes: 912f655d78c5 ("ocfs2: mount shared volume without ha stack")
+Signed-off-by: Junxiao Bi <junxiao.bi@oracle.com>
+Acked-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <heming.zhao@suse.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/ocfs2/ocfs2.h    |    4 +---
+ fs/ocfs2/slot_map.c |   46 +++++++++++++++++++---------------------------
+ fs/ocfs2/super.c    |   21 ---------------------
+ 3 files changed, 20 insertions(+), 51 deletions(-)
+
+--- a/fs/ocfs2/ocfs2.h
++++ b/fs/ocfs2/ocfs2.h
+@@ -277,7 +277,6 @@ enum ocfs2_mount_options
+       OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15,  /* Journal Async Commit */
+       OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
+       OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
+-      OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
+ };
+ #define OCFS2_OSB_SOFT_RO     0x0001
+@@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_glo
+ static inline int ocfs2_mount_local(struct ocfs2_super *osb)
+ {
+-      return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
+-              || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
++      return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
+ }
+ static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
+--- a/fs/ocfs2/slot_map.c
++++ b/fs/ocfs2/slot_map.c
+@@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struc
+       int i, ret = -ENOSPC;
+       if ((preferred >= 0) && (preferred < si->si_num_slots)) {
+-              if (!si->si_slots[preferred].sl_valid ||
+-                  !si->si_slots[preferred].sl_node_num) {
++              if (!si->si_slots[preferred].sl_valid) {
+                       ret = preferred;
+                       goto out;
+               }
+       }
+       for(i = 0; i < si->si_num_slots; i++) {
+-              if (!si->si_slots[i].sl_valid ||
+-                  !si->si_slots[i].sl_node_num) {
++              if (!si->si_slots[i].sl_valid) {
+                       ret = i;
+                       break;
+               }
+@@ -456,30 +454,24 @@ int ocfs2_find_slot(struct ocfs2_super *
+       spin_lock(&osb->osb_lock);
+       ocfs2_update_slot_info(si);
+-      if (ocfs2_mount_local(osb))
+-              /* use slot 0 directly in local mode */
+-              slot = 0;
+-      else {
+-              /* search for ourselves first and take the slot if it already
+-               * exists. Perhaps we need to mark this in a variable for our
+-               * own journal recovery? Possibly not, though we certainly
+-               * need to warn to the user */
+-              slot = __ocfs2_node_num_to_slot(si, osb->node_num);
++      /* search for ourselves first and take the slot if it already
++       * exists. Perhaps we need to mark this in a variable for our
++       * own journal recovery? Possibly not, though we certainly
++       * need to warn to the user */
++      slot = __ocfs2_node_num_to_slot(si, osb->node_num);
++      if (slot < 0) {
++              /* if no slot yet, then just take 1st available
++               * one. */
++              slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+               if (slot < 0) {
+-                      /* if no slot yet, then just take 1st available
+-                       * one. */
+-                      slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
+-                      if (slot < 0) {
+-                              spin_unlock(&osb->osb_lock);
+-                              mlog(ML_ERROR, "no free slots available!\n");
+-                              status = -EINVAL;
+-                              goto bail;
+-                      }
+-              } else
+-                      printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
+-                             "already allocated to this node!\n",
+-                             slot, osb->dev_str);
+-      }
++                      spin_unlock(&osb->osb_lock);
++                      mlog(ML_ERROR, "no free slots available!\n");
++                      status = -EINVAL;
++                      goto bail;
++              }
++      } else
++              printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
++                     "allocated to this node!\n", slot, osb->dev_str);
+       ocfs2_set_slot(si, slot, osb->node_num);
+       osb->slot_num = slot;
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -172,7 +172,6 @@ enum {
+       Opt_dir_resv_level,
+       Opt_journal_async_commit,
+       Opt_err_cont,
+-      Opt_nocluster,
+       Opt_err,
+ };
+@@ -206,7 +205,6 @@ static const match_table_t tokens = {
+       {Opt_dir_resv_level, "dir_resv_level=%u"},
+       {Opt_journal_async_commit, "journal_async_commit"},
+       {Opt_err_cont, "errors=continue"},
+-      {Opt_nocluster, "nocluster"},
+       {Opt_err, NULL}
+ };
+@@ -618,13 +616,6 @@ static int ocfs2_remount(struct super_bl
+               goto out;
+       }
+-      tmp = OCFS2_MOUNT_NOCLUSTER;
+-      if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+-              ret = -EINVAL;
+-              mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
+-              goto out;
+-      }
+-
+       tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
+               OCFS2_MOUNT_HB_NONE;
+       if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
+@@ -865,7 +856,6 @@ static int ocfs2_verify_userspace_stack(
+       }
+       if (ocfs2_userspace_stack(osb) &&
+-          !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+           strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
+                   OCFS2_STACK_LABEL_LEN)) {
+               mlog(ML_ERROR,
+@@ -1144,11 +1134,6 @@ static int ocfs2_fill_super(struct super
+              osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
+              "ordered");
+-      if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
+-         !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
+-              printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
+-                     "without cluster aware mode.\n", osb->dev_str);
+-
+       atomic_set(&osb->vol_state, VOLUME_MOUNTED);
+       wake_up(&osb->osb_mount_event);
+@@ -1455,9 +1440,6 @@ static int ocfs2_parse_options(struct su
+               case Opt_journal_async_commit:
+                       mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
+                       break;
+-              case Opt_nocluster:
+-                      mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
+-                      break;
+               default:
+                       mlog(ML_ERROR,
+                            "Unrecognized mount option \"%s\" "
+@@ -1569,9 +1551,6 @@ static int ocfs2_show_options(struct seq
+       if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
+               seq_printf(s, ",journal_async_commit");
+-      if (opts & OCFS2_MOUNT_NOCLUSTER)
+-              seq_printf(s, ",nocluster");
+-
+       return 0;
+ }
diff --git a/queue-5.18/s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch b/queue-5.18/s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch
new file mode 100644 (file)
index 0000000..2f13db2
--- /dev/null
@@ -0,0 +1,125 @@
+From 918e75f77af7d2e049bb70469ec0a2c12782d96a Mon Sep 17 00:00:00 2001
+From: Harald Freudenberger <freude@linux.ibm.com>
+Date: Wed, 13 Jul 2022 15:17:21 +0200
+Subject: s390/archrandom: prevent CPACF trng invocations in interrupt context
+
+From: Harald Freudenberger <freude@linux.ibm.com>
+
+commit 918e75f77af7d2e049bb70469ec0a2c12782d96a upstream.
+
+This patch slightly reworks the s390 arch_get_random_seed_{int,long}
+implementation: Make sure the CPACF trng instruction is never
+called in any interrupt context. This is done by adding an
+additional condition in_task().
+
+Justification:
+
+There are some constraints to satisfy for the invocation of the
+arch_get_random_seed_{int,long}() functions:
+- They should provide good random data during kernel initialization.
+- They should not be called in interrupt context as the TRNG
+  instruction is relatively heavyweight and may, for example,
+  cause some network loads to time out and buck.
+
+However, it was not clear what kind of interrupt context is exactly
+encountered during kernel init or network traffic eventually calling
+arch_get_random_seed_long().
+
+After some days of investigations it is clear that the s390
+start_kernel function is not running in any interrupt context and
+so the trng is called:
+
+Jul 11 18:33:39 t35lp54 kernel:  [<00000001064e90ca>] arch_get_random_seed_long.part.0+0x32/0x70
+Jul 11 18:33:39 t35lp54 kernel:  [<000000010715f246>] random_init+0xf6/0x238
+Jul 11 18:33:39 t35lp54 kernel:  [<000000010712545c>] start_kernel+0x4a4/0x628
+Jul 11 18:33:39 t35lp54 kernel:  [<000000010590402a>] startup_continue+0x2a/0x40
+
+The condition in_task() is true and the CPACF trng provides random data
+during kernel startup.
+
+The network traffic however, is more difficult. A typical call stack
+looks like this:
+
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b5600fc>] extract_entropy.constprop.0+0x23c/0x240
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b560136>] crng_reseed+0x36/0xd8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b5604b8>] crng_make_state+0x78/0x340
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b5607e0>] _get_random_bytes+0x60/0xf8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b56108a>] get_random_u32+0xda/0x248
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008aefe7a8>] kfence_guarded_alloc+0x48/0x4b8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008aeff35e>] __kfence_alloc+0x18e/0x1b8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008aef7f10>] __kmalloc_node_track_caller+0x368/0x4d8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b611eac>] kmalloc_reserve+0x44/0xa0
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b611f98>] __alloc_skb+0x90/0x178
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b6120dc>] __napi_alloc_skb+0x5c/0x118
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b8f06b4>] qeth_extract_skb+0x13c/0x680
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b8f6526>] qeth_poll+0x256/0x3f8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b63d76e>] __napi_poll.constprop.0+0x46/0x2f8
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b63dbec>] net_rx_action+0x1cc/0x408
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b937302>] __do_softirq+0x132/0x6b0
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008abf46ce>] __irq_exit_rcu+0x13e/0x170
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008abf531a>] irq_exit_rcu+0x22/0x50
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b922506>] do_io_irq+0xe6/0x198
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b935826>] io_int_handler+0xd6/0x110
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b9358a6>] psw_idle_exit+0x0/0xa
+Jul 06 17:37:07 t35lp54 kernel: ([<000000008ab9c59a>] arch_cpu_idle+0x52/0xe0)
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b933cfe>] default_idle_call+0x6e/0xd0
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008ac59f4e>] do_idle+0xf6/0x1b0
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008ac5a28e>] cpu_startup_entry+0x36/0x40
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008abb0d90>] smp_start_secondary+0x148/0x158
+Jul 06 17:37:07 t35lp54 kernel:  [<000000008b935b9e>] restart_int_handler+0x6e/0x90
+
+which confirms that the call is in softirq context. So in_task() covers exactly
+the cases where we want to have CPACF trng called: not in nmi, not in hard irq,
+not in soft irq but in normal task context and during kernel init.
+
+Signed-off-by: Harald Freudenberger <freude@linux.ibm.com>
+Acked-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Reviewed-by: Juergen Christ <jchrist@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220713131721.257907-1-freude@linux.ibm.com
+Fixes: e4f74400308c ("s390/archrandom: simplify back to earlier design and initialize earlier")
+[agordeev@linux.ibm.com changed desc, added Fixes and Link, removed -stable]
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/s390/include/asm/archrandom.h |    9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+--- a/arch/s390/include/asm/archrandom.h
++++ b/arch/s390/include/asm/archrandom.h
+@@ -2,7 +2,7 @@
+ /*
+  * Kernel interface for the s390 arch_random_* functions
+  *
+- * Copyright IBM Corp. 2017, 2020
++ * Copyright IBM Corp. 2017, 2022
+  *
+  * Author: Harald Freudenberger <freude@de.ibm.com>
+  *
+@@ -14,6 +14,7 @@
+ #ifdef CONFIG_ARCH_RANDOM
+ #include <linux/static_key.h>
++#include <linux/preempt.h>
+ #include <linux/atomic.h>
+ #include <asm/cpacf.h>
+@@ -32,7 +33,8 @@ static inline bool __must_check arch_get
+ static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
+ {
+-      if (static_branch_likely(&s390_arch_random_available)) {
++      if (static_branch_likely(&s390_arch_random_available) &&
++          in_task()) {
+               cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+               atomic64_add(sizeof(*v), &s390_arch_random_counter);
+               return true;
+@@ -42,7 +44,8 @@ static inline bool __must_check arch_get
+ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+ {
+-      if (static_branch_likely(&s390_arch_random_available)) {
++      if (static_branch_likely(&s390_arch_random_available) &&
++          in_task()) {
+               cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+               atomic64_add(sizeof(*v), &s390_arch_random_counter);
+               return true;
diff --git a/queue-5.18/secretmem-fix-unhandled-fault-in-truncate.patch b/queue-5.18/secretmem-fix-unhandled-fault-in-truncate.patch
new file mode 100644 (file)
index 0000000..71a7977
--- /dev/null
@@ -0,0 +1,164 @@
+From 84ac013046ccc438af04b7acecd4d3ab84fe4bde Mon Sep 17 00:00:00 2001
+From: Mike Rapoport <rppt@linux.ibm.com>
+Date: Thu, 7 Jul 2022 19:56:50 +0300
+Subject: secretmem: fix unhandled fault in truncate
+
+From: Mike Rapoport <rppt@linux.ibm.com>
+
+commit 84ac013046ccc438af04b7acecd4d3ab84fe4bde upstream.
+
+syzkaller reports the following issue:
+
+BUG: unable to handle page fault for address: ffff888021f7e005
+PGD 11401067 P4D 11401067 PUD 11402067 PMD 21f7d063 PTE 800fffffde081060
+Oops: 0002 [#1] PREEMPT SMP KASAN
+CPU: 0 PID: 3761 Comm: syz-executor281 Not tainted 5.19.0-rc4-syzkaller-00014-g941e3e791269 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+RIP: 0010:memset_erms+0x9/0x10 arch/x86/lib/memset_64.S:64
+Code: c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 f3 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 <f3> aa 4c 89 c8 c3 90 49 89 fa 40 0f b6 ce 48 b8 01 01 01 01 01 01
+RSP: 0018:ffffc9000329fa90 EFLAGS: 00010202
+RAX: 0000000000000000 RBX: 0000000000001000 RCX: 0000000000000ffb
+RDX: 0000000000000ffb RSI: 0000000000000000 RDI: ffff888021f7e005
+RBP: ffffea000087df80 R08: 0000000000000001 R09: ffff888021f7e005
+R10: ffffed10043efdff R11: 0000000000000000 R12: 0000000000000005
+R13: 0000000000000000 R14: 0000000000001000 R15: 0000000000000ffb
+FS:  00007fb29d8b2700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffff888021f7e005 CR3: 0000000026e7b000 CR4: 00000000003506f0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ zero_user_segments include/linux/highmem.h:272 [inline]
+ folio_zero_range include/linux/highmem.h:428 [inline]
+ truncate_inode_partial_folio+0x76a/0xdf0 mm/truncate.c:237
+ truncate_inode_pages_range+0x83b/0x1530 mm/truncate.c:381
+ truncate_inode_pages mm/truncate.c:452 [inline]
+ truncate_pagecache+0x63/0x90 mm/truncate.c:753
+ simple_setattr+0xed/0x110 fs/libfs.c:535
+ secretmem_setattr+0xae/0xf0 mm/secretmem.c:170
+ notify_change+0xb8c/0x12b0 fs/attr.c:424
+ do_truncate+0x13c/0x200 fs/open.c:65
+ do_sys_ftruncate+0x536/0x730 fs/open.c:193
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+RIP: 0033:0x7fb29d900899
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 11 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007fb29d8b2318 EFLAGS: 00000246 ORIG_RAX: 000000000000004d
+RAX: ffffffffffffffda RBX: 00007fb29d988408 RCX: 00007fb29d900899
+RDX: 00007fb29d900899 RSI: 0000000000000005 RDI: 0000000000000003
+RBP: 00007fb29d988400 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb29d98840c
+R13: 00007ffca01a23bf R14: 00007fb29d8b2400 R15: 0000000000022000
+ </TASK>
+Modules linked in:
+CR2: ffff888021f7e005
+---[ end trace 0000000000000000 ]---
+
+Eric Biggers suggested that this happens when
+secretmem_setattr()->simple_setattr() races with secretmem_fault() so that
+a page that is faulted in by secretmem_fault() (and thus removed from the
+direct map) is zeroed by inode truncation right afterwards.
+
+Use mapping->invalidate_lock to make secretmem_fault() and
+secretmem_setattr() mutually exclusive.
+
+[rppt@linux.ibm.com: v3]
+  Link: https://lkml.kernel.org/r/20220714091337.412297-1-rppt@kernel.org
+Link: https://lkml.kernel.org/r/20220707165650.248088-1-rppt@kernel.org
+Reported-by: syzbot+9bd2b7adbd34b30b87e4@syzkaller.appspotmail.com
+Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
+Suggested-by: Eric Biggers <ebiggers@kernel.org>
+Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Cc: Eric Biggers <ebiggers@kernel.org>
+Cc: Hillf Danton <hdanton@sina.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/secretmem.c |   33 ++++++++++++++++++++++++++-------
+ 1 file changed, 26 insertions(+), 7 deletions(-)
+
+--- a/mm/secretmem.c
++++ b/mm/secretmem.c
+@@ -55,22 +55,28 @@ static vm_fault_t secretmem_fault(struct
+       gfp_t gfp = vmf->gfp_mask;
+       unsigned long addr;
+       struct page *page;
++      vm_fault_t ret;
+       int err;
+       if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
+               return vmf_error(-EINVAL);
++      filemap_invalidate_lock_shared(mapping);
++
+ retry:
+       page = find_lock_page(mapping, offset);
+       if (!page) {
+               page = alloc_page(gfp | __GFP_ZERO);
+-              if (!page)
+-                      return VM_FAULT_OOM;
++              if (!page) {
++                      ret = VM_FAULT_OOM;
++                      goto out;
++              }
+               err = set_direct_map_invalid_noflush(page);
+               if (err) {
+                       put_page(page);
+-                      return vmf_error(err);
++                      ret = vmf_error(err);
++                      goto out;
+               }
+               __SetPageUptodate(page);
+@@ -86,7 +92,8 @@ retry:
+                       if (err == -EEXIST)
+                               goto retry;
+-                      return vmf_error(err);
++                      ret = vmf_error(err);
++                      goto out;
+               }
+               addr = (unsigned long)page_address(page);
+@@ -94,7 +101,11 @@ retry:
+       }
+       vmf->page = page;
+-      return VM_FAULT_LOCKED;
++      ret = VM_FAULT_LOCKED;
++
++out:
++      filemap_invalidate_unlock_shared(mapping);
++      return ret;
+ }
+ static const struct vm_operations_struct secretmem_vm_ops = {
+@@ -162,12 +173,20 @@ static int secretmem_setattr(struct user
+                            struct dentry *dentry, struct iattr *iattr)
+ {
+       struct inode *inode = d_inode(dentry);
++      struct address_space *mapping = inode->i_mapping;
+       unsigned int ia_valid = iattr->ia_valid;
++      int ret;
++
++      filemap_invalidate_lock(mapping);
+       if ((ia_valid & ATTR_SIZE) && inode->i_size)
+-              return -EINVAL;
++              ret = -EINVAL;
++      else
++              ret = simple_setattr(mnt_userns, dentry, iattr);
++
++      filemap_invalidate_unlock(mapping);
+-      return simple_setattr(mnt_userns, dentry, iattr);
++      return ret;
+ }
+ static const struct inode_operations secretmem_iops = {
diff --git a/queue-5.18/series b/queue-5.18/series
new file mode 100644 (file)
index 0000000..d8c3c21
--- /dev/null
@@ -0,0 +1,27 @@
+bluetooth-always-set-event-mask-on-suspend.patch
+bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch
+arm-dts-lan966x-fix-sys_clk-frequency.patch
+arm-pxa2xx-fix-gpio-descriptor-tables.patch
+revert-ocfs2-mount-shared-volume-without-ha-stack.patch
+userfaultfd-provide-properly-masked-address-for-huge-pages.patch
+ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch
+fs-sendfile-handles-o_nonblock-of-out_fd.patch
+secretmem-fix-unhandled-fault-in-truncate.patch
+mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch
+mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch
+mm-fix-missing-wake-up-event-for-fsdax-pages.patch
+hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch
+asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch
+s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch
+intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch
+nouveau-svm-fix-to-migrate-all-requested-pages.patch
+drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch
+watch_queue-fix-missing-rcu-annotation.patch
+watch_queue-fix-missing-locking-in-add_watch_to_object.patch
+tcp-fix-data-races-around-sysctl_tcp_dsack.patch
+tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch
+tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch
+tcp-fix-a-data-race-around-sysctl_tcp_frto.patch
+tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch
+tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch
+bridge-do-not-send-empty-ifla_af_spec-attribute.patch
diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch
new file mode 100644 (file)
index 0000000..5f3133c
--- /dev/null
@@ -0,0 +1,31 @@
+From 36eeee75ef0157e42fb6593dcc65daab289b559e Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Wed, 20 Jul 2022 09:50:14 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_adv_win_scale.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 36eeee75ef0157e42fb6593dcc65daab289b559e upstream.
+
+While reading sysctl_tcp_adv_win_scale, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1437,7 +1437,7 @@ void tcp_select_initial_window(const str
+ static inline int tcp_win_from_space(const struct sock *sk, int space)
+ {
+-      int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
++      int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
+       return tcp_adv_win_scale <= 0 ?
+               (space>>(-tcp_adv_win_scale)) :
diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch
new file mode 100644 (file)
index 0000000..27bcefc
--- /dev/null
@@ -0,0 +1,31 @@
+From 02ca527ac5581cf56749db9fd03d854e842253dd Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Wed, 20 Jul 2022 09:50:13 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_app_win.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 02ca527ac5581cf56749db9fd03d854e842253dd upstream.
+
+While reading sysctl_tcp_app_win, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock
+  */
+ static void tcp_init_buffer_space(struct sock *sk)
+ {
+-      int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
++      int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
+       struct tcp_sock *tp = tcp_sk(sk);
+       int maxwin;
diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch
new file mode 100644 (file)
index 0000000..df8bb55
--- /dev/null
@@ -0,0 +1,31 @@
+From 706c6202a3589f290e1ef9be0584a8f4a3cc0507 Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Wed, 20 Jul 2022 09:50:15 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_frto.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 706c6202a3589f290e1ef9be0584a8f4a3cc0507 upstream.
+
+While reading sysctl_tcp_frto, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -2175,7 +2175,7 @@ void tcp_enter_loss(struct sock *sk)
+        * loss recovery is underway except recurring timeout(s) on
+        * the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
+        */
+-      tp->frto = net->ipv4.sysctl_tcp_frto &&
++      tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
+                  (new_recovery || icsk->icsk_retransmits) &&
+                  !inet_csk(sk)->icsk_mtup.probe_size;
+ }
diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch
new file mode 100644 (file)
index 0000000..52398e9
--- /dev/null
@@ -0,0 +1,31 @@
+From 8499a2454d9e8a55ce616ede9f9580f36fd5b0f3 Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Wed, 20 Jul 2022 09:50:16 -0700
+Subject: tcp: Fix a data-race around sysctl_tcp_nometrics_save.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 8499a2454d9e8a55ce616ede9f9580f36fd5b0f3 upstream.
+
+While reading sysctl_tcp_nometrics_save, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its reader.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_metrics.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk)
+       int m;
+       sk_dst_confirm(sk);
+-      if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
++      if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
+               return;
+       rcu_read_lock();
diff --git a/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_dsack.patch b/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_dsack.patch
new file mode 100644 (file)
index 0000000..3aebae6
--- /dev/null
@@ -0,0 +1,40 @@
+From 58ebb1c8b35a8ef38cd6927431e0fa7b173a632d Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Wed, 20 Jul 2022 09:50:12 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_dsack.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit 58ebb1c8b35a8ef38cd6927431e0fa7b173a632d upstream.
+
+While reading sysctl_tcp_dsack, it can be changed concurrently.
+Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_input.c |    4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4426,7 +4426,7 @@ static void tcp_dsack_set(struct sock *s
+ {
+       struct tcp_sock *tp = tcp_sk(sk);
+-      if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++      if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+               int mib_idx;
+               if (before(seq, tp->rcv_nxt))
+@@ -4473,7 +4473,7 @@ static void tcp_send_dupack(struct sock
+               NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
+               tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
+-              if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
++              if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
+                       u32 end_seq = TCP_SKB_CB(skb)->end_seq;
+                       tcp_rcv_spurious_retrans(sk, skb);
diff --git a/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch b/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch
new file mode 100644 (file)
index 0000000..d562d23
--- /dev/null
@@ -0,0 +1,58 @@
+From ab1ba21b523ab496b1a4a8e396333b24b0a18f9a Mon Sep 17 00:00:00 2001
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+Date: Wed, 20 Jul 2022 09:50:17 -0700
+Subject: tcp: Fix data-races around sysctl_tcp_no_ssthresh_metrics_save.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+commit ab1ba21b523ab496b1a4a8e396333b24b0a18f9a upstream.
+
+While reading sysctl_tcp_no_ssthresh_metrics_save, it can be changed
+concurrently.  Thus, we need to add READ_ONCE() to its readers.
+
+Fixes: 65e6d90168f3 ("net-tcp: Disable TCP ssthresh metrics cache by default")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_metrics.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk)
+       if (tcp_in_initial_slowstart(tp)) {
+               /* Slow start still did not finish. */
+-              if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++              if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+                   !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+                       val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+                       if (val && (tcp_snd_cwnd(tp) >> 1) > val)
+@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk)
+       } else if (!tcp_in_slow_start(tp) &&
+                  icsk->icsk_ca_state == TCP_CA_Open) {
+               /* Cong. avoidance phase, cwnd is reliable. */
+-              if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++              if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+                   !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
+                       tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
+                                      max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
+@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk)
+                       tcp_metric_set(tm, TCP_METRIC_CWND,
+                                      (val + tp->snd_ssthresh) >> 1);
+               }
+-              if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
++              if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
+                   !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
+                       val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+                       if (val && tp->snd_ssthresh > val)
+@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk)
+       if (tcp_metric_locked(tm, TCP_METRIC_CWND))
+               tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
+-      val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
++      val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
+             0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
+       if (val) {
+               tp->snd_ssthresh = val;
diff --git a/queue-5.18/userfaultfd-provide-properly-masked-address-for-huge-pages.patch b/queue-5.18/userfaultfd-provide-properly-masked-address-for-huge-pages.patch
new file mode 100644 (file)
index 0000000..bd48417
--- /dev/null
@@ -0,0 +1,78 @@
+From d172b1a3bd065dd89234eac547fc62cf80681631 Mon Sep 17 00:00:00 2001
+From: Nadav Amit <namit@vmware.com>
+Date: Mon, 11 Jul 2022 09:59:06 -0700
+Subject: userfaultfd: provide properly masked address for huge-pages
+
+From: Nadav Amit <namit@vmware.com>
+
+commit d172b1a3bd065dd89234eac547fc62cf80681631 upstream.
+
+Commit 824ddc601adc ("userfaultfd: provide unmasked address on
+page-fault") was introduced to fix an old bug, in which the offset in the
+address of a page-fault was masked.  Concerns were raised - although were
+never backed by actual code - that some userspace code might break because
+the bug has been around for quite a while.  To address these concerns a
+new flag was introduced, and only when this flag is set by the user,
+userfaultfd provides the exact address of the page-fault.
+
+The commit however had a bug, and if the flag is unset, the offset was
+always masked based on a base-page granularity.  Yet, for huge-pages, the
+behavior prior to the commit was that the address is masked to the
+huge-page granularity.
+
+While there are no reports on real breakage, fix this issue.  If the flag
+is unset, use the address with the masking that was done before.
+
+Link: https://lkml.kernel.org/r/20220711165906.2682-1-namit@vmware.com
+Fixes: 824ddc601adc ("userfaultfd: provide unmasked address on page-fault")
+Signed-off-by: Nadav Amit <namit@vmware.com>
+Reported-by: James Houghton <jthoughton@google.com>
+Reviewed-by: Mike Rapoport <rppt@linux.ibm.com>
+Reviewed-by: Peter Xu <peterx@redhat.com>
+Reviewed-by: James Houghton <jthoughton@google.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Andrea Arcangeli <aarcange@redhat.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ fs/userfaultfd.c |   12 +++++++-----
+ 1 file changed, 7 insertions(+), 5 deletions(-)
+
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -191,17 +191,19 @@ static inline void msg_init(struct uffd_
+ }
+ static inline struct uffd_msg userfault_msg(unsigned long address,
++                                          unsigned long real_address,
+                                           unsigned int flags,
+                                           unsigned long reason,
+                                           unsigned int features)
+ {
+       struct uffd_msg msg;
++
+       msg_init(&msg);
+       msg.event = UFFD_EVENT_PAGEFAULT;
+-      if (!(features & UFFD_FEATURE_EXACT_ADDRESS))
+-              address &= PAGE_MASK;
+-      msg.arg.pagefault.address = address;
++      msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ?
++                                  real_address : address;
++
+       /*
+        * These flags indicate why the userfault occurred:
+        * - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault.
+@@ -485,8 +487,8 @@ vm_fault_t handle_userfault(struct vm_fa
+       init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
+       uwq.wq.private = current;
+-      uwq.msg = userfault_msg(vmf->real_address, vmf->flags, reason,
+-                      ctx->features);
++      uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags,
++                              reason, ctx->features);
+       uwq.ctx = ctx;
+       uwq.waken = false;
diff --git a/queue-5.18/watch_queue-fix-missing-locking-in-add_watch_to_object.patch b/queue-5.18/watch_queue-fix-missing-locking-in-add_watch_to_object.patch
new file mode 100644 (file)
index 0000000..d1042b6
--- /dev/null
@@ -0,0 +1,115 @@
+From e64ab2dbd882933b65cd82ff6235d705ad65dbb6 Mon Sep 17 00:00:00 2001
+From: Linus Torvalds <torvalds@linux-foundation.org>
+Date: Thu, 28 Jul 2022 10:31:12 +0100
+Subject: watch_queue: Fix missing locking in add_watch_to_object()
+
+From: Linus Torvalds <torvalds@linux-foundation.org>
+
+commit e64ab2dbd882933b65cd82ff6235d705ad65dbb6 upstream.
+
+If a watch is being added to a queue, it needs to guard against
+interference from addition of a new watch, manual removal of a watch and
+removal of a watch due to some other queue being destroyed.
+
+KEYCTL_WATCH_KEY guards against this for the same {key,queue} pair by
+holding the key->sem writelocked and by holding refs on both the key and
+the queue - but that doesn't prevent interaction from other {key,queue}
+pairs.
+
+While add_watch_to_object() does take the spinlock on the event queue,
+it doesn't take the lock on the source's watch list.  The assumption was
+that the caller would prevent that (say by taking key->sem) - but that
+doesn't prevent interference from the destruction of another queue.
+
+Fix this by locking the watcher list in add_watch_to_object().
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Reported-by: syzbot+03d7b43290037d1f87ca@syzkaller.appspotmail.com
+Signed-off-by: David Howells <dhowells@redhat.com>
+cc: keyrings@vger.kernel.org
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/watch_queue.c |   58 +++++++++++++++++++++++++++++++--------------------
+ 1 file changed, 36 insertions(+), 22 deletions(-)
+
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -454,6 +454,33 @@ void init_watch(struct watch *watch, str
+       rcu_assign_pointer(watch->queue, wqueue);
+ }
++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
++{
++      const struct cred *cred;
++      struct watch *w;
++
++      hlist_for_each_entry(w, &wlist->watchers, list_node) {
++              struct watch_queue *wq = rcu_access_pointer(w->queue);
++              if (wqueue == wq && watch->id == w->id)
++                      return -EBUSY;
++      }
++
++      cred = current_cred();
++      if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
++              atomic_dec(&cred->user->nr_watches);
++              return -EAGAIN;
++      }
++
++      watch->cred = get_cred(cred);
++      rcu_assign_pointer(watch->watch_list, wlist);
++
++      kref_get(&wqueue->usage);
++      kref_get(&watch->usage);
++      hlist_add_head(&watch->queue_node, &wqueue->watches);
++      hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
++      return 0;
++}
++
+ /**
+  * add_watch_to_object - Add a watch on an object to a watch list
+  * @watch: The watch to add
+@@ -468,34 +495,21 @@ void init_watch(struct watch *watch, str
+  */
+ int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
+ {
+-      struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
+-      struct watch *w;
++      struct watch_queue *wqueue;
++      int ret = -ENOENT;
+-      hlist_for_each_entry(w, &wlist->watchers, list_node) {
+-              struct watch_queue *wq = rcu_access_pointer(w->queue);
+-              if (wqueue == wq && watch->id == w->id)
+-                      return -EBUSY;
+-      }
+-
+-      watch->cred = get_current_cred();
+-      rcu_assign_pointer(watch->watch_list, wlist);
+-
+-      if (atomic_inc_return(&watch->cred->user->nr_watches) >
+-          task_rlimit(current, RLIMIT_NOFILE)) {
+-              atomic_dec(&watch->cred->user->nr_watches);
+-              put_cred(watch->cred);
+-              return -EAGAIN;
+-      }
++      rcu_read_lock();
++      wqueue = rcu_access_pointer(watch->queue);
+       if (lock_wqueue(wqueue)) {
+-              kref_get(&wqueue->usage);
+-              kref_get(&watch->usage);
+-              hlist_add_head(&watch->queue_node, &wqueue->watches);
++              spin_lock(&wlist->lock);
++              ret = add_one_watch(watch, wlist, wqueue);
++              spin_unlock(&wlist->lock);
+               unlock_wqueue(wqueue);
+       }
+-      hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
+-      return 0;
++      rcu_read_unlock();
++      return ret;
+ }
+ EXPORT_SYMBOL(add_watch_to_object);
diff --git a/queue-5.18/watch_queue-fix-missing-rcu-annotation.patch b/queue-5.18/watch_queue-fix-missing-rcu-annotation.patch
new file mode 100644 (file)
index 0000000..af06b21
--- /dev/null
@@ -0,0 +1,35 @@
+From e0339f036ef4beb9b20f0b6532a1e0ece7f594c6 Mon Sep 17 00:00:00 2001
+From: David Howells <dhowells@redhat.com>
+Date: Thu, 28 Jul 2022 10:31:06 +0100
+Subject: watch_queue: Fix missing rcu annotation
+
+From: David Howells <dhowells@redhat.com>
+
+commit e0339f036ef4beb9b20f0b6532a1e0ece7f594c6 upstream.
+
+Since __post_watch_notification() walks wlist->watchers with only the
+RCU read lock held, we need to use RCU methods to add to the list (we
+already use RCU methods to remove from the list).
+
+Fix add_watch_to_object() to use hlist_add_head_rcu() instead of
+hlist_add_head() for that list.
+
+Fixes: c73be61cede5 ("pipe: Add general notification queue support")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/watch_queue.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/watch_queue.c
++++ b/kernel/watch_queue.c
+@@ -494,7 +494,7 @@ int add_watch_to_object(struct watch *wa
+               unlock_wqueue(wqueue);
+       }
+-      hlist_add_head(&watch->list_node, &wlist->watchers);
++      hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
+       return 0;
+ }
+ EXPORT_SYMBOL(add_watch_to_object);