From b22c9ff66e865e50cad12cfcf81ee8e730d11d94 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 30 Jul 2022 16:52:41 +0200 Subject: [PATCH] 5.18-stable patches added patches: arm-dts-lan966x-fix-sys_clk-frequency.patch arm-pxa2xx-fix-gpio-descriptor-tables.patch asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch bluetooth-always-set-event-mask-on-suspend.patch bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch bridge-do-not-send-empty-ifla_af_spec-attribute.patch drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch fs-sendfile-handles-o_nonblock-of-out_fd.patch hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch mm-fix-missing-wake-up-event-for-fsdax-pages.patch mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch nouveau-svm-fix-to-migrate-all-requested-pages.patch ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch revert-ocfs2-mount-shared-volume-without-ha-stack.patch s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch secretmem-fix-unhandled-fault-in-truncate.patch tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch tcp-fix-a-data-race-around-sysctl_tcp_frto.patch tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch tcp-fix-data-races-around-sysctl_tcp_dsack.patch tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch userfaultfd-provide-properly-masked-address-for-huge-pages.patch watch_queue-fix-missing-locking-in-add_watch_to_object.patch watch_queue-fix-missing-rcu-annotation.patch --- ...rm-dts-lan966x-fix-sys_clk-frequency.patch | 62 ++++ ...rm-pxa2xx-fix-gpio-descriptor-tables.patch | 142 ++++++++++ ...roken-and-needless-ifdef-conditional.patch | 51 ++++ ...oth-always-set-event-mask-on-suspend.patch | 44 +++ 
...-after-free-caused-by-l2cap_chan_put.patch | 264 ++++++++++++++++++ ...ot-send-empty-ifla_af_spec-attribute.patch | 51 ++++ ...ledrm_simple_display_pipe_mode_valid.patch | 55 ++++ ...endfile-handles-o_nonblock-of-out_fd.patch | 119 ++++++++ ...moryleak-in-hugetlb_mcopy_atomic_pte.patch | 36 +++ ...lats-due-to-incorrect-hardirqs-state.patch | 53 ++++ ...issing-wake-up-event-for-fsdax-pages.patch | 118 ++++++++ ...ltiple-threads-mapping-the-same-page.patch | 72 +++++ ...son-entry-in-copy_hugetlb_page_range.patch | 56 ++++ ...m-fix-to-migrate-all-requested-pages.patch | 46 +++ ...s-fix-use-after-free-in-ntfs_ucsncmp.patch | 107 +++++++ ...mount-shared-volume-without-ha-stack.patch | 243 ++++++++++++++++ ...rng-invocations-in-interrupt-context.patch | 125 +++++++++ ...tmem-fix-unhandled-fault-in-truncate.patch | 164 +++++++++++ queue-5.18/series | 27 ++ ...race-around-sysctl_tcp_adv_win_scale.patch | 31 ++ ...-data-race-around-sysctl_tcp_app_win.patch | 31 ++ ...x-a-data-race-around-sysctl_tcp_frto.patch | 31 ++ ...ace-around-sysctl_tcp_nometrics_save.patch | 31 ++ ...x-data-races-around-sysctl_tcp_dsack.patch | 40 +++ ...-sysctl_tcp_no_ssthresh_metrics_save.patch | 58 ++++ ...operly-masked-address-for-huge-pages.patch | 78 ++++++ ...ssing-locking-in-add_watch_to_object.patch | 115 ++++++++ ...tch_queue-fix-missing-rcu-annotation.patch | 35 +++ 28 files changed, 2285 insertions(+) create mode 100644 queue-5.18/arm-dts-lan966x-fix-sys_clk-frequency.patch create mode 100644 queue-5.18/arm-pxa2xx-fix-gpio-descriptor-tables.patch create mode 100644 queue-5.18/asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch create mode 100644 queue-5.18/bluetooth-always-set-event-mask-on-suspend.patch create mode 100644 queue-5.18/bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch create mode 100644 queue-5.18/bridge-do-not-send-empty-ifla_af_spec-attribute.patch create mode 100644 
queue-5.18/drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch create mode 100644 queue-5.18/fs-sendfile-handles-o_nonblock-of-out_fd.patch create mode 100644 queue-5.18/hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch create mode 100644 queue-5.18/intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch create mode 100644 queue-5.18/mm-fix-missing-wake-up-event-for-fsdax-pages.patch create mode 100644 queue-5.18/mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch create mode 100644 queue-5.18/mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch create mode 100644 queue-5.18/nouveau-svm-fix-to-migrate-all-requested-pages.patch create mode 100644 queue-5.18/ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch create mode 100644 queue-5.18/revert-ocfs2-mount-shared-volume-without-ha-stack.patch create mode 100644 queue-5.18/s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch create mode 100644 queue-5.18/secretmem-fix-unhandled-fault-in-truncate.patch create mode 100644 queue-5.18/series create mode 100644 queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch create mode 100644 queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch create mode 100644 queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch create mode 100644 queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch create mode 100644 queue-5.18/tcp-fix-data-races-around-sysctl_tcp_dsack.patch create mode 100644 queue-5.18/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch create mode 100644 queue-5.18/userfaultfd-provide-properly-masked-address-for-huge-pages.patch create mode 100644 queue-5.18/watch_queue-fix-missing-locking-in-add_watch_to_object.patch create mode 100644 queue-5.18/watch_queue-fix-missing-rcu-annotation.patch diff --git a/queue-5.18/arm-dts-lan966x-fix-sys_clk-frequency.patch 
b/queue-5.18/arm-dts-lan966x-fix-sys_clk-frequency.patch new file mode 100644 index 00000000000..c5e9ef8688d --- /dev/null +++ b/queue-5.18/arm-dts-lan966x-fix-sys_clk-frequency.patch @@ -0,0 +1,62 @@ +From ef0324b6415db6742bd632dc0dfbb8fbc111473b Mon Sep 17 00:00:00 2001 +From: Michael Walle +Date: Sat, 26 Mar 2022 20:40:28 +0100 +Subject: ARM: dts: lan966x: fix sys_clk frequency + +From: Michael Walle + +commit ef0324b6415db6742bd632dc0dfbb8fbc111473b upstream. + +The sys_clk frequency is 165.625MHz. The register reference of the +Generic Clock controller lists the CPU clock as 600MHz, the DDR clock as +300MHz and the SYS clock as 162.5MHz. This is wrong. It was first +noticed during the fan driver development and it was measured and +verified via the CLK_MON output of the SoC which can be configured to +output sys_clk/64. + +The core PLL settings (which drive the SYS clock) seem to be as +follows: + DIVF = 52 + DIVQ = 3 + DIVR = 1 + +With a reference clock of 25MHz, this means we have a post divider clock + Fpfd = Fref / (DIVR + 1) = 25MHz / (1 + 1) = 12.5MHz + +The resulting VCO frequency is then + Fvco = Fpfd * (DIVF + 1) * 2 = 12.5MHz * (52 + 1) * 2 = 1325MHz + +And the output frequency is + Fout = Fvco / 2^DIVQ = 1325MHz / 2^3 = 165.625MHz + +This all adds up to the constraints of the PLL: + 10MHz <= Fpfd <= 200MHz + 20MHz <= Fout <= 1000MHz + 1000MHz <= Fvco <= 2000MHz + +Fixes: 290deaa10c50 ("ARM: dts: add DT for lan966 SoC and 2-port board pcb8291") +Signed-off-by: Michael Walle +Reviewed-by: Kavyasree Kotagiri +Signed-off-by: Claudiu Beznea +Link: https://lore.kernel.org/r/20220326194028.2945985-1-michael@walle.cc +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/boot/dts/lan966x.dtsi | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm/boot/dts/lan966x.dtsi b/arch/arm/boot/dts/lan966x.dtsi +index 3cb02fffe716..38e90a31d2dd 100644 +--- a/arch/arm/boot/dts/lan966x.dtsi ++++ b/arch/arm/boot/dts/lan966x.dtsi +@@ -38,7 +38,7 @@
clocks { + sys_clk: sys_clk { + compatible = "fixed-clock"; + #clock-cells = <0>; +- clock-frequency = <162500000>; ++ clock-frequency = <165625000>; + }; + + cpu_clk: cpu_clk { +-- +2.37.1 + diff --git a/queue-5.18/arm-pxa2xx-fix-gpio-descriptor-tables.patch b/queue-5.18/arm-pxa2xx-fix-gpio-descriptor-tables.patch new file mode 100644 index 00000000000..a6167837c8d --- /dev/null +++ b/queue-5.18/arm-pxa2xx-fix-gpio-descriptor-tables.patch @@ -0,0 +1,142 @@ +From c5cdb9286913aa5a5ebb81bcca0c17df3b0e2c79 Mon Sep 17 00:00:00 2001 +From: Linus Walleij +Date: Fri, 22 Jul 2022 13:46:11 +0200 +Subject: ARM: pxa2xx: Fix GPIO descriptor tables + +From: Linus Walleij + +commit c5cdb9286913aa5a5ebb81bcca0c17df3b0e2c79 upstream. + +Laurence reports: + +"Kernel >5.18 on Zaurus has a bug where the power management code can't +talk to devices, emitting the following errors: + +sharpsl-pm sharpsl-pm: Error: AC check failed: voltage -22. +sharpsl-pm sharpsl-pm: Charging Error! +sharpsl-pm sharpsl-pm: Warning: Cannot read main battery! + +Looking at the recent changes, I found that commit 31455bbda208 ("spi: +pxa2xx_spi: Convert to use GPIO descriptors") replaced the deprecated +SPI chip select platform device code with a gpiod lookup table. However, +this didn't seem to work until I changed the `dev_id` member from the +device name to the bus id. I'm not entirely sure why this is necessary, +but I suspect it is related to the fact that in sysfs SPI devices are +attached under /sys/devices/.../dev_name/spi_master/spiB/spiB.C, rather +than directly to the device." + +After reviewing the change I conclude that the same fix is needed +for all affected boards. 
+ +Fixes: 31455bbda208 ("spi: pxa2xx_spi: Convert to use GPIO descriptors") +Reported-by: Laurence de Bruxelles +Signed-off-by: Linus Walleij +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20220722114611.1517414-1-linus.walleij@linaro.org' +Signed-off-by: Arnd Bergmann +Signed-off-by: Greg Kroah-Hartman +--- + arch/arm/mach-pxa/corgi.c | 2 +- + arch/arm/mach-pxa/hx4700.c | 2 +- + arch/arm/mach-pxa/icontrol.c | 4 ++-- + arch/arm/mach-pxa/littleton.c | 2 +- + arch/arm/mach-pxa/magician.c | 2 +- + arch/arm/mach-pxa/spitz.c | 2 +- + arch/arm/mach-pxa/z2.c | 4 ++-- + 7 files changed, 9 insertions(+), 9 deletions(-) + +--- a/arch/arm/mach-pxa/corgi.c ++++ b/arch/arm/mach-pxa/corgi.c +@@ -531,7 +531,7 @@ static struct pxa2xx_spi_controller corg + }; + + static struct gpiod_lookup_table corgi_spi_gpio_table = { +- .dev_id = "pxa2xx-spi.1", ++ .dev_id = "spi1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW), +--- a/arch/arm/mach-pxa/hx4700.c ++++ b/arch/arm/mach-pxa/hx4700.c +@@ -635,7 +635,7 @@ static struct pxa2xx_spi_controller pxa_ + }; + + static struct gpiod_lookup_table pxa_ssp2_gpio_table = { +- .dev_id = "pxa2xx-spi.2", ++ .dev_id = "spi2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, +--- a/arch/arm/mach-pxa/icontrol.c ++++ b/arch/arm/mach-pxa/icontrol.c +@@ -140,7 +140,7 @@ struct platform_device pxa_spi_ssp4 = { + }; + + static struct gpiod_lookup_table pxa_ssp3_gpio_table = { +- .dev_id = "pxa2xx-spi.3", ++ .dev_id = "spi3", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW), +@@ -149,7 +149,7 @@ static struct gpiod_lookup_table pxa_ssp + }; + + static struct gpiod_lookup_table pxa_ssp4_gpio_table = { +- .dev_id = "pxa2xx-spi.4", ++ .dev_id = "spi4", 
+ .table = { + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW), +--- a/arch/arm/mach-pxa/littleton.c ++++ b/arch/arm/mach-pxa/littleton.c +@@ -208,7 +208,7 @@ static struct spi_board_info littleton_s + }; + + static struct gpiod_lookup_table littleton_spi_gpio_table = { +- .dev_id = "pxa2xx-spi.2", ++ .dev_id = "spi2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, +--- a/arch/arm/mach-pxa/magician.c ++++ b/arch/arm/mach-pxa/magician.c +@@ -946,7 +946,7 @@ static struct pxa2xx_spi_controller magi + }; + + static struct gpiod_lookup_table magician_spi_gpio_table = { +- .dev_id = "pxa2xx-spi.2", ++ .dev_id = "spi2", + .table = { + /* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */ + GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW), +--- a/arch/arm/mach-pxa/spitz.c ++++ b/arch/arm/mach-pxa/spitz.c +@@ -578,7 +578,7 @@ static struct pxa2xx_spi_controller spit + }; + + static struct gpiod_lookup_table spitz_spi_gpio_table = { +- .dev_id = "pxa2xx-spi.2", ++ .dev_id = "spi2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW), +--- a/arch/arm/mach-pxa/z2.c ++++ b/arch/arm/mach-pxa/z2.c +@@ -623,7 +623,7 @@ static struct pxa2xx_spi_controller pxa_ + }; + + static struct gpiod_lookup_table pxa_ssp1_gpio_table = { +- .dev_id = "pxa2xx-spi.1", ++ .dev_id = "spi1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, +@@ -631,7 +631,7 @@ static struct gpiod_lookup_table pxa_ssp + }; + + static struct gpiod_lookup_table pxa_ssp2_gpio_table = { +- .dev_id = "pxa2xx-spi.2", ++ .dev_id = "spi2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, diff --git 
a/queue-5.18/asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch b/queue-5.18/asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch new file mode 100644 index 00000000000..da5989b5279 --- /dev/null +++ b/queue-5.18/asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch @@ -0,0 +1,51 @@ +From e2a619ca0b38f2114347b7078b8a67d72d457a3d Mon Sep 17 00:00:00 2001 +From: Lukas Bulwahn +Date: Fri, 22 Jul 2022 13:07:11 +0200 +Subject: asm-generic: remove a broken and needless ifdef conditional + +From: Lukas Bulwahn + +commit e2a619ca0b38f2114347b7078b8a67d72d457a3d upstream. + +Commit 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()") +introduces the config symbol GENERIC_LIB_DEVMEM_IS_ALLOWED, but then +falsely refers to CONFIG_GENERIC_DEVMEM_IS_ALLOWED (note the missing LIB +in the reference) in ./include/asm-generic/io.h. + +Luckily, ./scripts/checkkconfigsymbols.py warns on non-existing configs: + +GENERIC_DEVMEM_IS_ALLOWED +Referencing files: include/asm-generic/io.h + +The actual fix, though, is simply not to make this function declaration +dependent on any kernel config. For architectures that intend to use +the generic version, the arch's 'select GENERIC_LIB_DEVMEM_IS_ALLOWED' will +lead to picking the function definition, and for other architectures, this +function is simply defined elsewhere. + +The wrong '#ifndef' on a non-existing config symbol also always had the +same effect (although more by mistake than by intent). So, there is no +functional change. + +Remove this broken and needless ifdef conditional.
+ +Fixes: 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()") +Signed-off-by: Lukas Bulwahn +Signed-off-by: Arnd Bergmann +Signed-off-by: Greg Kroah-Hartman +--- + include/asm-generic/io.h | 2 -- + 1 file changed, 2 deletions(-) + +--- a/include/asm-generic/io.h ++++ b/include/asm-generic/io.h +@@ -1125,9 +1125,7 @@ static inline void memcpy_toio(volatile + } + #endif + +-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED + extern int devmem_is_allowed(unsigned long pfn); +-#endif + + #endif /* __KERNEL__ */ + diff --git a/queue-5.18/bluetooth-always-set-event-mask-on-suspend.patch b/queue-5.18/bluetooth-always-set-event-mask-on-suspend.patch new file mode 100644 index 00000000000..e59e49b4c93 --- /dev/null +++ b/queue-5.18/bluetooth-always-set-event-mask-on-suspend.patch @@ -0,0 +1,44 @@ +From ef61b6ea154464fefd8a6712d7a3b43b445c3d4a Mon Sep 17 00:00:00 2001 +From: Abhishek Pandit-Subedi +Date: Mon, 25 Jul 2022 15:34:21 -0700 +Subject: Bluetooth: Always set event mask on suspend + +From: Abhishek Pandit-Subedi + +commit ef61b6ea154464fefd8a6712d7a3b43b445c3d4a upstream. + +When suspending, always set the event mask once disconnects are +successful. Otherwise, if wakeup is disallowed, the event mask is not +set before suspend continues and can result in an early wakeup. + +Fixes: 182ee45da083 ("Bluetooth: hci_sync: Rework hci_suspend_notifier") +Cc: stable@vger.kernel.org +Signed-off-by: Abhishek Pandit-Subedi +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + net/bluetooth/hci_sync.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +--- a/net/bluetooth/hci_sync.c ++++ b/net/bluetooth/hci_sync.c +@@ -4942,6 +4942,9 @@ int hci_suspend_sync(struct hci_dev *hde + return err; + } + ++ /* Update event mask so only the allowed event can wakeup the host */ ++ hci_set_event_mask_sync(hdev); ++ + /* Only configure accept list if disconnect succeeded and wake + * isn't being prevented. 
+ */ +@@ -4953,9 +4956,6 @@ int hci_suspend_sync(struct hci_dev *hde + /* Unpause to take care of updating scanning params */ + hdev->scanning_paused = false; + +- /* Update event mask so only the allowed event can wakeup the host */ +- hci_set_event_mask_sync(hdev); +- + /* Enable event filter for paired devices */ + hci_update_event_filter_sync(hdev); + diff --git a/queue-5.18/bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch b/queue-5.18/bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch new file mode 100644 index 00000000000..71ad8f97c36 --- /dev/null +++ b/queue-5.18/bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch @@ -0,0 +1,264 @@ +From d0be8347c623e0ac4202a1d4e0373882821f56b0 Mon Sep 17 00:00:00 2001 +From: Luiz Augusto von Dentz +Date: Thu, 21 Jul 2022 09:10:50 -0700 +Subject: Bluetooth: L2CAP: Fix use-after-free caused by l2cap_chan_put + +From: Luiz Augusto von Dentz + +commit d0be8347c623e0ac4202a1d4e0373882821f56b0 upstream. + +This fixes the following trace which is caused by hci_rx_work starting up +*after* the final channel reference has been put() during sock_close() but +*before* the references to the channel have been destroyed, so instead +the code now relies on kref_get_unless_zero/l2cap_chan_hold_unless_zero to +prevent referencing a channel that is about to be destroyed. + + refcount_t: increment on 0; use-after-free. + BUG: KASAN: use-after-free in refcount_dec_and_test+0x20/0xd0 + Read of size 4 at addr ffffffc114f5bf18 by task kworker/u17:14/705 + + CPU: 4 PID: 705 Comm: kworker/u17:14 Tainted: G S W + 4.14.234-00003-g1fb6d0bd49a4-dirty #28 + Hardware name: Qualcomm Technologies, Inc. SM8150 V2 PM8150 + Google Inc.
MSM sm8150 Flame DVT (DT) + Workqueue: hci0 hci_rx_work + Call trace: + dump_backtrace+0x0/0x378 + show_stack+0x20/0x2c + dump_stack+0x124/0x148 + print_address_description+0x80/0x2e8 + __kasan_report+0x168/0x188 + kasan_report+0x10/0x18 + __asan_load4+0x84/0x8c + refcount_dec_and_test+0x20/0xd0 + l2cap_chan_put+0x48/0x12c + l2cap_recv_frame+0x4770/0x6550 + l2cap_recv_acldata+0x44c/0x7a4 + hci_acldata_packet+0x100/0x188 + hci_rx_work+0x178/0x23c + process_one_work+0x35c/0x95c + worker_thread+0x4cc/0x960 + kthread+0x1a8/0x1c4 + ret_from_fork+0x10/0x18 + +Cc: stable@kernel.org +Reported-by: Lee Jones +Signed-off-by: Luiz Augusto von Dentz +Tested-by: Lee Jones +Signed-off-by: Luiz Augusto von Dentz +Signed-off-by: Greg Kroah-Hartman +--- + include/net/bluetooth/l2cap.h | 1 + net/bluetooth/l2cap_core.c | 61 +++++++++++++++++++++++++++++++++--------- + 2 files changed, 49 insertions(+), 13 deletions(-) + +--- a/include/net/bluetooth/l2cap.h ++++ b/include/net/bluetooth/l2cap.h +@@ -847,6 +847,7 @@ enum { + }; + + void l2cap_chan_hold(struct l2cap_chan *c); ++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c); + void l2cap_chan_put(struct l2cap_chan *c); + + static inline void l2cap_chan_lock(struct l2cap_chan *chan) +--- a/net/bluetooth/l2cap_core.c ++++ b/net/bluetooth/l2cap_core.c +@@ -111,7 +111,8 @@ static struct l2cap_chan *__l2cap_get_ch + } + + /* Find channel with given SCID. +- * Returns locked channel. */ ++ * Returns a reference locked channel. ++ */ + static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn, + u16 cid) + { +@@ -119,15 +120,19 @@ static struct l2cap_chan *l2cap_get_chan + + mutex_lock(&conn->chan_lock); + c = __l2cap_get_chan_by_scid(conn, cid); +- if (c) +- l2cap_chan_lock(c); ++ if (c) { ++ /* Only lock if chan reference is not 0 */ ++ c = l2cap_chan_hold_unless_zero(c); ++ if (c) ++ l2cap_chan_lock(c); ++ } + mutex_unlock(&conn->chan_lock); + + return c; + } + + /* Find channel with given DCID. 
+- * Returns locked channel. ++ * Returns a reference locked channel. + */ + static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn, + u16 cid) +@@ -136,8 +141,12 @@ static struct l2cap_chan *l2cap_get_chan + + mutex_lock(&conn->chan_lock); + c = __l2cap_get_chan_by_dcid(conn, cid); +- if (c) +- l2cap_chan_lock(c); ++ if (c) { ++ /* Only lock if chan reference is not 0 */ ++ c = l2cap_chan_hold_unless_zero(c); ++ if (c) ++ l2cap_chan_lock(c); ++ } + mutex_unlock(&conn->chan_lock); + + return c; +@@ -162,8 +171,12 @@ static struct l2cap_chan *l2cap_get_chan + + mutex_lock(&conn->chan_lock); + c = __l2cap_get_chan_by_ident(conn, ident); +- if (c) +- l2cap_chan_lock(c); ++ if (c) { ++ /* Only lock if chan reference is not 0 */ ++ c = l2cap_chan_hold_unless_zero(c); ++ if (c) ++ l2cap_chan_lock(c); ++ } + mutex_unlock(&conn->chan_lock); + + return c; +@@ -497,6 +510,16 @@ void l2cap_chan_hold(struct l2cap_chan * + kref_get(&c->kref); + } + ++struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c) ++{ ++ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); ++ ++ if (!kref_get_unless_zero(&c->kref)) ++ return NULL; ++ ++ return c; ++} ++ + void l2cap_chan_put(struct l2cap_chan *c) + { + BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref)); +@@ -1968,7 +1991,10 @@ static struct l2cap_chan *l2cap_global_c + src_match = !bacmp(&c->src, src); + dst_match = !bacmp(&c->dst, dst); + if (src_match && dst_match) { +- l2cap_chan_hold(c); ++ c = l2cap_chan_hold_unless_zero(c); ++ if (!c) ++ continue; ++ + read_unlock(&chan_list_lock); + return c; + } +@@ -1983,7 +2009,7 @@ static struct l2cap_chan *l2cap_global_c + } + + if (c1) +- l2cap_chan_hold(c1); ++ c1 = l2cap_chan_hold_unless_zero(c1); + + read_unlock(&chan_list_lock); + +@@ -4463,6 +4489,7 @@ static inline int l2cap_config_req(struc + + unlock: + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + return err; + } + +@@ -4577,6 +4604,7 @@ static inline int l2cap_config_rsp(struc + + 
done: + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + return err; + } + +@@ -5304,6 +5332,7 @@ send_move_response: + l2cap_send_move_chan_rsp(chan, result); + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + + return 0; + } +@@ -5396,6 +5425,7 @@ static void l2cap_move_continue(struct l + } + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + } + + static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid, +@@ -5425,6 +5455,7 @@ static void l2cap_move_fail(struct l2cap + l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED); + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + } + + static int l2cap_move_channel_rsp(struct l2cap_conn *conn, +@@ -5488,6 +5519,7 @@ static int l2cap_move_channel_confirm(st + l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid); + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + + return 0; + } +@@ -5523,6 +5555,7 @@ static inline int l2cap_move_channel_con + } + + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + + return 0; + } +@@ -5895,12 +5928,11 @@ static inline int l2cap_le_credits(struc + if (credits > max_credits) { + BT_ERR("LE credits overflow"); + l2cap_send_disconn_req(chan, ECONNRESET); +- l2cap_chan_unlock(chan); + + /* Return 0 so that we don't trigger an unnecessary + * command reject packet. 
+ */ +- return 0; ++ goto unlock; + } + + chan->tx_credits += credits; +@@ -5911,7 +5943,9 @@ static inline int l2cap_le_credits(struc + if (chan->tx_credits) + chan->ops->resume(chan); + ++unlock: + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + + return 0; + } +@@ -7597,6 +7631,7 @@ drop: + + done: + l2cap_chan_unlock(chan); ++ l2cap_chan_put(chan); + } + + static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, +@@ -8085,7 +8120,7 @@ static struct l2cap_chan *l2cap_global_f + if (src_type != c->src_type) + continue; + +- l2cap_chan_hold(c); ++ c = l2cap_chan_hold_unless_zero(c); + read_unlock(&chan_list_lock); + return c; + } diff --git a/queue-5.18/bridge-do-not-send-empty-ifla_af_spec-attribute.patch b/queue-5.18/bridge-do-not-send-empty-ifla_af_spec-attribute.patch new file mode 100644 index 00000000000..2937a2cf2fd --- /dev/null +++ b/queue-5.18/bridge-do-not-send-empty-ifla_af_spec-attribute.patch @@ -0,0 +1,51 @@ +From 9b134b1694ec8926926ba6b7b80884ea829245a0 Mon Sep 17 00:00:00 2001 +From: Benjamin Poirier +Date: Mon, 25 Jul 2022 09:12:36 +0900 +Subject: bridge: Do not send empty IFLA_AF_SPEC attribute + +From: Benjamin Poirier + +commit 9b134b1694ec8926926ba6b7b80884ea829245a0 upstream. + +After commit b6c02ef54913 ("bridge: Netlink interface fix."), +br_fill_ifinfo() started to send an empty IFLA_AF_SPEC attribute when a +bridge vlan dump is requested but an interface does not have any vlans +configured. + +iproute2 ignores such an empty attribute since commit b262a9becbcb +("bridge: Fix output with empty vlan lists") but older iproute2 versions as +well as other utilities have their output changed by the cited kernel +commit, resulting in failed test cases. Regardless, emitting an empty +attribute is pointless and inefficient. + +Avoid this change by canceling the attribute if no AF_SPEC data was added. 
+ +Fixes: b6c02ef54913 ("bridge: Netlink interface fix.") +Reviewed-by: Ido Schimmel +Signed-off-by: Benjamin Poirier +Acked-by: Nikolay Aleksandrov +Link: https://lore.kernel.org/r/20220725001236.95062-1-bpoirier@nvidia.com +Signed-off-by: Paolo Abeni +Signed-off-by: Greg Kroah-Hartman +--- + net/bridge/br_netlink.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/net/bridge/br_netlink.c ++++ b/net/bridge/br_netlink.c +@@ -589,9 +589,13 @@ static int br_fill_ifinfo(struct sk_buff + } + + done: ++ if (af) { ++ if (nlmsg_get_pos(skb) - (void *)af > nla_attr_size(0)) ++ nla_nest_end(skb, af); ++ else ++ nla_nest_cancel(skb, af); ++ } + +- if (af) +- nla_nest_end(skb, af); + nlmsg_end(skb, nlh); + return 0; + diff --git a/queue-5.18/drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch b/queue-5.18/drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch new file mode 100644 index 00000000000..96244b438fc --- /dev/null +++ b/queue-5.18/drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch @@ -0,0 +1,55 @@ +From 0c09bc33aa8e9dc867300acaadc318c2f0d85a1e Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor +Date: Mon, 25 Jul 2022 16:36:29 -0700 +Subject: drm/simpledrm: Fix return type of simpledrm_simple_display_pipe_mode_valid() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Nathan Chancellor + +commit 0c09bc33aa8e9dc867300acaadc318c2f0d85a1e upstream. + +When booting a kernel compiled with clang's CFI protection +(CONFIG_CFI_CLANG), there is a CFI failure in +drm_simple_kms_crtc_mode_valid() when trying to call +simpledrm_simple_display_pipe_mode_valid() through ->mode_valid(): + +[ 0.322802] CFI failure (target: simpledrm_simple_display_pipe_mode_valid+0x0/0x8): +... 
+[ 0.324928] Call trace: +[ 0.324969] __ubsan_handle_cfi_check_fail+0x58/0x60 +[ 0.325053] __cfi_check_fail+0x3c/0x44 +[ 0.325120] __cfi_slowpath_diag+0x178/0x200 +[ 0.325192] drm_simple_kms_crtc_mode_valid+0x58/0x80 +[ 0.325279] __drm_helper_update_and_validate+0x31c/0x464 +... + +The ->mode_valid() member in 'struct drm_simple_display_pipe_funcs' +expects a return type of 'enum drm_mode_status', not 'int'. Correct it +to fix the CFI failure. + +Cc: stable@vger.kernel.org +Fixes: 11e8f5fd223b ("drm: Add simpledrm driver") +Link: https://github.com/ClangBuiltLinux/linux/issues/1647 +Reported-by: Tomasz Paweł Gajc +Signed-off-by: Nathan Chancellor +Signed-off-by: Thomas Zimmermann +Reviewed-by: Sami Tolvanen +Link: https://patchwork.freedesktop.org/patch/msgid/20220725233629.223223-1-nathan@kernel.org +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/tiny/simpledrm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/tiny/simpledrm.c ++++ b/drivers/gpu/drm/tiny/simpledrm.c +@@ -627,7 +627,7 @@ static const struct drm_connector_funcs + .atomic_destroy_state = drm_atomic_helper_connector_destroy_state, + }; + +-static int ++static enum drm_mode_status + simpledrm_simple_display_pipe_mode_valid(struct drm_simple_display_pipe *pipe, + const struct drm_display_mode *mode) + { diff --git a/queue-5.18/fs-sendfile-handles-o_nonblock-of-out_fd.patch b/queue-5.18/fs-sendfile-handles-o_nonblock-of-out_fd.patch new file mode 100644 index 00000000000..b34b9cea333 --- /dev/null +++ b/queue-5.18/fs-sendfile-handles-o_nonblock-of-out_fd.patch @@ -0,0 +1,119 @@ +From bdeb77bc2c405fa9f954c20269db175a0bd2793f Mon Sep 17 00:00:00 2001 +From: Andrei Vagin +Date: Sat, 16 Jul 2022 21:37:10 -0700 +Subject: fs: sendfile handles O_NONBLOCK of out_fd + +From: Andrei Vagin + +commit bdeb77bc2c405fa9f954c20269db175a0bd2793f upstream. + +sendfile has to return EAGAIN if out_fd is nonblocking and the write into +it would block. 
+ +Here is a small reproducer for the problem: + +#define _GNU_SOURCE /* See feature_test_macros(7) */ +#include <fcntl.h> +#include <stdio.h> +#include <unistd.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/sendfile.h> + + +#define FILE_SIZE (1UL << 30) +int main(int argc, char **argv) { + int p[2], fd; + + if (pipe2(p, O_NONBLOCK)) + return 1; + + fd = open(argv[1], O_RDWR | O_TMPFILE, 0666); + if (fd < 0) + return 1; + ftruncate(fd, FILE_SIZE); + + if (sendfile(p[1], fd, 0, FILE_SIZE) == -1) { + fprintf(stderr, "FAIL\n"); + } + if (sendfile(p[1], fd, 0, FILE_SIZE) != -1 || errno != EAGAIN) { + fprintf(stderr, "FAIL\n"); + } + return 0; +} + +It worked before b964bf53e540, it is stuck after b964bf53e540, and it +works again with this fix. + +This regression occurred because do_splice_direct() calls pipe_write +that handles O_NONBLOCK. Here is a trace log from the reproducer: + + 1) | __x64_sys_sendfile64() { + 1) | do_sendfile() { + 1) | __fdget() + 1) | rw_verify_area() + 1) | __fdget() + 1) | rw_verify_area() + 1) | do_splice_direct() { + 1) | rw_verify_area() + 1) | splice_direct_to_actor() { + 1) | do_splice_to() { + 1) | rw_verify_area() + 1) | generic_file_splice_read() + 1) + 74.153 us | } + 1) | direct_splice_actor() { + 1) | iter_file_splice_write() { + 1) | __kmalloc() + 1) 0.148 us | pipe_lock(); + 1) 0.153 us | splice_from_pipe_next.part.0(); + 1) 0.162 us | page_cache_pipe_buf_confirm(); +... 16 times + 1) 0.159 us | page_cache_pipe_buf_confirm(); + 1) | vfs_iter_write() { + 1) | do_iter_write() { + 1) | rw_verify_area() + 1) | do_iter_readv_writev() { + 1) | pipe_write() { + 1) | mutex_lock() + 1) 0.153 us | mutex_unlock(); + 1) 1.368 us | } + 1) 1.686 us | } + 1) 5.798 us | } + 1) 6.084 us | } + 1) 0.174 us | kfree(); + 1) 0.152 us | pipe_unlock(); + 1) + 14.461 us | } + 1) + 14.783 us | } + 1) 0.164 us | page_cache_pipe_buf_release(); +... 16 times + 1) 0.161 us | page_cache_pipe_buf_release(); + 1) | touch_atime() + 1) + 95.854 us | } + 1) + 99.784 us | } + 1) ! 107.393 us | } + 1) !
107.699 us | } + +Link: https://lkml.kernel.org/r/20220415005015.525191-1-avagin@gmail.com +Fixes: b964bf53e540 ("teach sendfile(2) to handle send-to-pipe directly") +Signed-off-by: Andrei Vagin +Cc: Al Viro +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/read_write.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/fs/read_write.c ++++ b/fs/read_write.c +@@ -1247,6 +1247,9 @@ static ssize_t do_sendfile(int out_fd, i + count, fl); + file_end_write(out.file); + } else { ++ if (out.file->f_flags & O_NONBLOCK) ++ fl |= SPLICE_F_NONBLOCK; ++ + retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); + } + diff --git a/queue-5.18/hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch b/queue-5.18/hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch new file mode 100644 index 00000000000..1b28d079690 --- /dev/null +++ b/queue-5.18/hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch @@ -0,0 +1,36 @@ +From da9a298f5fad0dc615079a340da42928bc5b138e Mon Sep 17 00:00:00 2001 +From: Miaohe Lin +Date: Sat, 9 Jul 2022 17:26:29 +0800 +Subject: hugetlb: fix memoryleak in hugetlb_mcopy_atomic_pte + +From: Miaohe Lin + +commit da9a298f5fad0dc615079a340da42928bc5b138e upstream. + +When alloc_huge_page fails, *pagep is set to NULL without put_page first. +So the hugepage indicated by *pagep is leaked. 
 + +Link: https://lkml.kernel.org/r/20220709092629.54291-1-linmiaohe@huawei.com +Fixes: 8cc5fcbb5be8 ("mm, hugetlb: fix racy resv_huge_pages underflow on UFFDIO_COPY") +Signed-off-by: Miaohe Lin +Acked-by: Muchun Song +Reviewed-by: Anshuman Khandual +Reviewed-by: Baolin Wang +Reviewed-by: Mike Kravetz +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 1 + + 1 file changed, 1 insertion(+) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -5827,6 +5827,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_s + + page = alloc_huge_page(dst_vma, dst_addr, 0); + if (IS_ERR(page)) { ++ put_page(*pagep); + ret = -ENOMEM; + *pagep = NULL; + goto out; diff --git a/queue-5.18/intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch b/queue-5.18/intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch new file mode 100644 index 00000000000..e048ad19443 --- /dev/null +++ b/queue-5.18/intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch @@ -0,0 +1,53 @@ +From d295ad34f236c3518634fb6403d4c0160456e470 Mon Sep 17 00:00:00 2001 +From: Waiman Long +Date: Sat, 23 Jul 2022 15:59:32 -0400 +Subject: intel_idle: Fix false positive RCU splats due to incorrect hardirqs state + +From: Waiman Long + +commit d295ad34f236c3518634fb6403d4c0160456e470 upstream. + +Commit 32d4fd5751ea ("cpuidle,intel_idle: Fix CPUIDLE_FLAG_IRQ_ENABLE") +uses raw_local_irq_enable/local_irq_disable() around the call to +__intel_idle() in intel_idle_irq(). + +With interrupts enabled, a timer tick interrupt can happen and a +subsequent call to __do_softirq() may change the lockdep hardirqs state +of a debug kernel back to 'on'. This will result in a mismatch between +the cpu hardirqs state (off) and the lockdep hardirqs state (on) causing +a number of false positive "WARNING: suspicious RCU usage" splats. + +Fix that by using local_irq_disable() to disable interrupts in +intel_idle_irq().
+ +Fixes: 32d4fd5751ea ("cpuidle,intel_idle: Fix CPUIDLE_FLAG_IRQ_ENABLE") +Signed-off-by: Waiman Long +Cc: 5.16+ # 5.16+ +Signed-off-by: Rafael J. Wysocki +Signed-off-by: Greg Kroah-Hartman +--- + drivers/idle/intel_idle.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c +index f5c6802aa6c3..907700d1e78e 100644 +--- a/drivers/idle/intel_idle.c ++++ b/drivers/idle/intel_idle.c +@@ -162,7 +162,13 @@ static __cpuidle int intel_idle_irq(struct cpuidle_device *dev, + + raw_local_irq_enable(); + ret = __intel_idle(dev, drv, index); +- raw_local_irq_disable(); ++ ++ /* ++ * The lockdep hardirqs state may be changed to 'on' with timer ++ * tick interrupt followed by __do_softirq(). Use local_irq_disable() ++ * to keep the hardirqs state correct. ++ */ ++ local_irq_disable(); + + return ret; + } +-- +2.37.1 + diff --git a/queue-5.18/mm-fix-missing-wake-up-event-for-fsdax-pages.patch b/queue-5.18/mm-fix-missing-wake-up-event-for-fsdax-pages.patch new file mode 100644 index 00000000000..e291fa413af --- /dev/null +++ b/queue-5.18/mm-fix-missing-wake-up-event-for-fsdax-pages.patch @@ -0,0 +1,118 @@ +From f4f451a16dd1f478fdb966bcbb612c1e4ce6b962 Mon Sep 17 00:00:00 2001 +From: Muchun Song +Date: Tue, 5 Jul 2022 20:35:32 +0800 +Subject: mm: fix missing wake-up event for FSDAX pages + +From: Muchun Song + +commit f4f451a16dd1f478fdb966bcbb612c1e4ce6b962 upstream. + +FSDAX page refcounts are 1-based, rather than 0-based: if refcount is +1, then the page is freed. The FSDAX pages can be pinned through GUP, +then they will be unpinned via unpin_user_page() using a folio variant +to put the page, however, folio variants did not consider this special +case, the result will be to miss a wakeup event (like the user of +__fuse_dax_break_layouts()). This results in a task being permanently +stuck in TASK_INTERRUPTIBLE state. 
+ +Since FSDAX pages are only possibly obtained by GUP users, so fix GUP +instead of folio_put() to lower overhead. + +Link: https://lkml.kernel.org/r/20220705123532.283-1-songmuchun@bytedance.com +Fixes: d8ddc099c6b3 ("mm/gup: Add gup_put_folio()") +Signed-off-by: Muchun Song +Suggested-by: Matthew Wilcox +Cc: Jason Gunthorpe +Cc: John Hubbard +Cc: William Kucharski +Cc: Dan Williams +Cc: Jan Kara +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + include/linux/mm.h | 14 +++++++++----- + mm/gup.c | 6 ++++-- + mm/memremap.c | 6 +++--- + 3 files changed, 16 insertions(+), 10 deletions(-) + +--- a/include/linux/mm.h ++++ b/include/linux/mm.h +@@ -1130,23 +1130,27 @@ static inline bool is_zone_movable_page( + #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX) + DECLARE_STATIC_KEY_FALSE(devmap_managed_key); + +-bool __put_devmap_managed_page(struct page *page); +-static inline bool put_devmap_managed_page(struct page *page) ++bool __put_devmap_managed_page_refs(struct page *page, int refs); ++static inline bool put_devmap_managed_page_refs(struct page *page, int refs) + { + if (!static_branch_unlikely(&devmap_managed_key)) + return false; + if (!is_zone_device_page(page)) + return false; +- return __put_devmap_managed_page(page); ++ return __put_devmap_managed_page_refs(page, refs); + } +- + #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ +-static inline bool put_devmap_managed_page(struct page *page) ++static inline bool put_devmap_managed_page_refs(struct page *page, int refs) + { + return false; + } + #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */ + ++static inline bool put_devmap_managed_page(struct page *page) ++{ ++ return put_devmap_managed_page_refs(page, 1); ++} ++ + /* 127: arbitrary random number, small enough to assemble well */ + #define folio_ref_zero_or_close_to_overflow(folio) \ + ((unsigned int) folio_ref_count(folio) + 127u <= 127u) +--- a/mm/gup.c ++++ b/mm/gup.c +@@ -54,7 +54,8 @@ retry: + * belongs to this folio. 
+ */ + if (unlikely(page_folio(page) != folio)) { +- folio_put_refs(folio, refs); ++ if (!put_devmap_managed_page_refs(&folio->page, refs)) ++ folio_put_refs(folio, refs); + goto retry; + } + +@@ -143,7 +144,8 @@ static void gup_put_folio(struct folio * + refs *= GUP_PIN_COUNTING_BIAS; + } + +- folio_put_refs(folio, refs); ++ if (!put_devmap_managed_page_refs(&folio->page, refs)) ++ folio_put_refs(folio, refs); + } + + /** +--- a/mm/memremap.c ++++ b/mm/memremap.c +@@ -489,7 +489,7 @@ void free_zone_device_page(struct page * + } + + #ifdef CONFIG_FS_DAX +-bool __put_devmap_managed_page(struct page *page) ++bool __put_devmap_managed_page_refs(struct page *page, int refs) + { + if (page->pgmap->type != MEMORY_DEVICE_FS_DAX) + return false; +@@ -499,9 +499,9 @@ bool __put_devmap_managed_page(struct pa + * refcount is 1, then the page is free and the refcount is + * stable because nobody holds a reference on the page. + */ +- if (page_ref_dec_return(page) == 1) ++ if (page_ref_sub_return(page, refs) == 1) + wake_up_var(&page->_refcount); + return true; + } +-EXPORT_SYMBOL(__put_devmap_managed_page); ++EXPORT_SYMBOL(__put_devmap_managed_page_refs); + #endif /* CONFIG_FS_DAX */ diff --git a/queue-5.18/mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch b/queue-5.18/mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch new file mode 100644 index 00000000000..9ed53bcb1d6 --- /dev/null +++ b/queue-5.18/mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch @@ -0,0 +1,72 @@ +From 3fe2895cfecd03ac74977f32102b966b6589f481 Mon Sep 17 00:00:00 2001 +From: Josef Bacik +Date: Tue, 5 Jul 2022 16:00:36 -0400 +Subject: mm: fix page leak with multiple threads mapping the same page + +From: Josef Bacik + +commit 3fe2895cfecd03ac74977f32102b966b6589f481 upstream. + +We have an application with a lot of threads that use a shared mmap backed +by tmpfs mounted with -o huge=within_size. 
This application started +leaking loads of huge pages when we upgraded to a recent kernel. + +Using the page ref tracepoints and a BPF program written by Tejun Heo we +were able to determine that these pages would have multiple refcounts from +the page fault path, but when it came to unmap time we wouldn't drop the +number of refs we had added from the faults. + +I wrote a reproducer that mmap'ed a file backed by tmpfs with -o +huge=always, and then spawned 20 threads all looping and faulting random +offsets in this map, while using madvise(MADV_DONTNEED) randomly for huge +page aligned ranges. This very quickly reproduced the problem. + +The problem here is that we check for the case that we have multiple +threads faulting in a range that was previously unmapped. One thread maps +the PMD, the other thread loses the race and then returns 0. However at +this point we already have the page, and we are no longer putting this +page into the process's address space, and so we leak the page. We +actually did the correct thing prior to f9ce0be71d1f, however it looks +like Kirill copied what we do in the anonymous page case. In the +anonymous page case we don't yet have a page, so we don't have to drop a +reference on anything. Previously we did the correct thing for file based +faults by returning VM_FAULT_NOPAGE so we correctly drop the reference on +the page we faulted in. + +Fix this by returning VM_FAULT_NOPAGE in the pmd_devmap_trans_unstable() +case, this makes us drop the ref on the page properly, and now my +reproducer no longer leaks the huge pages.
+ +[josef@toxicpanda.com: v2] + Link: https://lkml.kernel.org/r/e90c8f0dbae836632b669c2afc434006a00d4a67.1657721478.git.josef@toxicpanda.com +Link: https://lkml.kernel.org/r/2b798acfd95c9ab9395fe85e8d5a835e2e10a920.1657051137.git.josef@toxicpanda.com +Fixes: f9ce0be71d1f ("mm: Cleanup faultaround and finish_fault() codepaths") +Signed-off-by: Josef Bacik +Signed-off-by: Rik van Riel +Signed-off-by: Chris Mason +Acked-by: Kirill A. Shutemov +Cc: Matthew Wilcox (Oracle) +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/memory.c | 7 +++++-- + 1 file changed, 5 insertions(+), 2 deletions(-) + +--- a/mm/memory.c ++++ b/mm/memory.c +@@ -4108,9 +4108,12 @@ vm_fault_t finish_fault(struct vm_fault + return VM_FAULT_OOM; + } + +- /* See comment in handle_pte_fault() */ ++ /* ++ * See comment in handle_pte_fault() for how this scenario happens, we ++ * need to return NOPAGE so that we drop this page. ++ */ + if (pmd_devmap_trans_unstable(vmf->pmd)) +- return 0; ++ return VM_FAULT_NOPAGE; + + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); diff --git a/queue-5.18/mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch b/queue-5.18/mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch new file mode 100644 index 00000000000..c4cf789497d --- /dev/null +++ b/queue-5.18/mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch @@ -0,0 +1,56 @@ +From c2cb0dcce9dd8b748b6ca8bb8d4a389f2e232307 Mon Sep 17 00:00:00 2001 +From: Naoya Horiguchi +Date: Mon, 4 Jul 2022 10:33:05 +0900 +Subject: mm/hugetlb: separate path for hwpoison entry in copy_hugetlb_page_range() + +From: Naoya Horiguchi + +commit c2cb0dcce9dd8b748b6ca8bb8d4a389f2e232307 upstream. + +Originally copy_hugetlb_page_range() handles migration entries and +hwpoisoned entries in similar manner. 
But recently the related code path +has more code for migration entries, and when +is_writable_migration_entry() was converted to +!is_readable_migration_entry(), hwpoison entries on source processes were +unexpectedly updated (which is legitimate for migration entries, but +not for hwpoison entries). This results in unexpected serious issues like +a kernel panic when forking processes with hwpoison entries in pmd. + +Separate the if branch into one for hwpoison entries and one for migration +entries. + +Link: https://lkml.kernel.org/r/20220704013312.2415700-3-naoya.horiguchi@linux.dev +Fixes: 6c287605fd56 ("mm: remember exclusively mapped anonymous pages with PG_anon_exclusive") +Signed-off-by: Naoya Horiguchi +Reviewed-by: Miaohe Lin +Reviewed-by: Mike Kravetz +Reviewed-by: Muchun Song +Cc: [5.18] +Cc: David Hildenbrand +Cc: Liu Shixin +Cc: Oscar Salvador +Cc: Yang Shi +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/hugetlb.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +--- a/mm/hugetlb.c ++++ b/mm/hugetlb.c +@@ -4764,8 +4764,13 @@ again: + * sharing with another vma.
+ */ + ; +- } else if (unlikely(is_hugetlb_entry_migration(entry) || +- is_hugetlb_entry_hwpoisoned(entry))) { ++ } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { ++ bool uffd_wp = huge_pte_uffd_wp(entry); ++ ++ if (!userfaultfd_wp(dst_vma) && uffd_wp) ++ entry = huge_pte_clear_uffd_wp(entry); ++ set_huge_pte_at(dst, addr, dst_pte, entry); ++ } else if (unlikely(is_hugetlb_entry_migration(entry))) { + swp_entry_t swp_entry = pte_to_swp_entry(entry); + + if (is_writable_migration_entry(swp_entry) && cow) { diff --git a/queue-5.18/nouveau-svm-fix-to-migrate-all-requested-pages.patch b/queue-5.18/nouveau-svm-fix-to-migrate-all-requested-pages.patch new file mode 100644 index 00000000000..75047917b84 --- /dev/null +++ b/queue-5.18/nouveau-svm-fix-to-migrate-all-requested-pages.patch @@ -0,0 +1,46 @@ +From 66cee9097e2b74ff3c8cc040ce5717c521a0c3fa Mon Sep 17 00:00:00 2001 +From: Alistair Popple +Date: Wed, 20 Jul 2022 16:27:45 +1000 +Subject: nouveau/svm: Fix to migrate all requested pages + +From: Alistair Popple + +commit 66cee9097e2b74ff3c8cc040ce5717c521a0c3fa upstream. + +Users may request that pages from an OpenCL SVM allocation be migrated +to the GPU with clEnqueueSVMMigrateMem(). In Nouveau this will call into +nouveau_dmem_migrate_vma() to do the migration. If the total range to be +migrated exceeds SG_MAX_SINGLE_ALLOC the pages will be migrated in +chunks of size SG_MAX_SINGLE_ALLOC. However a typo in updating the +starting address means that only the first chunk will get migrated. + +Fix the calculation so that the entire range will get migrated if +possible. 
+ +Signed-off-by: Alistair Popple +Fixes: e3d8b0890469 ("drm/nouveau/svm: map pages after migration") +Reviewed-by: Ralph Campbell +Reviewed-by: Lyude Paul +Signed-off-by: Lyude Paul +Link: https://patchwork.freedesktop.org/patch/msgid/20220720062745.960701-1-apopple@nvidia.com +Cc: # v5.8+ +Signed-off-by: Greg Kroah-Hartman +--- + drivers/gpu/drm/nouveau/nouveau_dmem.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c ++++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c +@@ -680,7 +680,11 @@ nouveau_dmem_migrate_vma(struct nouveau_ + goto out_free_dma; + + for (i = 0; i < npages; i += max) { +- args.end = start + (max << PAGE_SHIFT); ++ if (args.start + (max << PAGE_SHIFT) > end) ++ args.end = end; ++ else ++ args.end = args.start + (max << PAGE_SHIFT); ++ + ret = migrate_vma_setup(&args); + if (ret) + goto out_free_pfns; diff --git a/queue-5.18/ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch b/queue-5.18/ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch new file mode 100644 index 00000000000..ad4c907bf3f --- /dev/null +++ b/queue-5.18/ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch @@ -0,0 +1,107 @@ +From 38c9c22a85aeed28d0831f230136e9cf6fa2ed44 Mon Sep 17 00:00:00 2001 +From: ChenXiaoSong +Date: Thu, 7 Jul 2022 18:53:29 +0800 +Subject: ntfs: fix use-after-free in ntfs_ucsncmp() + +From: ChenXiaoSong + +commit 38c9c22a85aeed28d0831f230136e9cf6fa2ed44 upstream. 
+ +Syzkaller reported use-after-free bug as follows: + +================================================================== +BUG: KASAN: use-after-free in ntfs_ucsncmp+0x123/0x130 +Read of size 2 at addr ffff8880751acee8 by task a.out/879 + +CPU: 7 PID: 879 Comm: a.out Not tainted 5.19.0-rc4-next-20220630-00001-gcc5218c8bd2c-dirty #7 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014 +Call Trace: + + dump_stack_lvl+0x1c0/0x2b0 + print_address_description.constprop.0.cold+0xd4/0x484 + print_report.cold+0x55/0x232 + kasan_report+0xbf/0xf0 + ntfs_ucsncmp+0x123/0x130 + ntfs_are_names_equal.cold+0x2b/0x41 + ntfs_attr_find+0x43b/0xb90 + ntfs_attr_lookup+0x16d/0x1e0 + ntfs_read_locked_attr_inode+0x4aa/0x2360 + ntfs_attr_iget+0x1af/0x220 + ntfs_read_locked_inode+0x246c/0x5120 + ntfs_iget+0x132/0x180 + load_system_files+0x1cc6/0x3480 + ntfs_fill_super+0xa66/0x1cf0 + mount_bdev+0x38d/0x460 + legacy_get_tree+0x10d/0x220 + vfs_get_tree+0x93/0x300 + do_new_mount+0x2da/0x6d0 + path_mount+0x496/0x19d0 + __x64_sys_mount+0x284/0x300 + do_syscall_64+0x3b/0xc0 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 +RIP: 0033:0x7f3f2118d9ea +Code: 48 8b 0d a9 f4 0b 00 f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 76 f4 0b 00 f7 d8 64 89 01 48 +RSP: 002b:00007ffc269deac8 EFLAGS: 00000202 ORIG_RAX: 00000000000000a5 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f3f2118d9ea +RDX: 0000000020000000 RSI: 0000000020000100 RDI: 00007ffc269dec00 +RBP: 00007ffc269dec80 R08: 00007ffc269deb00 R09: 00007ffc269dec44 +R10: 0000000000000000 R11: 0000000000000202 R12: 000055f81ab1d220 +R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 + + +The buggy address belongs to the physical page: +page:0000000085430378 refcount:1 mapcount:1 mapping:0000000000000000 index:0x555c6a81d pfn:0x751ac +memcg:ffff888101f7e180 +anon flags: 
0xfffffc00a0014(uptodate|lru|mappedtodisk|swapbacked|node=0|zone=1|lastcpupid=0x1fffff) +raw: 000fffffc00a0014 ffffea0001bf2988 ffffea0001de2448 ffff88801712e201 +raw: 0000000555c6a81d 0000000000000000 0000000100000000 ffff888101f7e180 +page dumped because: kasan: bad access detected + +Memory state around the buggy address: + ffff8880751acd80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffff8880751ace00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +>ffff8880751ace80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ^ + ffff8880751acf00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + ffff8880751acf80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 +================================================================== + +The reason is that struct ATTR_RECORD->name_offset is 6485, end address of +name string is out of bounds. + +Fix this by adding sanity check on end address of attribute name string. + +[akpm@linux-foundation.org: coding-style cleanups] +[chenxiaosong2@huawei.com: cleanup suggested by Hawkins Jiawei] + Link: https://lkml.kernel.org/r/20220709064511.3304299-1-chenxiaosong2@huawei.com +Link: https://lkml.kernel.org/r/20220707105329.4020708-1-chenxiaosong2@huawei.com +Signed-off-by: ChenXiaoSong +Signed-off-by: Hawkins Jiawei +Cc: Anton Altaparmakov +Cc: ChenXiaoSong +Cc: Yongqiang Liu +Cc: Zhang Yi +Cc: Zhang Xiaoxu +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ntfs/attrib.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +--- a/fs/ntfs/attrib.c ++++ b/fs/ntfs/attrib.c +@@ -592,8 +592,12 @@ static int ntfs_attr_find(const ATTR_TYP + a = (ATTR_RECORD*)((u8*)ctx->attr + + le32_to_cpu(ctx->attr->length)); + for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) { +- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec + +- le32_to_cpu(ctx->mrec->bytes_allocated)) ++ u8 *mrec_end = (u8 *)ctx->mrec + ++ le32_to_cpu(ctx->mrec->bytes_allocated); ++ u8 *name_end = (u8 *)a + 
le16_to_cpu(a->name_offset) + ++ a->name_length * sizeof(ntfschar); ++ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end || ++ name_end > mrec_end) + break; + ctx->attr = a; + if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) || diff --git a/queue-5.18/revert-ocfs2-mount-shared-volume-without-ha-stack.patch b/queue-5.18/revert-ocfs2-mount-shared-volume-without-ha-stack.patch new file mode 100644 index 00000000000..59e065c04d5 --- /dev/null +++ b/queue-5.18/revert-ocfs2-mount-shared-volume-without-ha-stack.patch @@ -0,0 +1,243 @@ +From c80af0c250c8f8a3c978aa5aafbe9c39b336b813 Mon Sep 17 00:00:00 2001 +From: Junxiao Bi +Date: Fri, 3 Jun 2022 15:28:01 -0700 +Subject: Revert "ocfs2: mount shared volume without ha stack" + +From: Junxiao Bi + +commit c80af0c250c8f8a3c978aa5aafbe9c39b336b813 upstream. + +This reverts commit 912f655d78c5d4ad05eac287f23a435924df7144. + +This commit introduced a regression that can cause mount hung. The +changes in __ocfs2_find_empty_slot causes that any node with none-zero +node number can grab the slot that was already taken by node 0, so node 1 +will access the same journal with node 0, when it try to grab journal +cluster lock, it will hung because it was already acquired by node 0. +It's very easy to reproduce this, in one cluster, mount node 0 first, then +node 1, you will see the following call trace from node 1. + +[13148.735424] INFO: task mount.ocfs2:53045 blocked for more than 122 seconds. +[13148.739691] Not tainted 5.15.0-2148.0.4.el8uek.mountracev2.x86_64 #2 +[13148.742560] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. +[13148.745846] task:mount.ocfs2 state:D stack: 0 pid:53045 ppid: 53044 flags:0x00004000 +[13148.749354] Call Trace: +[13148.750718] +[13148.752019] ? usleep_range+0x90/0x89 +[13148.753882] __schedule+0x210/0x567 +[13148.755684] schedule+0x44/0xa8 +[13148.757270] schedule_timeout+0x106/0x13c +[13148.759273] ? 
__prepare_to_swait+0x53/0x78 +[13148.761218] __wait_for_common+0xae/0x163 +[13148.763144] __ocfs2_cluster_lock.constprop.0+0x1d6/0x870 [ocfs2] +[13148.765780] ? ocfs2_inode_lock_full_nested+0x18d/0x398 [ocfs2] +[13148.768312] ocfs2_inode_lock_full_nested+0x18d/0x398 [ocfs2] +[13148.770968] ocfs2_journal_init+0x91/0x340 [ocfs2] +[13148.773202] ocfs2_check_volume+0x39/0x461 [ocfs2] +[13148.775401] ? iput+0x69/0xba +[13148.777047] ocfs2_mount_volume.isra.0.cold+0x40/0x1f5 [ocfs2] +[13148.779646] ocfs2_fill_super+0x54b/0x853 [ocfs2] +[13148.781756] mount_bdev+0x190/0x1b7 +[13148.783443] ? ocfs2_remount+0x440/0x440 [ocfs2] +[13148.785634] legacy_get_tree+0x27/0x48 +[13148.787466] vfs_get_tree+0x25/0xd0 +[13148.789270] do_new_mount+0x18c/0x2d9 +[13148.791046] __x64_sys_mount+0x10e/0x142 +[13148.792911] do_syscall_64+0x3b/0x89 +[13148.794667] entry_SYSCALL_64_after_hwframe+0x170/0x0 +[13148.797051] RIP: 0033:0x7f2309f6e26e +[13148.798784] RSP: 002b:00007ffdcee7d408 EFLAGS: 00000246 ORIG_RAX: 00000000000000a5 +[13148.801974] RAX: ffffffffffffffda RBX: 00007ffdcee7d4a0 RCX: 00007f2309f6e26e +[13148.804815] RDX: 0000559aa762a8ae RSI: 0000559aa939d340 RDI: 0000559aa93a22b0 +[13148.807719] RBP: 00007ffdcee7d5b0 R08: 0000559aa93a2290 R09: 00007f230a0b4820 +[13148.810659] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffdcee7d420 +[13148.813609] R13: 0000000000000000 R14: 0000559aa939f000 R15: 0000000000000000 +[13148.816564] + +To fix it, we can just fix __ocfs2_find_empty_slot. But original commit +introduced the feature to mount ocfs2 locally even it is cluster based, +that is a very dangerous, it can easily cause serious data corruption, +there is no way to stop other nodes mounting the fs and corrupting it. +Setup ha or other cluster-aware stack is just the cost that we have to +take for avoiding corruption, otherwise we have to do it in kernel. 
+ +Link: https://lkml.kernel.org/r/20220603222801.42488-1-junxiao.bi@oracle.com +Fixes: 912f655d78c5("ocfs2: mount shared volume without ha stack") +Signed-off-by: Junxiao Bi +Acked-by: Joseph Qi +Cc: Mark Fasheh +Cc: Joel Becker +Cc: Changwei Ge +Cc: Gang He +Cc: Jun Piao +Cc: +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/ocfs2/ocfs2.h | 4 +--- + fs/ocfs2/slot_map.c | 46 +++++++++++++++++++--------------------------- + fs/ocfs2/super.c | 21 --------------------- + 3 files changed, 20 insertions(+), 51 deletions(-) + +--- a/fs/ocfs2/ocfs2.h ++++ b/fs/ocfs2/ocfs2.h +@@ -277,7 +277,6 @@ enum ocfs2_mount_options + OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ + OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ + OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ +- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ + }; + + #define OCFS2_OSB_SOFT_RO 0x0001 +@@ -673,8 +672,7 @@ static inline int ocfs2_cluster_o2cb_glo + + static inline int ocfs2_mount_local(struct ocfs2_super *osb) + { +- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) +- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); ++ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); + } + + static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) +--- a/fs/ocfs2/slot_map.c ++++ b/fs/ocfs2/slot_map.c +@@ -252,16 +252,14 @@ static int __ocfs2_find_empty_slot(struc + int i, ret = -ENOSPC; + + if ((preferred >= 0) && (preferred < si->si_num_slots)) { +- if (!si->si_slots[preferred].sl_valid || +- !si->si_slots[preferred].sl_node_num) { ++ if (!si->si_slots[preferred].sl_valid) { + ret = preferred; + goto out; + } + } + + for(i = 0; i < si->si_num_slots; i++) { +- if (!si->si_slots[i].sl_valid || +- !si->si_slots[i].sl_node_num) { ++ if (!si->si_slots[i].sl_valid) { + ret = i; + break; + } +@@ -456,30 +454,24 @@ int 
ocfs2_find_slot(struct ocfs2_super * + spin_lock(&osb->osb_lock); + ocfs2_update_slot_info(si); + +- if (ocfs2_mount_local(osb)) +- /* use slot 0 directly in local mode */ +- slot = 0; +- else { +- /* search for ourselves first and take the slot if it already +- * exists. Perhaps we need to mark this in a variable for our +- * own journal recovery? Possibly not, though we certainly +- * need to warn to the user */ +- slot = __ocfs2_node_num_to_slot(si, osb->node_num); ++ /* search for ourselves first and take the slot if it already ++ * exists. Perhaps we need to mark this in a variable for our ++ * own journal recovery? Possibly not, though we certainly ++ * need to warn to the user */ ++ slot = __ocfs2_node_num_to_slot(si, osb->node_num); ++ if (slot < 0) { ++ /* if no slot yet, then just take 1st available ++ * one. */ ++ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); + if (slot < 0) { +- /* if no slot yet, then just take 1st available +- * one. */ +- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); +- if (slot < 0) { +- spin_unlock(&osb->osb_lock); +- mlog(ML_ERROR, "no free slots available!\n"); +- status = -EINVAL; +- goto bail; +- } +- } else +- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " +- "already allocated to this node!\n", +- slot, osb->dev_str); +- } ++ spin_unlock(&osb->osb_lock); ++ mlog(ML_ERROR, "no free slots available!\n"); ++ status = -EINVAL; ++ goto bail; ++ } ++ } else ++ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " ++ "allocated to this node!\n", slot, osb->dev_str); + + ocfs2_set_slot(si, slot, osb->node_num); + osb->slot_num = slot; +--- a/fs/ocfs2/super.c ++++ b/fs/ocfs2/super.c +@@ -172,7 +172,6 @@ enum { + Opt_dir_resv_level, + Opt_journal_async_commit, + Opt_err_cont, +- Opt_nocluster, + Opt_err, + }; + +@@ -206,7 +205,6 @@ static const match_table_t tokens = { + {Opt_dir_resv_level, "dir_resv_level=%u"}, + {Opt_journal_async_commit, "journal_async_commit"}, + {Opt_err_cont, 
"errors=continue"}, +- {Opt_nocluster, "nocluster"}, + {Opt_err, NULL} + }; + +@@ -618,13 +616,6 @@ static int ocfs2_remount(struct super_bl + goto out; + } + +- tmp = OCFS2_MOUNT_NOCLUSTER; +- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { +- ret = -EINVAL; +- mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); +- goto out; +- } +- + tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE; + if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { +@@ -865,7 +856,6 @@ static int ocfs2_verify_userspace_stack( + } + + if (ocfs2_userspace_stack(osb) && +- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && + strncmp(osb->osb_cluster_stack, mopt->cluster_stack, + OCFS2_STACK_LABEL_LEN)) { + mlog(ML_ERROR, +@@ -1144,11 +1134,6 @@ static int ocfs2_fill_super(struct super + osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : + "ordered"); + +- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && +- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) +- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " +- "without cluster aware mode.\n", osb->dev_str); +- + atomic_set(&osb->vol_state, VOLUME_MOUNTED); + wake_up(&osb->osb_mount_event); + +@@ -1455,9 +1440,6 @@ static int ocfs2_parse_options(struct su + case Opt_journal_async_commit: + mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; + break; +- case Opt_nocluster: +- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; +- break; + default: + mlog(ML_ERROR, + "Unrecognized mount option \"%s\" " +@@ -1569,9 +1551,6 @@ static int ocfs2_show_options(struct seq + if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) + seq_printf(s, ",journal_async_commit"); + +- if (opts & OCFS2_MOUNT_NOCLUSTER) +- seq_printf(s, ",nocluster"); +- + return 0; + } + diff --git a/queue-5.18/s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch b/queue-5.18/s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch new file mode 100644 index 
00000000000..2f13db206c8 --- /dev/null +++ b/queue-5.18/s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch @@ -0,0 +1,125 @@ +From 918e75f77af7d2e049bb70469ec0a2c12782d96a Mon Sep 17 00:00:00 2001 +From: Harald Freudenberger +Date: Wed, 13 Jul 2022 15:17:21 +0200 +Subject: s390/archrandom: prevent CPACF trng invocations in interrupt context + +From: Harald Freudenberger + +commit 918e75f77af7d2e049bb70469ec0a2c12782d96a upstream. + +This patch slightly reworks the s390 arch_get_random_seed_{int,long} +implementation: Make sure the CPACF trng instruction is never +called in any interrupt context. This is done by adding an +additional condition in_task(). + +Justification: + +There are some constraints to satisfy for the invocation of the +arch_get_random_seed_{int,long}() functions: +- They should provide good random data during kernel initialization. +- They should not be called in interrupt context as the TRNG + instruction is relatively heavy weight and may for example + cause some network loads to time out and buck. + +However, it was not clear what kind of interrupt context is exactly +encountered during kernel init or network traffic eventually calling +arch_get_random_seed_long(). + +After some days of investigations it is clear that the s390 +start_kernel function is not running in any interrupt context and +so the trng is called: + +Jul 11 18:33:39 t35lp54 kernel: [<00000001064e90ca>] arch_get_random_seed_long.part.0+0x32/0x70 +Jul 11 18:33:39 t35lp54 kernel: [<000000010715f246>] random_init+0xf6/0x238 +Jul 11 18:33:39 t35lp54 kernel: [<000000010712545c>] start_kernel+0x4a4/0x628 +Jul 11 18:33:39 t35lp54 kernel: [<000000010590402a>] startup_continue+0x2a/0x40 + +The condition in_task() is true and the CPACF trng provides random data +during kernel startup. + +The network traffic however, is more difficult.
A typical call stack +looks like this: + +Jul 06 17:37:07 t35lp54 kernel: [<000000008b5600fc>] extract_entropy.constprop.0+0x23c/0x240 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b560136>] crng_reseed+0x36/0xd8 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b5604b8>] crng_make_state+0x78/0x340 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b5607e0>] _get_random_bytes+0x60/0xf8 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b56108a>] get_random_u32+0xda/0x248 +Jul 06 17:37:07 t35lp54 kernel: [<000000008aefe7a8>] kfence_guarded_alloc+0x48/0x4b8 +Jul 06 17:37:07 t35lp54 kernel: [<000000008aeff35e>] __kfence_alloc+0x18e/0x1b8 +Jul 06 17:37:07 t35lp54 kernel: [<000000008aef7f10>] __kmalloc_node_track_caller+0x368/0x4d8 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b611eac>] kmalloc_reserve+0x44/0xa0 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b611f98>] __alloc_skb+0x90/0x178 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b6120dc>] __napi_alloc_skb+0x5c/0x118 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b8f06b4>] qeth_extract_skb+0x13c/0x680 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b8f6526>] qeth_poll+0x256/0x3f8 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b63d76e>] __napi_poll.constprop.0+0x46/0x2f8 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b63dbec>] net_rx_action+0x1cc/0x408 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b937302>] __do_softirq+0x132/0x6b0 +Jul 06 17:37:07 t35lp54 kernel: [<000000008abf46ce>] __irq_exit_rcu+0x13e/0x170 +Jul 06 17:37:07 t35lp54 kernel: [<000000008abf531a>] irq_exit_rcu+0x22/0x50 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b922506>] do_io_irq+0xe6/0x198 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b935826>] io_int_handler+0xd6/0x110 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b9358a6>] psw_idle_exit+0x0/0xa +Jul 06 17:37:07 t35lp54 kernel: ([<000000008ab9c59a>] arch_cpu_idle+0x52/0xe0) +Jul 06 17:37:07 t35lp54 kernel: [<000000008b933cfe>] default_idle_call+0x6e/0xd0 +Jul 06 17:37:07 t35lp54 kernel: [<000000008ac59f4e>] do_idle+0xf6/0x1b0 +Jul 06 
17:37:07 t35lp54 kernel: [<000000008ac5a28e>] cpu_startup_entry+0x36/0x40 +Jul 06 17:37:07 t35lp54 kernel: [<000000008abb0d90>] smp_start_secondary+0x148/0x158 +Jul 06 17:37:07 t35lp54 kernel: [<000000008b935b9e>] restart_int_handler+0x6e/0x90 + +which confirms that the call is in softirq context. So in_task() covers exactly +the cases where we want to have CPACF trng called: not in nmi, not in hard irq, +not in soft irq but in normal task context and during kernel init. + +Signed-off-by: Harald Freudenberger +Acked-by: Jason A. Donenfeld +Reviewed-by: Juergen Christ +Link: https://lore.kernel.org/r/20220713131721.257907-1-freude@linux.ibm.com +Fixes: e4f74400308c ("s390/archrandom: simplify back to earlier design and initialize earlier") +[agordeev@linux.ibm.com changed desc, added Fixes and Link, removed -stable] +Signed-off-by: Alexander Gordeev +Signed-off-by: Greg Kroah-Hartman +--- + arch/s390/include/asm/archrandom.h | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/arch/s390/include/asm/archrandom.h ++++ b/arch/s390/include/asm/archrandom.h +@@ -2,7 +2,7 @@ + /* + * Kernel interface for the s390 arch_random_* functions + * +- * Copyright IBM Corp. 2017, 2020 ++ * Copyright IBM Corp. 
2017, 2022 + * + * Author: Harald Freudenberger + * +@@ -14,6 +14,7 @@ + #ifdef CONFIG_ARCH_RANDOM + + #include ++#include + #include + #include + +@@ -32,7 +33,8 @@ static inline bool __must_check arch_get + + static inline bool __must_check arch_get_random_seed_long(unsigned long *v) + { +- if (static_branch_likely(&s390_arch_random_available)) { ++ if (static_branch_likely(&s390_arch_random_available) && ++ in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; +@@ -42,7 +44,8 @@ static inline bool __must_check arch_get + + static inline bool __must_check arch_get_random_seed_int(unsigned int *v) + { +- if (static_branch_likely(&s390_arch_random_available)) { ++ if (static_branch_likely(&s390_arch_random_available) && ++ in_task()) { + cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v)); + atomic64_add(sizeof(*v), &s390_arch_random_counter); + return true; diff --git a/queue-5.18/secretmem-fix-unhandled-fault-in-truncate.patch b/queue-5.18/secretmem-fix-unhandled-fault-in-truncate.patch new file mode 100644 index 00000000000..71a797768e3 --- /dev/null +++ b/queue-5.18/secretmem-fix-unhandled-fault-in-truncate.patch @@ -0,0 +1,164 @@ +From 84ac013046ccc438af04b7acecd4d3ab84fe4bde Mon Sep 17 00:00:00 2001 +From: Mike Rapoport +Date: Thu, 7 Jul 2022 19:56:50 +0300 +Subject: secretmem: fix unhandled fault in truncate + +From: Mike Rapoport + +commit 84ac013046ccc438af04b7acecd4d3ab84fe4bde upstream. 
+ +syzkaller reports the following issue: + +BUG: unable to handle page fault for address: ffff888021f7e005 +PGD 11401067 P4D 11401067 PUD 11402067 PMD 21f7d063 PTE 800fffffde081060 +Oops: 0002 [#1] PREEMPT SMP KASAN +CPU: 0 PID: 3761 Comm: syz-executor281 Not tainted 5.19.0-rc4-syzkaller-00014-g941e3e791269 #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 +RIP: 0010:memset_erms+0x9/0x10 arch/x86/lib/memset_64.S:64 +Code: c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 f3 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 aa 4c 89 c8 c3 90 49 89 fa 40 0f b6 ce 48 b8 01 01 01 01 01 01 +RSP: 0018:ffffc9000329fa90 EFLAGS: 00010202 +RAX: 0000000000000000 RBX: 0000000000001000 RCX: 0000000000000ffb +RDX: 0000000000000ffb RSI: 0000000000000000 RDI: ffff888021f7e005 +RBP: ffffea000087df80 R08: 0000000000000001 R09: ffff888021f7e005 +R10: ffffed10043efdff R11: 0000000000000000 R12: 0000000000000005 +R13: 0000000000000000 R14: 0000000000001000 R15: 0000000000000ffb +FS: 00007fb29d8b2700(0000) GS:ffff8880b9a00000(0000) knlGS:0000000000000000 +CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 +CR2: ffff888021f7e005 CR3: 0000000026e7b000 CR4: 00000000003506f0 +DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 +DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 +Call Trace: + + zero_user_segments include/linux/highmem.h:272 [inline] + folio_zero_range include/linux/highmem.h:428 [inline] + truncate_inode_partial_folio+0x76a/0xdf0 mm/truncate.c:237 + truncate_inode_pages_range+0x83b/0x1530 mm/truncate.c:381 + truncate_inode_pages mm/truncate.c:452 [inline] + truncate_pagecache+0x63/0x90 mm/truncate.c:753 + simple_setattr+0xed/0x110 fs/libfs.c:535 + secretmem_setattr+0xae/0xf0 mm/secretmem.c:170 + notify_change+0xb8c/0x12b0 fs/attr.c:424 + do_truncate+0x13c/0x200 fs/open.c:65 + do_sys_ftruncate+0x536/0x730 fs/open.c:193 + do_syscall_x64 arch/x86/entry/common.c:50 [inline] + 
do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 + entry_SYSCALL_64_after_hwframe+0x46/0xb0 +RIP: 0033:0x7fb29d900899 +Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 11 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 +RSP: 002b:00007fb29d8b2318 EFLAGS: 00000246 ORIG_RAX: 000000000000004d +RAX: ffffffffffffffda RBX: 00007fb29d988408 RCX: 00007fb29d900899 +RDX: 00007fb29d900899 RSI: 0000000000000005 RDI: 0000000000000003 +RBP: 00007fb29d988400 R08: 0000000000000000 R09: 0000000000000000 +R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb29d98840c +R13: 00007ffca01a23bf R14: 00007fb29d8b2400 R15: 0000000000022000 + +Modules linked in: +CR2: ffff888021f7e005 +---[ end trace 0000000000000000 ]--- + +Eric Biggers suggested that this happens when +secretmem_setattr()->simple_setattr() races with secretmem_fault() so that +a page that is faulted in by secretmem_fault() (and thus removed from the +direct map) is zeroed by inode truncation right afterwards. + +Use mapping->invalidate_lock to make secretmem_fault() and +secretmem_setattr() mutually exclusive. 
+ +[rppt@linux.ibm.com: v3] + Link: https://lkml.kernel.org/r/20220714091337.412297-1-rppt@kernel.org +Link: https://lkml.kernel.org/r/20220707165650.248088-1-rppt@kernel.org +Reported-by: syzbot+9bd2b7adbd34b30b87e4@syzkaller.appspotmail.com +Signed-off-by: Mike Rapoport +Suggested-by: Eric Biggers +Reviewed-by: Axel Rasmussen +Reviewed-by: Jan Kara +Cc: Eric Biggers +Cc: Hillf Danton +Cc: Matthew Wilcox +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + mm/secretmem.c | 33 ++++++++++++++++++++++++++------- + 1 file changed, 26 insertions(+), 7 deletions(-) + +--- a/mm/secretmem.c ++++ b/mm/secretmem.c +@@ -55,22 +55,28 @@ static vm_fault_t secretmem_fault(struct + gfp_t gfp = vmf->gfp_mask; + unsigned long addr; + struct page *page; ++ vm_fault_t ret; + int err; + + if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode)) + return vmf_error(-EINVAL); + ++ filemap_invalidate_lock_shared(mapping); ++ + retry: + page = find_lock_page(mapping, offset); + if (!page) { + page = alloc_page(gfp | __GFP_ZERO); +- if (!page) +- return VM_FAULT_OOM; ++ if (!page) { ++ ret = VM_FAULT_OOM; ++ goto out; ++ } + + err = set_direct_map_invalid_noflush(page); + if (err) { + put_page(page); +- return vmf_error(err); ++ ret = vmf_error(err); ++ goto out; + } + + __SetPageUptodate(page); +@@ -86,7 +92,8 @@ retry: + if (err == -EEXIST) + goto retry; + +- return vmf_error(err); ++ ret = vmf_error(err); ++ goto out; + } + + addr = (unsigned long)page_address(page); +@@ -94,7 +101,11 @@ retry: + } + + vmf->page = page; +- return VM_FAULT_LOCKED; ++ ret = VM_FAULT_LOCKED; ++ ++out: ++ filemap_invalidate_unlock_shared(mapping); ++ return ret; + } + + static const struct vm_operations_struct secretmem_vm_ops = { +@@ -162,12 +173,20 @@ static int secretmem_setattr(struct user + struct dentry *dentry, struct iattr *iattr) + { + struct inode *inode = d_inode(dentry); ++ struct address_space *mapping = inode->i_mapping; + unsigned int ia_valid = iattr->ia_valid; 
++ int ret; ++ ++ filemap_invalidate_lock(mapping); + + if ((ia_valid & ATTR_SIZE) && inode->i_size) +- return -EINVAL; ++ ret = -EINVAL; ++ else ++ ret = simple_setattr(mnt_userns, dentry, iattr); ++ ++ filemap_invalidate_unlock(mapping); + +- return simple_setattr(mnt_userns, dentry, iattr); ++ return ret; + } + + static const struct inode_operations secretmem_iops = { diff --git a/queue-5.18/series b/queue-5.18/series new file mode 100644 index 00000000000..d8c3c2134e6 --- /dev/null +++ b/queue-5.18/series @@ -0,0 +1,27 @@ +bluetooth-always-set-event-mask-on-suspend.patch +bluetooth-l2cap-fix-use-after-free-caused-by-l2cap_chan_put.patch +arm-dts-lan966x-fix-sys_clk-frequency.patch +arm-pxa2xx-fix-gpio-descriptor-tables.patch +revert-ocfs2-mount-shared-volume-without-ha-stack.patch +userfaultfd-provide-properly-masked-address-for-huge-pages.patch +ntfs-fix-use-after-free-in-ntfs_ucsncmp.patch +fs-sendfile-handles-o_nonblock-of-out_fd.patch +secretmem-fix-unhandled-fault-in-truncate.patch +mm-hugetlb-separate-path-for-hwpoison-entry-in-copy_hugetlb_page_range.patch +mm-fix-page-leak-with-multiple-threads-mapping-the-same-page.patch +mm-fix-missing-wake-up-event-for-fsdax-pages.patch +hugetlb-fix-memoryleak-in-hugetlb_mcopy_atomic_pte.patch +asm-generic-remove-a-broken-and-needless-ifdef-conditional.patch +s390-archrandom-prevent-cpacf-trng-invocations-in-interrupt-context.patch +intel_idle-fix-false-positive-rcu-splats-due-to-incorrect-hardirqs-state.patch +nouveau-svm-fix-to-migrate-all-requested-pages.patch +drm-simpledrm-fix-return-type-of-simpledrm_simple_display_pipe_mode_valid.patch +watch_queue-fix-missing-rcu-annotation.patch +watch_queue-fix-missing-locking-in-add_watch_to_object.patch +tcp-fix-data-races-around-sysctl_tcp_dsack.patch +tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch +tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch +tcp-fix-a-data-race-around-sysctl_tcp_frto.patch +tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch 
+tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch +bridge-do-not-send-empty-ifla_af_spec-attribute.patch diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch new file mode 100644 index 00000000000..5f3133c95fc --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_adv_win_scale.patch @@ -0,0 +1,31 @@ +From 36eeee75ef0157e42fb6593dcc65daab289b559e Mon Sep 17 00:00:00 2001 +From: Kuniyuki Iwashima +Date: Wed, 20 Jul 2022 09:50:14 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_adv_win_scale. + +From: Kuniyuki Iwashima + +commit 36eeee75ef0157e42fb6593dcc65daab289b559e upstream. + +While reading sysctl_tcp_adv_win_scale, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + include/net/tcp.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/include/net/tcp.h ++++ b/include/net/tcp.h +@@ -1437,7 +1437,7 @@ void tcp_select_initial_window(const str + + static inline int tcp_win_from_space(const struct sock *sk, int space) + { +- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; ++ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale); + + return tcp_adv_win_scale <= 0 ? + (space>>(-tcp_adv_win_scale)) : diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch new file mode 100644 index 00000000000..27bcefc78cc --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_app_win.patch @@ -0,0 +1,31 @@ +From 02ca527ac5581cf56749db9fd03d854e842253dd Mon Sep 17 00:00:00 2001 +From: Kuniyuki Iwashima +Date: Wed, 20 Jul 2022 09:50:13 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_app_win. 
+ +From: Kuniyuki Iwashima + +commit 02ca527ac5581cf56749db9fd03d854e842253dd upstream. + +While reading sysctl_tcp_app_win, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -534,7 +534,7 @@ static void tcp_grow_window(struct sock + */ + static void tcp_init_buffer_space(struct sock *sk) + { +- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; ++ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win); + struct tcp_sock *tp = tcp_sk(sk); + int maxwin; + diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch new file mode 100644 index 00000000000..df8bb55dd9e --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_frto.patch @@ -0,0 +1,31 @@ +From 706c6202a3589f290e1ef9be0584a8f4a3cc0507 Mon Sep 17 00:00:00 2001 +From: Kuniyuki Iwashima +Date: Wed, 20 Jul 2022 09:50:15 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_frto. + +From: Kuniyuki Iwashima + +commit 706c6202a3589f290e1ef9be0584a8f4a3cc0507 upstream. + +While reading sysctl_tcp_frto, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -2175,7 +2175,7 @@ void tcp_enter_loss(struct sock *sk) + * loss recovery is underway except recurring timeout(s) on + * the same SND.UNA (sec 3.2). 
Disable F-RTO on path MTU probing + */ +- tp->frto = net->ipv4.sysctl_tcp_frto && ++ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) && + (new_recovery || icsk->icsk_retransmits) && + !inet_csk(sk)->icsk_mtup.probe_size; + } diff --git a/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch new file mode 100644 index 00000000000..52398e90057 --- /dev/null +++ b/queue-5.18/tcp-fix-a-data-race-around-sysctl_tcp_nometrics_save.patch @@ -0,0 +1,31 @@ +From 8499a2454d9e8a55ce616ede9f9580f36fd5b0f3 Mon Sep 17 00:00:00 2001 +From: Kuniyuki Iwashima +Date: Wed, 20 Jul 2022 09:50:16 -0700 +Subject: tcp: Fix a data-race around sysctl_tcp_nometrics_save. + +From: Kuniyuki Iwashima + +commit 8499a2454d9e8a55ce616ede9f9580f36fd5b0f3 upstream. + +While reading sysctl_tcp_nometrics_save, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its reader. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_metrics.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -329,7 +329,7 @@ void tcp_update_metrics(struct sock *sk) + int m; + + sk_dst_confirm(sk); +- if (net->ipv4.sysctl_tcp_nometrics_save || !dst) ++ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst) + return; + + rcu_read_lock(); diff --git a/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_dsack.patch b/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_dsack.patch new file mode 100644 index 00000000000..3aebae641d9 --- /dev/null +++ b/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_dsack.patch @@ -0,0 +1,40 @@ +From 58ebb1c8b35a8ef38cd6927431e0fa7b173a632d Mon Sep 17 00:00:00 2001 +From: Kuniyuki Iwashima +Date: Wed, 20 Jul 2022 09:50:12 -0700 +Subject: tcp: Fix data-races around sysctl_tcp_dsack. 
+ +From: Kuniyuki Iwashima + +commit 58ebb1c8b35a8ef38cd6927431e0fa7b173a632d upstream. + +While reading sysctl_tcp_dsack, it can be changed concurrently. +Thus, we need to add READ_ONCE() to its readers. + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_input.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +--- a/net/ipv4/tcp_input.c ++++ b/net/ipv4/tcp_input.c +@@ -4426,7 +4426,7 @@ static void tcp_dsack_set(struct sock *s + { + struct tcp_sock *tp = tcp_sk(sk); + +- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { ++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { + int mib_idx; + + if (before(seq, tp->rcv_nxt)) +@@ -4473,7 +4473,7 @@ static void tcp_send_dupack(struct sock + NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); + tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS); + +- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { ++ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) { + u32 end_seq = TCP_SKB_CB(skb)->end_seq; + + tcp_rcv_spurious_retrans(sk, skb); diff --git a/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch b/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch new file mode 100644 index 00000000000..d562d23bb8b --- /dev/null +++ b/queue-5.18/tcp-fix-data-races-around-sysctl_tcp_no_ssthresh_metrics_save.patch @@ -0,0 +1,58 @@ +From ab1ba21b523ab496b1a4a8e396333b24b0a18f9a Mon Sep 17 00:00:00 2001 +From: Kuniyuki Iwashima +Date: Wed, 20 Jul 2022 09:50:17 -0700 +Subject: tcp: Fix data-races around sysctl_tcp_no_ssthresh_metrics_save. + +From: Kuniyuki Iwashima + +commit ab1ba21b523ab496b1a4a8e396333b24b0a18f9a upstream. + +While reading sysctl_tcp_no_ssthresh_metrics_save, it can be changed +concurrently. Thus, we need to add READ_ONCE() to its readers. 
+ +Fixes: 65e6d90168f3 ("net-tcp: Disable TCP ssthresh metrics cache by default") +Signed-off-by: Kuniyuki Iwashima +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +--- + net/ipv4/tcp_metrics.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +--- a/net/ipv4/tcp_metrics.c ++++ b/net/ipv4/tcp_metrics.c +@@ -385,7 +385,7 @@ void tcp_update_metrics(struct sock *sk) + + if (tcp_in_initial_slowstart(tp)) { + /* Slow start still did not finish. */ +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val && (tcp_snd_cwnd(tp) >> 1) > val) +@@ -401,7 +401,7 @@ void tcp_update_metrics(struct sock *sk) + } else if (!tcp_in_slow_start(tp) && + icsk->icsk_ca_state == TCP_CA_Open) { + /* Cong. avoidance phase, cwnd is reliable. */ +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) + tcp_metric_set(tm, TCP_METRIC_SSTHRESH, + max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh)); +@@ -418,7 +418,7 @@ void tcp_update_metrics(struct sock *sk) + tcp_metric_set(tm, TCP_METRIC_CWND, + (val + tp->snd_ssthresh) >> 1); + } +- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save && ++ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) && + !tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) { + val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val && tp->snd_ssthresh > val) +@@ -463,7 +463,7 @@ void tcp_init_metrics(struct sock *sk) + if (tcp_metric_locked(tm, TCP_METRIC_CWND)) + tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND); + +- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ? ++ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ? 
+ 0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH); + if (val) { + tp->snd_ssthresh = val; diff --git a/queue-5.18/userfaultfd-provide-properly-masked-address-for-huge-pages.patch b/queue-5.18/userfaultfd-provide-properly-masked-address-for-huge-pages.patch new file mode 100644 index 00000000000..bd48417ec89 --- /dev/null +++ b/queue-5.18/userfaultfd-provide-properly-masked-address-for-huge-pages.patch @@ -0,0 +1,78 @@ +From d172b1a3bd065dd89234eac547fc62cf80681631 Mon Sep 17 00:00:00 2001 +From: Nadav Amit +Date: Mon, 11 Jul 2022 09:59:06 -0700 +Subject: userfaultfd: provide properly masked address for huge-pages + +From: Nadav Amit + +commit d172b1a3bd065dd89234eac547fc62cf80681631 upstream. + +Commit 824ddc601adc ("userfaultfd: provide unmasked address on +page-fault") was introduced to fix an old bug, in which the offset in the +address of a page-fault was masked. Concerns were raised - although were +never backed by actual code - that some userspace code might break because +the bug has been around for quite a while. To address these concerns a +new flag was introduced, and only when this flag is set by the user, +userfaultfd provides the exact address of the page-fault. + +The commit however had a bug, and if the flag is unset, the offset was +always masked based on a base-page granularity. Yet, for huge-pages, the +behavior prior to the commit was that the address is masked to the +huge-page granularity. + +While there are no reports on real breakage, fix this issue. If the flag +is unset, use the address with the masking that was done before. 
+ +Link: https://lkml.kernel.org/r/20220711165906.2682-1-namit@vmware.com +Fixes: 824ddc601adc ("userfaultfd: provide unmasked address on page-fault") +Signed-off-by: Nadav Amit +Reported-by: James Houghton +Reviewed-by: Mike Rapoport +Reviewed-by: Peter Xu +Reviewed-by: James Houghton +Cc: David Hildenbrand +Cc: Jan Kara +Cc: Andrea Arcangeli +Cc: +Signed-off-by: Andrew Morton +Signed-off-by: Greg Kroah-Hartman +--- + fs/userfaultfd.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +--- a/fs/userfaultfd.c ++++ b/fs/userfaultfd.c +@@ -191,17 +191,19 @@ static inline void msg_init(struct uffd_ + } + + static inline struct uffd_msg userfault_msg(unsigned long address, ++ unsigned long real_address, + unsigned int flags, + unsigned long reason, + unsigned int features) + { + struct uffd_msg msg; ++ + msg_init(&msg); + msg.event = UFFD_EVENT_PAGEFAULT; + +- if (!(features & UFFD_FEATURE_EXACT_ADDRESS)) +- address &= PAGE_MASK; +- msg.arg.pagefault.address = address; ++ msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ? ++ real_address : address; ++ + /* + * These flags indicate why the userfault occurred: + * - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault. 
+@@ -485,8 +487,8 @@ vm_fault_t handle_userfault(struct vm_fa + + init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function); + uwq.wq.private = current; +- uwq.msg = userfault_msg(vmf->real_address, vmf->flags, reason, +- ctx->features); ++ uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags, ++ reason, ctx->features); + uwq.ctx = ctx; + uwq.waken = false; + diff --git a/queue-5.18/watch_queue-fix-missing-locking-in-add_watch_to_object.patch b/queue-5.18/watch_queue-fix-missing-locking-in-add_watch_to_object.patch new file mode 100644 index 00000000000..d1042b62191 --- /dev/null +++ b/queue-5.18/watch_queue-fix-missing-locking-in-add_watch_to_object.patch @@ -0,0 +1,115 @@ +From e64ab2dbd882933b65cd82ff6235d705ad65dbb6 Mon Sep 17 00:00:00 2001 +From: Linus Torvalds +Date: Thu, 28 Jul 2022 10:31:12 +0100 +Subject: watch_queue: Fix missing locking in add_watch_to_object() + +From: Linus Torvalds + +commit e64ab2dbd882933b65cd82ff6235d705ad65dbb6 upstream. + +If a watch is being added to a queue, it needs to guard against +interference from addition of a new watch, manual removal of a watch and +removal of a watch due to some other queue being destroyed. + +KEYCTL_WATCH_KEY guards against this for the same {key,queue} pair by +holding the key->sem writelocked and by holding refs on both the key and +the queue - but that doesn't prevent interaction from other {key,queue} +pairs. + +While add_watch_to_object() does take the spinlock on the event queue, +it doesn't take the lock on the source's watch list. The assumption was +that the caller would prevent that (say by taking key->sem) - but that +doesn't prevent interference from the destruction of another queue. + +Fix this by locking the watcher list in add_watch_to_object(). 
+ +Fixes: c73be61cede5 ("pipe: Add general notification queue support") +Reported-by: syzbot+03d7b43290037d1f87ca@syzkaller.appspotmail.com +Signed-off-by: David Howells +cc: keyrings@vger.kernel.org +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + kernel/watch_queue.c | 58 +++++++++++++++++++++++++++++++-------------------- + 1 file changed, 36 insertions(+), 22 deletions(-) + +--- a/kernel/watch_queue.c ++++ b/kernel/watch_queue.c +@@ -454,6 +454,33 @@ void init_watch(struct watch *watch, str + rcu_assign_pointer(watch->queue, wqueue); + } + ++static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue) ++{ ++ const struct cred *cred; ++ struct watch *w; ++ ++ hlist_for_each_entry(w, &wlist->watchers, list_node) { ++ struct watch_queue *wq = rcu_access_pointer(w->queue); ++ if (wqueue == wq && watch->id == w->id) ++ return -EBUSY; ++ } ++ ++ cred = current_cred(); ++ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) { ++ atomic_dec(&cred->user->nr_watches); ++ return -EAGAIN; ++ } ++ ++ watch->cred = get_cred(cred); ++ rcu_assign_pointer(watch->watch_list, wlist); ++ ++ kref_get(&wqueue->usage); ++ kref_get(&watch->usage); ++ hlist_add_head(&watch->queue_node, &wqueue->watches); ++ hlist_add_head_rcu(&watch->list_node, &wlist->watchers); ++ return 0; ++} ++ + /** + * add_watch_to_object - Add a watch on an object to a watch list + * @watch: The watch to add +@@ -468,34 +495,21 @@ void init_watch(struct watch *watch, str + */ + int add_watch_to_object(struct watch *watch, struct watch_list *wlist) + { +- struct watch_queue *wqueue = rcu_access_pointer(watch->queue); +- struct watch *w; ++ struct watch_queue *wqueue; ++ int ret = -ENOENT; + +- hlist_for_each_entry(w, &wlist->watchers, list_node) { +- struct watch_queue *wq = rcu_access_pointer(w->queue); +- if (wqueue == wq && watch->id == w->id) +- return -EBUSY; +- } +- +- watch->cred = get_current_cred(); +- 
rcu_assign_pointer(watch->watch_list, wlist); +- +- if (atomic_inc_return(&watch->cred->user->nr_watches) > +- task_rlimit(current, RLIMIT_NOFILE)) { +- atomic_dec(&watch->cred->user->nr_watches); +- put_cred(watch->cred); +- return -EAGAIN; +- } ++ rcu_read_lock(); + ++ wqueue = rcu_access_pointer(watch->queue); + if (lock_wqueue(wqueue)) { +- kref_get(&wqueue->usage); +- kref_get(&watch->usage); +- hlist_add_head(&watch->queue_node, &wqueue->watches); ++ spin_lock(&wlist->lock); ++ ret = add_one_watch(watch, wlist, wqueue); ++ spin_unlock(&wlist->lock); + unlock_wqueue(wqueue); + } + +- hlist_add_head_rcu(&watch->list_node, &wlist->watchers); +- return 0; ++ rcu_read_unlock(); ++ return ret; + } + EXPORT_SYMBOL(add_watch_to_object); + diff --git a/queue-5.18/watch_queue-fix-missing-rcu-annotation.patch b/queue-5.18/watch_queue-fix-missing-rcu-annotation.patch new file mode 100644 index 00000000000..af06b213eac --- /dev/null +++ b/queue-5.18/watch_queue-fix-missing-rcu-annotation.patch @@ -0,0 +1,35 @@ +From e0339f036ef4beb9b20f0b6532a1e0ece7f594c6 Mon Sep 17 00:00:00 2001 +From: David Howells +Date: Thu, 28 Jul 2022 10:31:06 +0100 +Subject: watch_queue: Fix missing rcu annotation + +From: David Howells + +commit e0339f036ef4beb9b20f0b6532a1e0ece7f594c6 upstream. + +Since __post_watch_notification() walks wlist->watchers with only the +RCU read lock held, we need to use RCU methods to add to the list (we +already use RCU methods to remove from the list). + +Fix add_watch_to_object() to use hlist_add_head_rcu() instead of +hlist_add_head() for that list. 
+ +Fixes: c73be61cede5 ("pipe: Add general notification queue support") +Signed-off-by: David Howells +Signed-off-by: Linus Torvalds +Signed-off-by: Greg Kroah-Hartman +--- + kernel/watch_queue.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/kernel/watch_queue.c ++++ b/kernel/watch_queue.c +@@ -494,7 +494,7 @@ int add_watch_to_object(struct watch *wa + unlock_wqueue(wqueue); + } + +- hlist_add_head(&watch->list_node, &wlist->watchers); ++ hlist_add_head_rcu(&watch->list_node, &wlist->watchers); + return 0; + } + EXPORT_SYMBOL(add_watch_to_object); -- 2.47.3