From 965501d00facf6945aa90a7b9ee625c40a21e6c0 Mon Sep 17 00:00:00 2001 From: Sasha Levin Date: Sat, 3 Aug 2024 10:48:13 -0400 Subject: [PATCH] Fixes for 5.15 Signed-off-by: Sasha Levin --- ...pq8074-disable-ss-instance-in-parkmo.patch | 53 +++ ...sm8996-move-clock-cells-to-qmp-phy-c.patch | 150 +++++++ ...sm8998-disable-ss-instance-in-parkmo.patch | 45 ++ ...com-msm8998-drop-usb-phy-clock-index.patch | 38 ++ ...sm8998-switch-usb-qmp-phy-to-new-sty.patch | 85 ++++ ...nx-check-return-status-of-get_api_ve.patch | 53 +++ ...ll-mstb-marked-as-not-probed-after-s.patch | 61 +++ ...xtent-status-again-before-inserting-.patch | 100 +++++ ...exclusive-lock-while-inserting-delal.patch | 114 +++++ ...-a-common-helper-to-query-extent-map.patch | 104 +++++ ...ke-ext4_es_insert_extent-return-void.patch | 158 +++++++ .../ext4-refactor-ext4_da_map_blocks.patch | 93 ++++ ...eg_all_data_atgc-if-blkaddr-is-valid.patch | 59 +++ ...d-use-ssr-allocate-when-do-defragmen.patch | 75 ++++ ...-f2fs_ipu_honor_opu_write-ipu-policy.patch | 230 ++++++++++ ...-pm-device-to-originate-from-irq-dom.patch | 122 +++++ ...oint_restore_ns_capable-to-modify-c-.patch | 110 +++++ ...sions-for-checkpoint_restart-sysctls.patch | 137 ++++++ ...-remove-fallback-for-config_proc_sys.patch | 69 +++ ...ore-ipc-sysctls-in-the-ipc-namespace.patch | 406 +++++++++++++++++ ...-mqueue-sysctls-in-the-ipc-namespace.patch | 323 ++++++++++++++ ...ce-address-selection-with-route-leak.patch | 53 +++ ...-imx-irqsteer-add-runtime-pm-support.patch | 85 ++++ ...mx-irqsteer-constify-irq_chip-struct.patch | 36 ++ ...teer-handle-runtime-power-management.patch | 107 +++++ ...-fixed-unbalanced-fwnode-get-and-put.patch | 70 +++ ...omain-use-return-value-of-strreplace.patch | 39 ++ ...l-synchronize_rcu-before-calling-tri.patch | 60 +++ ...ove-unused-function-led_trigger_rena.patch | 77 ++++ ...re-brightness-set-by-led_trigger_eve.patch | 98 ++++ ...se-rcu-to-protect-the-led_cdevs-list.patch | 166 +++++++ 
...ush-pending-brightness-before-activa.patch | 66 +++ ...ts-loongson-fix-liointc-irq-polarity.patch | 172 +++++++ ...-loongson-fix-ls2k1000-rtc-interrupt.patch | 38 ++ ...dts-add-rtc-support-to-loongson-2k10.patch | 42 ++ ...n64-dts-fix-pcie-port-nodes-for-ls7a.patch | 161 +++++++ ...ndex-to-flow-struct-and-avoid-oif-re.patch | 419 ++++++++++++++++++ ...proc-fix-refcount-mistake-in-imx_rpr.patch | 64 +++ queue-5.15/series | 43 ++ ...pm_init_finalize-to-zynqmp_pm_domain.patch | 92 ++++ ...nge-system-v-ipc-sysctls-inside-ipc-.patch | 140 ++++++ ...change-limits-for-posix-messages-que.patch | 95 ++++ ...sysctl-always-initialize-i_uid-i_gid.patch | 52 +++ ...drop-unused-argument-ctl_table_root-.patch | 127 ++++++ 44 files changed, 4887 insertions(+) create mode 100644 queue-5.15/arm64-dts-qcom-ipq8074-disable-ss-instance-in-parkmo.patch create mode 100644 queue-5.15/arm64-dts-qcom-msm8996-move-clock-cells-to-qmp-phy-c.patch create mode 100644 queue-5.15/arm64-dts-qcom-msm8998-disable-ss-instance-in-parkmo.patch create mode 100644 queue-5.15/arm64-dts-qcom-msm8998-drop-usb-phy-clock-index.patch create mode 100644 queue-5.15/arm64-dts-qcom-msm8998-switch-usb-qmp-phy-to-new-sty.patch create mode 100644 queue-5.15/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch create mode 100644 queue-5.15/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-s.patch create mode 100644 queue-5.15/ext4-check-the-extent-status-again-before-inserting-.patch create mode 100644 queue-5.15/ext4-convert-to-exclusive-lock-while-inserting-delal.patch create mode 100644 queue-5.15/ext4-factor-out-a-common-helper-to-query-extent-map.patch create mode 100644 queue-5.15/ext4-make-ext4_es_insert_extent-return-void.patch create mode 100644 queue-5.15/ext4-refactor-ext4_da_map_blocks.patch create mode 100644 queue-5.15/f2fs-assign-curseg_all_data_atgc-if-blkaddr-is-valid.patch create mode 100644 queue-5.15/f2fs-fix-to-avoid-use-ssr-allocate-when-do-defragmen.patch create mode 100644 
queue-5.15/f2fs-introduce-f2fs_ipu_honor_opu_write-ipu-policy.patch create mode 100644 queue-5.15/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch create mode 100644 queue-5.15/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch create mode 100644 queue-5.15/ipc-check-permissions-for-checkpoint_restart-sysctls.patch create mode 100644 queue-5.15/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch create mode 100644 queue-5.15/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch create mode 100644 queue-5.15/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch create mode 100644 queue-5.15/ipv4-fix-source-address-selection-with-route-leak.patch create mode 100644 queue-5.15/irqchip-imx-irqsteer-add-runtime-pm-support.patch create mode 100644 queue-5.15/irqchip-imx-irqsteer-constify-irq_chip-struct.patch create mode 100644 queue-5.15/irqchip-imx-irqsteer-handle-runtime-power-management.patch create mode 100644 queue-5.15/irqdomain-fixed-unbalanced-fwnode-get-and-put.patch create mode 100644 queue-5.15/irqdomain-use-return-value-of-strreplace.patch create mode 100644 queue-5.15/leds-trigger-call-synchronize_rcu-before-calling-tri.patch create mode 100644 queue-5.15/leds-trigger-remove-unused-function-led_trigger_rena.patch create mode 100644 queue-5.15/leds-trigger-store-brightness-set-by-led_trigger_eve.patch create mode 100644 queue-5.15/leds-trigger-use-rcu-to-protect-the-led_cdevs-list.patch create mode 100644 queue-5.15/leds-triggers-flush-pending-brightness-before-activa.patch create mode 100644 queue-5.15/mips-dts-loongson-fix-liointc-irq-polarity.patch create mode 100644 queue-5.15/mips-dts-loongson-fix-ls2k1000-rtc-interrupt.patch create mode 100644 queue-5.15/mips-loongson64-dts-add-rtc-support-to-loongson-2k10.patch create mode 100644 queue-5.15/mips-loongson64-dts-fix-pcie-port-nodes-for-ls7a.patch create mode 100644 queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch create mode 100644 
queue-5.15/remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rpr.patch create mode 100644 queue-5.15/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch create mode 100644 queue-5.15/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch create mode 100644 queue-5.15/sysctl-allow-to-change-limits-for-posix-messages-que.patch create mode 100644 queue-5.15/sysctl-always-initialize-i_uid-i_gid.patch create mode 100644 queue-5.15/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch diff --git a/queue-5.15/arm64-dts-qcom-ipq8074-disable-ss-instance-in-parkmo.patch b/queue-5.15/arm64-dts-qcom-ipq8074-disable-ss-instance-in-parkmo.patch new file mode 100644 index 00000000000..d46755a830c --- /dev/null +++ b/queue-5.15/arm64-dts-qcom-ipq8074-disable-ss-instance-in-parkmo.patch @@ -0,0 +1,53 @@ +From 5e45369d7cc95bca5be49b604ab2468eae9c3192 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Jul 2024 20:58:42 +0530 +Subject: arm64: dts: qcom: ipq8074: Disable SS instance in Parkmode for USB + +From: Krishna Kurapati + +[ Upstream commit dc6ba95c6c4400a84cca5b419b34ae852a08cfb5 ] + +For Gen-1 targets like IPQ8074, it is seen that stressing out the +controller in host mode results in HC died error: + + xhci-hcd.12.auto: xHCI host not responding to stop endpoint command + xhci-hcd.12.auto: xHCI host controller not responding, assume dead + xhci-hcd.12.auto: HC died; cleaning up + +And at this instant only restarting the host mode fixes it. Disable +SuperSpeed instance in park mode for IPQ8074 to mitigate this issue. 
+ +Cc: stable@vger.kernel.org +Fixes: 5e09bc51d07b ("arm64: dts: ipq8074: enable USB support") +Signed-off-by: Krishna Kurapati +Reviewed-by: Konrad Dybcio +Link: https://lore.kernel.org/r/20240704152848.3380602-3-quic_kriskura@quicinc.com +Signed-off-by: Bjorn Andersson +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/qcom/ipq8074.dtsi | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi +index 0a4c5b847ddd5..384904344baf0 100644 +--- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi ++++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi +@@ -514,6 +514,7 @@ dwc_0: dwc3@8a00000 { + interrupts = ; + phys = <&qusb_phy_0>, <&usb0_ssphy>; + phy-names = "usb2-phy", "usb3-phy"; ++ snps,parkmode-disable-ss-quirk; + snps,is-utmi-l1-suspend; + snps,hird-threshold = /bits/ 8 <0x0>; + snps,dis_u2_susphy_quirk; +@@ -554,6 +555,7 @@ dwc_1: dwc3@8c00000 { + interrupts = ; + phys = <&qusb_phy_1>, <&usb1_ssphy>; + phy-names = "usb2-phy", "usb3-phy"; ++ snps,parkmode-disable-ss-quirk; + snps,is-utmi-l1-suspend; + snps,hird-threshold = /bits/ 8 <0x0>; + snps,dis_u2_susphy_quirk; +-- +2.43.0 + diff --git a/queue-5.15/arm64-dts-qcom-msm8996-move-clock-cells-to-qmp-phy-c.patch b/queue-5.15/arm64-dts-qcom-msm8996-move-clock-cells-to-qmp-phy-c.patch new file mode 100644 index 00000000000..7c41ad730a9 --- /dev/null +++ b/queue-5.15/arm64-dts-qcom-msm8996-move-clock-cells-to-qmp-phy-c.patch @@ -0,0 +1,150 @@ +From 2364c0737e9571c77f943a389485aaf242c61ece Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 Sep 2021 11:42:46 +0800 +Subject: arm64: dts: qcom: msm8996: Move '#clock-cells' to QMP PHY child node + +From: Shawn Guo + +[ Upstream commit 82d61e19fccbf2fe7c018765b3799791916e7f31 ] + +'#clock-cells' is a required property of QMP PHY child node, not itself. +Move it to fix the dtbs_check warnings. 
+ +There are only '#clock-cells' removal from SM8350 QMP PHY nodes, because +child nodes already have the property. + +Signed-off-by: Shawn Guo +Signed-off-by: Bjorn Andersson +Link: https://lore.kernel.org/r/20210929034253.24570-4-shawn.guo@linaro.org +Stable-dep-of: 0046325ae520 ("arm64: dts: qcom: msm8998: Disable SS instance in Parkmode for USB") +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/qcom/ipq8074.dtsi | 4 ++-- + arch/arm64/boot/dts/qcom/msm8996.dtsi | 4 ++-- + arch/arm64/boot/dts/qcom/msm8998.dtsi | 2 +- + arch/arm64/boot/dts/qcom/sm8350.dtsi | 3 --- + 4 files changed, 5 insertions(+), 8 deletions(-) + +diff --git a/arch/arm64/boot/dts/qcom/ipq8074.dtsi b/arch/arm64/boot/dts/qcom/ipq8074.dtsi +index 17eeff106bab7..0a4c5b847ddd5 100644 +--- a/arch/arm64/boot/dts/qcom/ipq8074.dtsi ++++ b/arch/arm64/boot/dts/qcom/ipq8074.dtsi +@@ -91,7 +91,6 @@ soc: soc { + ssphy_1: phy@58000 { + compatible = "qcom,ipq8074-qmp-usb3-phy"; + reg = <0x00058000 0x1c4>; +- #clock-cells = <1>; + #address-cells = <1>; + #size-cells = <1>; + ranges; +@@ -112,6 +111,7 @@ usb1_ssphy: phy@58200 { + <0x00058800 0x1f8>, /* PCS */ + <0x00058600 0x044>; /* PCS misc*/ + #phy-cells = <0>; ++ #clock-cells = <1>; + clocks = <&gcc GCC_USB1_PIPE_CLK>; + clock-names = "pipe0"; + clock-output-names = "usb3phy_1_cc_pipe_clk"; +@@ -134,7 +134,6 @@ qusb_phy_1: phy@59000 { + ssphy_0: phy@78000 { + compatible = "qcom,ipq8074-qmp-usb3-phy"; + reg = <0x00078000 0x1c4>; +- #clock-cells = <1>; + #address-cells = <1>; + #size-cells = <1>; + ranges; +@@ -155,6 +154,7 @@ usb0_ssphy: phy@78200 { + <0x00078800 0x1f8>, /* PCS */ + <0x00078600 0x044>; /* PCS misc*/ + #phy-cells = <0>; ++ #clock-cells = <1>; + clocks = <&gcc GCC_USB0_PIPE_CLK>; + clock-names = "pipe0"; + clock-output-names = "usb3phy_0_cc_pipe_clk"; +diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi +index 210016ff50449..9b74cf57b6d1c 100644 +--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi ++++ 
b/arch/arm64/boot/dts/qcom/msm8996.dtsi +@@ -615,7 +615,6 @@ soc: soc { + pcie_phy: phy@34000 { + compatible = "qcom,msm8996-qmp-pcie-phy"; + reg = <0x00034000 0x488>; +- #clock-cells = <1>; + #address-cells = <1>; + #size-cells = <1>; + ranges; +@@ -637,6 +636,7 @@ pciephy_0: phy@35000 { + <0x00035400 0x1dc>; + #phy-cells = <0>; + ++ #clock-cells = <1>; + clock-output-names = "pcie_0_pipe_clk_src"; + clocks = <&gcc GCC_PCIE_0_PIPE_CLK>; + clock-names = "pipe0"; +@@ -2642,7 +2642,6 @@ usb3_dwc3: dwc3@6a00000 { + usb3phy: phy@7410000 { + compatible = "qcom,msm8996-qmp-usb3-phy"; + reg = <0x07410000 0x1c4>; +- #clock-cells = <1>; + #address-cells = <1>; + #size-cells = <1>; + ranges; +@@ -2663,6 +2662,7 @@ ssusb_phy_0: phy@7410200 { + <0x07410600 0x1a8>; + #phy-cells = <0>; + ++ #clock-cells = <1>; + clock-output-names = "usb3_phy_pipe_clk_src"; + clocks = <&gcc GCC_USB3_PHY_PIPE_CLK>; + clock-names = "pipe0"; +diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi +index d636718adbde2..42329e78437e9 100644 +--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi ++++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi +@@ -1993,7 +1993,6 @@ usb3phy: phy@c010000 { + compatible = "qcom,msm8998-qmp-usb3-phy"; + reg = <0x0c010000 0x18c>; + status = "disabled"; +- #clock-cells = <1>; + #address-cells = <1>; + #size-cells = <1>; + ranges; +@@ -2014,6 +2013,7 @@ usb1_ssphy: phy@c010200 { + <0xc010600 0x128>, + <0xc010800 0x200>; + #phy-cells = <0>; ++ #clock-cells = <1>; + clocks = <&gcc GCC_USB3_PHY_PIPE_CLK>; + clock-names = "pipe0"; + clock-output-names = "usb3_phy_pipe_clk_src"; +diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi +index b0ba63b5869d2..8506dc841c869 100644 +--- a/arch/arm64/boot/dts/qcom/sm8350.dtsi ++++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi +@@ -1119,7 +1119,6 @@ ufs_mem_phy: phy@1d87000 { + reg = <0 0x01d87000 0 0x1c4>; + #address-cells = <2>; + #size-cells = <2>; +- #clock-cells = <1>; + ranges; + 
clock-names = "ref", + "ref_aux"; +@@ -1254,7 +1253,6 @@ usb_1_qmpphy: phy-wrapper@88e9000 { + <0 0x088e8000 0 0x20>; + reg-names = "reg-base", "dp_com"; + status = "disabled"; +- #clock-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; + ranges; +@@ -1287,7 +1285,6 @@ usb_2_qmpphy: phy-wrapper@88eb000 { + compatible = "qcom,sm8350-qmp-usb3-uni-phy"; + reg = <0 0x088eb000 0 0x200>; + status = "disabled"; +- #clock-cells = <1>; + #address-cells = <2>; + #size-cells = <2>; + ranges; +-- +2.43.0 + diff --git a/queue-5.15/arm64-dts-qcom-msm8998-disable-ss-instance-in-parkmo.patch b/queue-5.15/arm64-dts-qcom-msm8998-disable-ss-instance-in-parkmo.patch new file mode 100644 index 00000000000..f8ea30345f4 --- /dev/null +++ b/queue-5.15/arm64-dts-qcom-msm8998-disable-ss-instance-in-parkmo.patch @@ -0,0 +1,45 @@ +From 98e3969ecfb999716c68e426fbe207dae275370d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 4 Jul 2024 20:58:43 +0530 +Subject: arm64: dts: qcom: msm8998: Disable SS instance in Parkmode for USB + +From: Krishna Kurapati + +[ Upstream commit 0046325ae52079b46da13a7f84dd7b2a6f7c38f8 ] + +For Gen-1 targets like MSM8998, it is seen that stressing out the +controller in host mode results in HC died error: + + xhci-hcd.12.auto: xHCI host not responding to stop endpoint command + xhci-hcd.12.auto: xHCI host controller not responding, assume dead + xhci-hcd.12.auto: HC died; cleaning up + +And at this instant only restarting the host mode fixes it. Disable +SuperSpeed instance in park mode for MSM8998 to mitigate this issue. 
+ +Cc: stable@vger.kernel.org +Fixes: 026dad8f5873 ("arm64: dts: qcom: msm8998: Add USB-related nodes") +Signed-off-by: Krishna Kurapati +Reviewed-by: Konrad Dybcio +Link: https://lore.kernel.org/r/20240704152848.3380602-4-quic_kriskura@quicinc.com +Signed-off-by: Bjorn Andersson +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/qcom/msm8998.dtsi | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi +index 8037288359482..8ca6a6f1a541d 100644 +--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi ++++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi +@@ -1982,6 +1982,7 @@ usb3_dwc3: dwc3@a800000 { + interrupts = ; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; ++ snps,parkmode-disable-ss-quirk; + phys = <&qusb2phy>, <&usb3phy>; + phy-names = "usb2-phy", "usb3-phy"; + snps,has-lpm-erratum; +-- +2.43.0 + diff --git a/queue-5.15/arm64-dts-qcom-msm8998-drop-usb-phy-clock-index.patch b/queue-5.15/arm64-dts-qcom-msm8998-drop-usb-phy-clock-index.patch new file mode 100644 index 00000000000..0f0ebfd2d84 --- /dev/null +++ b/queue-5.15/arm64-dts-qcom-msm8998-drop-usb-phy-clock-index.patch @@ -0,0 +1,38 @@ +From 3a21a9b1d6da6b146cab3ac25189fac17dd2bcb3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 5 Jul 2022 13:40:24 +0200 +Subject: arm64: dts: qcom: msm8998: drop USB PHY clock index + +From: Johan Hovold + +[ Upstream commit ed9cbbcb8c6a1925db7995214602c6a8983ff870 ] + +The QMP USB PHY provides a single clock so drop the redundant clock +index. 
+ +Signed-off-by: Johan Hovold +Reviewed-by: Dmitry Baryshkov +Signed-off-by: Bjorn Andersson +Link: https://lore.kernel.org/r/20220705114032.22787-7-johan+linaro@kernel.org +Stable-dep-of: 0046325ae520 ("arm64: dts: qcom: msm8998: Disable SS instance in Parkmode for USB") +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/qcom/msm8998.dtsi | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi +index 42329e78437e9..f5772d6efaa8b 100644 +--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi ++++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi +@@ -2013,7 +2013,7 @@ usb1_ssphy: phy@c010200 { + <0xc010600 0x128>, + <0xc010800 0x200>; + #phy-cells = <0>; +- #clock-cells = <1>; ++ #clock-cells = <0>; + clocks = <&gcc GCC_USB3_PHY_PIPE_CLK>; + clock-names = "pipe0"; + clock-output-names = "usb3_phy_pipe_clk_src"; +-- +2.43.0 + diff --git a/queue-5.15/arm64-dts-qcom-msm8998-switch-usb-qmp-phy-to-new-sty.patch b/queue-5.15/arm64-dts-qcom-msm8998-switch-usb-qmp-phy-to-new-sty.patch new file mode 100644 index 00000000000..408c7b2d5b6 --- /dev/null +++ b/queue-5.15/arm64-dts-qcom-msm8998-switch-usb-qmp-phy-to-new-sty.patch @@ -0,0 +1,85 @@ +From c7bf87c6ad5bd0e63710c248760b4c01089bcb88 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 25 Aug 2023 00:19:46 +0300 +Subject: arm64: dts: qcom: msm8998: switch USB QMP PHY to new style of + bindings + +From: Dmitry Baryshkov + +[ Upstream commit b7efebfeb2e8ad8187cdabba5f0212ba2e6c1069 ] + +Change the USB QMP PHY to use newer style of QMP PHY bindings (single +resource region, no per-PHY subnodes). 
+ +Signed-off-by: Dmitry Baryshkov +Link: https://lore.kernel.org/r/20230824211952.1397699-11-dmitry.baryshkov@linaro.org +Signed-off-by: Bjorn Andersson +Stable-dep-of: 0046325ae520 ("arm64: dts: qcom: msm8998: Disable SS instance in Parkmode for USB") +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/qcom/msm8998.dtsi | 35 +++++++++++---------------- + 1 file changed, 14 insertions(+), 21 deletions(-) + +diff --git a/arch/arm64/boot/dts/qcom/msm8998.dtsi b/arch/arm64/boot/dts/qcom/msm8998.dtsi +index f5772d6efaa8b..8037288359482 100644 +--- a/arch/arm64/boot/dts/qcom/msm8998.dtsi ++++ b/arch/arm64/boot/dts/qcom/msm8998.dtsi +@@ -1982,7 +1982,7 @@ usb3_dwc3: dwc3@a800000 { + interrupts = ; + snps,dis_u2_susphy_quirk; + snps,dis_enblslpm_quirk; +- phys = <&qusb2phy>, <&usb1_ssphy>; ++ phys = <&qusb2phy>, <&usb3phy>; + phy-names = "usb2-phy", "usb3-phy"; + snps,has-lpm-erratum; + snps,hird-threshold = /bits/ 8 <0x10>; +@@ -1991,33 +1991,26 @@ usb3_dwc3: dwc3@a800000 { + + usb3phy: phy@c010000 { + compatible = "qcom,msm8998-qmp-usb3-phy"; +- reg = <0x0c010000 0x18c>; +- status = "disabled"; +- #address-cells = <1>; +- #size-cells = <1>; +- ranges; ++ reg = <0x0c010000 0x1000>; + + clocks = <&gcc GCC_USB3_PHY_AUX_CLK>, ++ <&gcc GCC_USB3_CLKREF_CLK>, + <&gcc GCC_USB_PHY_CFG_AHB2PHY_CLK>, +- <&gcc GCC_USB3_CLKREF_CLK>; +- clock-names = "aux", "cfg_ahb", "ref"; ++ <&gcc GCC_USB3_PHY_PIPE_CLK>; ++ clock-names = "aux", ++ "ref", ++ "cfg_ahb", ++ "pipe"; ++ clock-output-names = "usb3_phy_pipe_clk_src"; ++ #clock-cells = <0>; ++ #phy-cells = <0>; + + resets = <&gcc GCC_USB3_PHY_BCR>, + <&gcc GCC_USB3PHY_PHY_BCR>; +- reset-names = "phy", "common"; ++ reset-names = "phy", ++ "phy_phy"; + +- usb1_ssphy: phy@c010200 { +- reg = <0xc010200 0x128>, +- <0xc010400 0x200>, +- <0xc010c00 0x20c>, +- <0xc010600 0x128>, +- <0xc010800 0x200>; +- #phy-cells = <0>; +- #clock-cells = <0>; +- clocks = <&gcc GCC_USB3_PHY_PIPE_CLK>; +- clock-names = "pipe0"; +- clock-output-names = 
"usb3_phy_pipe_clk_src"; +- }; ++ status = "disabled"; + }; + + qusb2phy: phy@c012000 { +-- +2.43.0 + diff --git a/queue-5.15/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch b/queue-5.15/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch new file mode 100644 index 00000000000..092c3053470 --- /dev/null +++ b/queue-5.15/drivers-soc-xilinx-check-return-status-of-get_api_ve.patch @@ -0,0 +1,53 @@ +From c9bf9ab5231626c369b5edd70e72d2def6f45ae5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 May 2024 04:23:45 -0700 +Subject: drivers: soc: xilinx: check return status of get_api_version() + +From: Jay Buddhabhatti + +[ Upstream commit 9b003e14801cf85a8cebeddc87bc9fc77100fdce ] + +Currently return status is not getting checked for get_api_version +and because of that for x86 arch we are getting below smatch error. + + CC drivers/soc/xilinx/zynqmp_power.o +drivers/soc/xilinx/zynqmp_power.c: In function 'zynqmp_pm_probe': +drivers/soc/xilinx/zynqmp_power.c:295:12: warning: 'pm_api_version' is +used uninitialized [-Wuninitialized] + 295 | if (pm_api_version < ZYNQMP_PM_VERSION) + | ^ + CHECK drivers/soc/xilinx/zynqmp_power.c +drivers/soc/xilinx/zynqmp_power.c:295 zynqmp_pm_probe() error: +uninitialized symbol 'pm_api_version'. + +So, check return status of pm_get_api_version and return error in case +of failure to avoid checking uninitialized pm_api_version variable. 
+ +Fixes: b9b3a8be28b3 ("firmware: xilinx: Remove eemi ops for get_api_version") +Signed-off-by: Jay Buddhabhatti +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240515112345.24673-1-jay.buddhabhatti@amd.com +Signed-off-by: Michal Simek +Signed-off-by: Sasha Levin +--- + drivers/soc/xilinx/zynqmp_power.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c +index f8c301984d4f9..2653d29ba829b 100644 +--- a/drivers/soc/xilinx/zynqmp_power.c ++++ b/drivers/soc/xilinx/zynqmp_power.c +@@ -178,7 +178,9 @@ static int zynqmp_pm_probe(struct platform_device *pdev) + u32 pm_api_version; + struct mbox_client *client; + +- zynqmp_pm_get_api_version(&pm_api_version); ++ ret = zynqmp_pm_get_api_version(&pm_api_version); ++ if (ret) ++ return ret; + + /* Check PM API version number */ + if (pm_api_version < ZYNQMP_PM_VERSION) +-- +2.43.0 + diff --git a/queue-5.15/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-s.patch b/queue-5.15/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-s.patch new file mode 100644 index 00000000000..6b26d4db61a --- /dev/null +++ b/queue-5.15/drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-s.patch @@ -0,0 +1,61 @@ +From fac1554ae82533dbffbcdefbc84f72031b6c615c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 26 Jun 2024 16:48:23 +0800 +Subject: drm/dp_mst: Fix all mstb marked as not probed after suspend/resume + +From: Wayne Lin + +[ Upstream commit d63d81094d208abb20fc444514b2d9ec2f4b7c4e ] + +[Why] +After suspend/resume, with topology unchanged, observe that +link_address_sent of all mstb are marked as false even the topology probing +is done without any error. + +It is caused by wrongly also include "ret == 0" case as a probing failure +case. + +[How] +Remove inappropriate checking conditions.
+ +Cc: Lyude Paul +Cc: Harry Wentland +Cc: Jani Nikula +Cc: Imre Deak +Cc: Daniel Vetter +Cc: stable@vger.kernel.org +Fixes: 37dfdc55ffeb ("drm/dp_mst: Cleanup drm_dp_send_link_address() a bit") +Signed-off-by: Wayne Lin +Reviewed-by: Lyude Paul +Signed-off-by: Lyude Paul +Link: https://patchwork.freedesktop.org/patch/msgid/20240626084825.878565-2-Wayne.Lin@amd.com +Signed-off-by: Sasha Levin +--- + drivers/gpu/drm/drm_dp_mst_topology.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c +index 865c7f39143ec..f24667a003a2b 100644 +--- a/drivers/gpu/drm/drm_dp_mst_topology.c ++++ b/drivers/gpu/drm/drm_dp_mst_topology.c +@@ -2974,7 +2974,7 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, + + /* FIXME: Actually do some real error handling here */ + ret = drm_dp_mst_wait_tx_reply(mstb, txmsg); +- if (ret <= 0) { ++ if (ret < 0) { + drm_err(mgr->dev, "Sending link address failed with %d\n", ret); + goto out; + } +@@ -3026,7 +3026,7 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, + mutex_unlock(&mgr->lock); + + out: +- if (ret <= 0) ++ if (ret < 0) + mstb->link_address_sent = false; + kfree(txmsg); + return ret < 0 ? 
ret : changed; +-- +2.43.0 + diff --git a/queue-5.15/ext4-check-the-extent-status-again-before-inserting-.patch b/queue-5.15/ext4-check-the-extent-status-again-before-inserting-.patch new file mode 100644 index 00000000000..bf9add416b2 --- /dev/null +++ b/queue-5.15/ext4-check-the-extent-status-again-before-inserting-.patch @@ -0,0 +1,100 @@ +From c611e0aeeb0f2e6d629d1f5ca3b834b106d30b22 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 May 2024 20:39:57 +0800 +Subject: ext4: check the extent status again before inserting delalloc block + +From: Zhang Yi + +[ Upstream commit 0ea6560abb3bac1ffcfa4bf6b2c4d344fdc27b3c ] + +ext4_da_map_blocks looks up for any extent entry in the extent status +tree (w/o i_data_sem) and then the looks up for any ondisk extent +mapping (with i_data_sem in read mode). + +If it finds a hole in the extent status tree or if it couldn't find any +entry at all, it then takes the i_data_sem in write mode to add a da +entry into the extent status tree. This can actually race with page +mkwrite & fallocate path. + +Note that this is ok between +1. ext4 buffered-write path v/s ext4_page_mkwrite(), because of the + folio lock +2. ext4 buffered write path v/s ext4 fallocate because of the inode + lock. + +But this can race between ext4_page_mkwrite() & ext4 fallocate path + +ext4_page_mkwrite() ext4_fallocate() + block_page_mkwrite() + ext4_da_map_blocks() + //find hole in extent status tree + ext4_alloc_file_blocks() + ext4_map_blocks() + //allocate block and unwritten extent + ext4_insert_delayed_block() + ext4_da_reserve_space() + //reserve one more block + ext4_es_insert_delayed_block() + //drop unwritten extent and add delayed extent by mistake + +Then, the delalloc extent is wrong until writeback and the extra +reserved block can't be released any more and it triggers below warning: + + EXT4-fs (pmem2): Inode 13 (00000000bbbd4d23): i_reserved_data_blocks(1) not cleared! 
+ +Fix the problem by looking up extent status tree again while the +i_data_sem is held in write mode. If it still can't find any entry, then +we insert a new da entry into the extent status tree. + +Cc: stable@vger.kernel.org +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://patch.msgid.link/20240517124005.347221-3-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 21 +++++++++++++++++++++ + 1 file changed, 21 insertions(+) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 403f88662bc30..e765c0d05fea2 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1744,6 +1744,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + if (ext4_es_is_hole(&es)) + goto add_delayed; + ++found: + /* + * Delayed extent could be allocated by fallocate. + * So we need to check it. +@@ -1788,6 +1789,26 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + + add_delayed: + down_write(&EXT4_I(inode)->i_data_sem); ++ /* ++ * Page fault path (ext4_page_mkwrite does not take i_rwsem) ++ * and fallocate path (no folio lock) can race. Make sure we ++ * lookup the extent status tree here again while i_data_sem ++ * is held in write mode, before inserting a new da entry in ++ * the extent status tree. 
++ */ ++ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { ++ if (!ext4_es_is_hole(&es)) { ++ up_write(&EXT4_I(inode)->i_data_sem); ++ goto found; ++ } ++ } else if (!ext4_has_inline_data(inode)) { ++ retval = ext4_map_query_blocks(NULL, inode, map); ++ if (retval) { ++ up_write(&EXT4_I(inode)->i_data_sem); ++ return retval; ++ } ++ } ++ + retval = ext4_insert_delayed_block(inode, map->m_lblk); + up_write(&EXT4_I(inode)->i_data_sem); + if (retval) +-- +2.43.0 + diff --git a/queue-5.15/ext4-convert-to-exclusive-lock-while-inserting-delal.patch b/queue-5.15/ext4-convert-to-exclusive-lock-while-inserting-delal.patch new file mode 100644 index 00000000000..7657843666f --- /dev/null +++ b/queue-5.15/ext4-convert-to-exclusive-lock-while-inserting-delal.patch @@ -0,0 +1,114 @@ +From df4a1b22741f796444d33b35cb894453e8ecdccb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 27 Jan 2024 09:58:01 +0800 +Subject: ext4: convert to exclusive lock while inserting delalloc extents + +From: Zhang Yi + +[ Upstream commit acf795dc161f3cf481db20f05db4250714e375e5 ] + +ext4_da_map_blocks() only hold i_data_sem in shared mode and i_rwsem +when inserting delalloc extents, it could be raced by another querying +path of ext4_map_blocks() without i_rwsem, e.g. buffered read path. +Suppose we buffered read a file containing just a hole, and without any +cached extents tree, then it is raced by another delayed buffered write +to the same area or the near area belongs to the same hole, and the new +delalloc extent could be overwritten to a hole extent.
+ + pread() pwrite() + filemap_read_folio() + ext4_mpage_readpages() + ext4_map_blocks() + down_read(i_data_sem) + ext4_ext_determine_hole() + //find hole + ext4_ext_put_gap_in_cache() + ext4_es_find_extent_range() + //no delalloc extent + ext4_da_map_blocks() + down_read(i_data_sem) + ext4_insert_delayed_block() + //insert delalloc extent + ext4_es_insert_extent() + //overwrite delalloc extent to hole + +This race could lead to inconsistent delalloc extents tree and +incorrect reserved space counter. Fix this by converting to hold +i_data_sem in exclusive mode when adding a new delalloc extent in +ext4_da_map_blocks(). + +Cc: stable@vger.kernel.org +Signed-off-by: Zhang Yi +Suggested-by: Jan Kara +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20240127015825.1608160-3-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block") +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 25 +++++++++++-------------- + 1 file changed, 11 insertions(+), 14 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index bfd81ff29afcf..329e3dc9cb32c 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1712,10 +1712,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + + /* Lookup extent status tree firstly */ + if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { +- if (ext4_es_is_hole(&es)) { +- down_read(&EXT4_I(inode)->i_data_sem); ++ if (ext4_es_is_hole(&es)) + goto add_delayed; +- } + + /* + * Delayed extent could be allocated by fallocate. 
+@@ -1757,8 +1755,10 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + retval = ext4_ext_map_blocks(NULL, inode, map, 0); + else + retval = ext4_ind_map_blocks(NULL, inode, map, 0); +- if (retval < 0) +- goto out_unlock; ++ if (retval < 0) { ++ up_read(&EXT4_I(inode)->i_data_sem); ++ return retval; ++ } + if (retval > 0) { + unsigned int status; + +@@ -1774,24 +1774,21 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); +- goto out_unlock; ++ up_read(&EXT4_I(inode)->i_data_sem); ++ return retval; + } ++ up_read(&EXT4_I(inode)->i_data_sem); + + add_delayed: +- /* +- * XXX: __block_prepare_write() unmaps passed block, +- * is it OK? +- */ ++ down_write(&EXT4_I(inode)->i_data_sem); + retval = ext4_insert_delayed_block(inode, map->m_lblk); ++ up_write(&EXT4_I(inode)->i_data_sem); + if (retval) +- goto out_unlock; ++ return retval; + + map_bh(bh, inode->i_sb, invalid_block); + set_buffer_new(bh); + set_buffer_delay(bh); +- +-out_unlock: +- up_read((&EXT4_I(inode)->i_data_sem)); + return retval; + } + +-- +2.43.0 + diff --git a/queue-5.15/ext4-factor-out-a-common-helper-to-query-extent-map.patch b/queue-5.15/ext4-factor-out-a-common-helper-to-query-extent-map.patch new file mode 100644 index 00000000000..2ae9bd4932a --- /dev/null +++ b/queue-5.15/ext4-factor-out-a-common-helper-to-query-extent-map.patch @@ -0,0 +1,104 @@ +From c6981cfa5ecb077f93636278a24c3b9a11f50384 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 17 May 2024 20:39:56 +0800 +Subject: ext4: factor out a common helper to query extent map + +From: Zhang Yi + +[ Upstream commit 8e4e5cdf2fdeb99445a468b6b6436ad79b9ecb30 ] + +Factor out a new common helper ext4_map_query_blocks() from the +ext4_da_map_blocks(), it query and return the extent map status on the +inode's extent path, no logic changes. 
+ +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Reviewed-by: Ritesh Harjani (IBM) +Link: https://patch.msgid.link/20240517124005.347221-2-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block") +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 57 +++++++++++++++++++++++++++---------------------- + 1 file changed, 32 insertions(+), 25 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 329e3dc9cb32c..403f88662bc30 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -484,6 +484,35 @@ static void ext4_map_blocks_es_recheck(handle_t *handle, + } + #endif /* ES_AGGRESSIVE_TEST */ + ++static int ext4_map_query_blocks(handle_t *handle, struct inode *inode, ++ struct ext4_map_blocks *map) ++{ ++ unsigned int status; ++ int retval; ++ ++ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) ++ retval = ext4_ext_map_blocks(handle, inode, map, 0); ++ else ++ retval = ext4_ind_map_blocks(handle, inode, map, 0); ++ ++ if (retval <= 0) ++ return retval; ++ ++ if (unlikely(retval != map->m_len)) { ++ ext4_warning(inode->i_sb, ++ "ES len assertion failed for inode " ++ "%lu: retval %d != map->m_len %d", ++ inode->i_ino, retval, map->m_len); ++ WARN_ON(1); ++ } ++ ++ status = map->m_flags & EXT4_MAP_UNWRITTEN ? ++ EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; ++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ++ map->m_pblk, status); ++ return retval; ++} ++ + /* + * The ext4_map_blocks() function tries to look up the requested blocks, + * and returns if the blocks are already mapped. 
+@@ -1751,33 +1780,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + down_read(&EXT4_I(inode)->i_data_sem); + if (ext4_has_inline_data(inode)) + retval = 0; +- else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) +- retval = ext4_ext_map_blocks(NULL, inode, map, 0); + else +- retval = ext4_ind_map_blocks(NULL, inode, map, 0); +- if (retval < 0) { +- up_read(&EXT4_I(inode)->i_data_sem); +- return retval; +- } +- if (retval > 0) { +- unsigned int status; +- +- if (unlikely(retval != map->m_len)) { +- ext4_warning(inode->i_sb, +- "ES len assertion failed for inode " +- "%lu: retval %d != map->m_len %d", +- inode->i_ino, retval, map->m_len); +- WARN_ON(1); +- } +- +- status = map->m_flags & EXT4_MAP_UNWRITTEN ? +- EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; +- ext4_es_insert_extent(inode, map->m_lblk, map->m_len, +- map->m_pblk, status); +- up_read(&EXT4_I(inode)->i_data_sem); +- return retval; +- } ++ retval = ext4_map_query_blocks(NULL, inode, map); + up_read(&EXT4_I(inode)->i_data_sem); ++ if (retval) ++ return retval; + + add_delayed: + down_write(&EXT4_I(inode)->i_data_sem); +-- +2.43.0 + diff --git a/queue-5.15/ext4-make-ext4_es_insert_extent-return-void.patch b/queue-5.15/ext4-make-ext4_es_insert_extent-return-void.patch new file mode 100644 index 00000000000..60d5105efc2 --- /dev/null +++ b/queue-5.15/ext4-make-ext4_es_insert_extent-return-void.patch @@ -0,0 +1,158 @@ +From 345939d8af4ebe300ab0c57e48e8e68d6751247f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 24 Apr 2023 11:38:45 +0800 +Subject: ext4: make ext4_es_insert_extent() return void + +From: Baokun Li + +[ Upstream commit 6c120399cde6b1b5cf65ce403765c579fb3d3e50 ] + +Now ext4_es_insert_extent() never return error, so make it return void. 
+ +Signed-off-by: Baokun Li +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20230424033846.4732-12-libaokun1@huawei.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block") +Signed-off-by: Sasha Levin +--- + fs/ext4/extents.c | 5 +++-- + fs/ext4/extents_status.c | 14 ++++++-------- + fs/ext4/extents_status.h | 6 +++--- + fs/ext4/inode.c | 21 ++++++--------------- + 4 files changed, 18 insertions(+), 28 deletions(-) + +diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c +index cece004b32d5c..6c41bf322315c 100644 +--- a/fs/ext4/extents.c ++++ b/fs/ext4/extents.c +@@ -3112,8 +3112,9 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex) + if (ee_len == 0) + return 0; + +- return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, +- EXTENT_STATUS_WRITTEN); ++ ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock, ++ EXTENT_STATUS_WRITTEN); ++ return 0; + } + + /* FIXME!! we need to try to merge to left or right after zero-out */ +diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c +index ee52dd6afe543..be3b3ccbf70b6 100644 +--- a/fs/ext4/extents_status.c ++++ b/fs/ext4/extents_status.c +@@ -848,12 +848,10 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes, + /* + * ext4_es_insert_extent() adds information to an inode's extent + * status tree. +- * +- * Return 0 on success, error code on failure. 
+ */ +-int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, +- ext4_lblk_t len, ext4_fsblk_t pblk, +- unsigned int status) ++void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ++ ext4_lblk_t len, ext4_fsblk_t pblk, ++ unsigned int status) + { + struct extent_status newes; + ext4_lblk_t end = lblk + len - 1; +@@ -865,13 +863,13 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, + bool revise_pending = false; + + if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) +- return 0; ++ return; + + es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n", + lblk, len, pblk, status, inode->i_ino); + + if (!len) +- return 0; ++ return; + + BUG_ON(end < lblk); + +@@ -940,7 +938,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, + goto retry; + + ext4_es_print_tree(inode); +- return 0; ++ return; + } + + /* +diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h +index 4ec30a7982605..481ec4381bee6 100644 +--- a/fs/ext4/extents_status.h ++++ b/fs/ext4/extents_status.h +@@ -127,9 +127,9 @@ extern int __init ext4_init_es(void); + extern void ext4_exit_es(void); + extern void ext4_es_init_tree(struct ext4_es_tree *tree); + +-extern int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, +- ext4_lblk_t len, ext4_fsblk_t pblk, +- unsigned int status); ++extern void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, ++ ext4_lblk_t len, ext4_fsblk_t pblk, ++ unsigned int status); + extern void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, + ext4_lblk_t len, ext4_fsblk_t pblk, + unsigned int status); +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 64a783f221052..7ad37c807147b 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -589,10 +589,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, + ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, + map->m_lblk + map->m_len - 1)) + status |= EXTENT_STATUS_DELAYED; +- ret = 
ext4_es_insert_extent(inode, map->m_lblk, +- map->m_len, map->m_pblk, status); +- if (ret < 0) +- retval = ret; ++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ++ map->m_pblk, status); + } + up_read((&EXT4_I(inode)->i_data_sem)); + +@@ -701,12 +699,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, + ext4_es_scan_range(inode, &ext4_es_is_delayed, map->m_lblk, + map->m_lblk + map->m_len - 1)) + status |= EXTENT_STATUS_DELAYED; +- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, +- map->m_pblk, status); +- if (ret < 0) { +- retval = ret; +- goto out_sem; +- } ++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ++ map->m_pblk, status); + } + + out_sem: +@@ -1784,7 +1778,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + set_buffer_new(bh); + set_buffer_delay(bh); + } else if (retval > 0) { +- int ret; + unsigned int status; + + if (unlikely(retval != map->m_len)) { +@@ -1797,10 +1790,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + + status = map->m_flags & EXT4_MAP_UNWRITTEN ? + EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; +- ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, +- map->m_pblk, status); +- if (ret != 0) +- retval = ret; ++ ext4_es_insert_extent(inode, map->m_lblk, map->m_len, ++ map->m_pblk, status); + } + + out_unlock: +-- +2.43.0 + diff --git a/queue-5.15/ext4-refactor-ext4_da_map_blocks.patch b/queue-5.15/ext4-refactor-ext4_da_map_blocks.patch new file mode 100644 index 00000000000..b07d23e04c0 --- /dev/null +++ b/queue-5.15/ext4-refactor-ext4_da_map_blocks.patch @@ -0,0 +1,93 @@ +From 0dce336ac9e602de16f4a115eec6b741c2df764e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sat, 27 Jan 2024 09:58:00 +0800 +Subject: ext4: refactor ext4_da_map_blocks() + +From: Zhang Yi + +[ Upstream commit 3fcc2b887a1ba4c1f45319cd8c54daa263ecbc36 ] + +Refactor and cleanup ext4_da_map_blocks(), reduce some unnecessary +parameters and branches, no logic changes. 
+ +Signed-off-by: Zhang Yi +Reviewed-by: Jan Kara +Link: https://lore.kernel.org/r/20240127015825.1608160-2-yi.zhang@huaweicloud.com +Signed-off-by: Theodore Ts'o +Stable-dep-of: 0ea6560abb3b ("ext4: check the extent status again before inserting delalloc block") +Signed-off-by: Sasha Levin +--- + fs/ext4/inode.c | 39 +++++++++++++++++---------------------- + 1 file changed, 17 insertions(+), 22 deletions(-) + +diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c +index 7ad37c807147b..bfd81ff29afcf 100644 +--- a/fs/ext4/inode.c ++++ b/fs/ext4/inode.c +@@ -1713,7 +1713,6 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + /* Lookup extent status tree firstly */ + if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) { + if (ext4_es_is_hole(&es)) { +- retval = 0; + down_read(&EXT4_I(inode)->i_data_sem); + goto add_delayed; + } +@@ -1758,26 +1757,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + retval = ext4_ext_map_blocks(NULL, inode, map, 0); + else + retval = ext4_ind_map_blocks(NULL, inode, map, 0); +- +-add_delayed: +- if (retval == 0) { +- int ret; +- +- /* +- * XXX: __block_prepare_write() unmaps passed block, +- * is it OK? +- */ +- +- ret = ext4_insert_delayed_block(inode, map->m_lblk); +- if (ret != 0) { +- retval = ret; +- goto out_unlock; +- } +- +- map_bh(bh, inode->i_sb, invalid_block); +- set_buffer_new(bh); +- set_buffer_delay(bh); +- } else if (retval > 0) { ++ if (retval < 0) ++ goto out_unlock; ++ if (retval > 0) { + unsigned int status; + + if (unlikely(retval != map->m_len)) { +@@ -1792,11 +1774,24 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, + EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; + ext4_es_insert_extent(inode, map->m_lblk, map->m_len, + map->m_pblk, status); ++ goto out_unlock; + } + ++add_delayed: ++ /* ++ * XXX: __block_prepare_write() unmaps passed block, ++ * is it OK? 
++ */ ++ retval = ext4_insert_delayed_block(inode, map->m_lblk); ++ if (retval) ++ goto out_unlock; ++ ++ map_bh(bh, inode->i_sb, invalid_block); ++ set_buffer_new(bh); ++ set_buffer_delay(bh); ++ + out_unlock: + up_read((&EXT4_I(inode)->i_data_sem)); +- + return retval; + } + +-- +2.43.0 + diff --git a/queue-5.15/f2fs-assign-curseg_all_data_atgc-if-blkaddr-is-valid.patch b/queue-5.15/f2fs-assign-curseg_all_data_atgc-if-blkaddr-is-valid.patch new file mode 100644 index 00000000000..d5c02a90869 --- /dev/null +++ b/queue-5.15/f2fs-assign-curseg_all_data_atgc-if-blkaddr-is-valid.patch @@ -0,0 +1,59 @@ +From fce24e07ef388141ce18a9fcc5cc4e5bcc05dc42 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 18 Jun 2024 02:15:38 +0000 +Subject: f2fs: assign CURSEG_ALL_DATA_ATGC if blkaddr is valid + +From: Jaegeuk Kim + +[ Upstream commit 8cb1f4080dd91c6e6b01dbea013a3f42341cb6a1 ] + +mkdir /mnt/test/comp +f2fs_io setflags compression /mnt/test/comp +dd if=/dev/zero of=/mnt/test/comp/testfile bs=16k count=1 +truncate --size 13 /mnt/test/comp/testfile + +In the above scenario, we can get a BUG_ON. + kernel BUG at fs/f2fs/segment.c:3589! + Call Trace: + do_write_page+0x78/0x390 [f2fs] + f2fs_outplace_write_data+0x62/0xb0 [f2fs] + f2fs_do_write_data_page+0x275/0x740 [f2fs] + f2fs_write_single_data_page+0x1dc/0x8f0 [f2fs] + f2fs_write_multi_pages+0x1e5/0xae0 [f2fs] + f2fs_write_cache_pages+0xab1/0xc60 [f2fs] + f2fs_write_data_pages+0x2d8/0x330 [f2fs] + do_writepages+0xcf/0x270 + __writeback_single_inode+0x44/0x350 + writeback_sb_inodes+0x242/0x530 + __writeback_inodes_wb+0x54/0xf0 + wb_writeback+0x192/0x310 + wb_workfn+0x30d/0x400 + +The reason is we gave CURSEG_ALL_DATA_ATGC to COMPR_ADDR where the +page was set the gcing flag by set_cluster_dirty(). 
+ +Cc: stable@vger.kernel.org +Fixes: 4961acdd65c9 ("f2fs: fix to tag gcing flag on page during block migration") +Reviewed-by: Chao Yu +Tested-by: Will McVicker +Signed-off-by: Jaegeuk Kim +Signed-off-by: Sasha Levin +--- + fs/f2fs/segment.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c +index d8b1980df52d6..ae70e536bef37 100644 +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -3360,6 +3360,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) + if (fio->sbi->am.atgc_enabled && + (fio->io_type == FS_DATA_IO) && + (fio->sbi->gc_mode != GC_URGENT_HIGH) && ++ __is_valid_data_blkaddr(fio->old_blkaddr) && + !is_inode_flag_set(inode, FI_OPU_WRITE)) + return CURSEG_ALL_DATA_ATGC; + else +-- +2.43.0 + diff --git a/queue-5.15/f2fs-fix-to-avoid-use-ssr-allocate-when-do-defragmen.patch b/queue-5.15/f2fs-fix-to-avoid-use-ssr-allocate-when-do-defragmen.patch new file mode 100644 index 00000000000..9088b1844d5 --- /dev/null +++ b/queue-5.15/f2fs-fix-to-avoid-use-ssr-allocate-when-do-defragmen.patch @@ -0,0 +1,75 @@ +From f73481308a0fbe8393969f8daa0d85943a0dd576 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 29 May 2024 17:47:00 +0800 +Subject: f2fs: fix to avoid use SSR allocate when do defragment + +From: Zhiguo Niu + +[ Upstream commit 21327a042dd94bc73181d7300e688699cb1f467e ] + +SSR allocate mode will be used when doing file defragment +if ATGC is working at the same time, that is because +set_page_private_gcing may make CURSEG_ALL_DATA_ATGC segment +type got in f2fs_allocate_data_block when defragment page +is writeback, which may cause file fragmentation is worse. 
+ +A file with 2 fragmentations is changed as following after defragment: + +----------------file info------------------- +sensorsdata : +-------------------------------------------- +dev [254:48] +ino [0x 3029 : 12329] +mode [0x 81b0 : 33200] +nlink [0x 1 : 1] +uid [0x 27e6 : 10214] +gid [0x 27e6 : 10214] +size [0x 242000 : 2367488] +blksize [0x 1000 : 4096] +blocks [0x 1210 : 4624] +-------------------------------------------- + +file_pos start_blk end_blk blks + 0 11361121 11361207 87 + 356352 11361215 11361216 2 + 364544 11361218 11361218 1 + 368640 11361220 11361221 2 + 376832 11361224 11361225 2 + 385024 11361227 11361238 12 + 434176 11361240 11361252 13 + 487424 11361254 11361254 1 + 491520 11361271 11361279 9 + 528384 3681794 3681795 2 + 536576 3681797 3681797 1 + 540672 3681799 3681799 1 + 544768 3681803 3681803 1 + 548864 3681805 3681805 1 + 552960 3681807 3681807 1 + 557056 3681809 3681809 1 + +Signed-off-by: Zhiguo Niu +Reviewed-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Stable-dep-of: 8cb1f4080dd9 ("f2fs: assign CURSEG_ALL_DATA_ATGC if blkaddr is valid") +Signed-off-by: Sasha Levin +--- + fs/f2fs/segment.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c +index b059b02fc179d..d8b1980df52d6 100644 +--- a/fs/f2fs/segment.c ++++ b/fs/f2fs/segment.c +@@ -3359,7 +3359,8 @@ static int __get_segment_type_6(struct f2fs_io_info *fio) + if (page_private_gcing(fio->page)) { + if (fio->sbi->am.atgc_enabled && + (fio->io_type == FS_DATA_IO) && +- (fio->sbi->gc_mode != GC_URGENT_HIGH)) ++ (fio->sbi->gc_mode != GC_URGENT_HIGH) && ++ !is_inode_flag_set(inode, FI_OPU_WRITE)) + return CURSEG_ALL_DATA_ATGC; + else + return CURSEG_COLD_DATA; +-- +2.43.0 + diff --git a/queue-5.15/f2fs-introduce-f2fs_ipu_honor_opu_write-ipu-policy.patch b/queue-5.15/f2fs-introduce-f2fs_ipu_honor_opu_write-ipu-policy.patch new file mode 100644 index 00000000000..6f34dce4c94 --- /dev/null +++ 
b/queue-5.15/f2fs-introduce-f2fs_ipu_honor_opu_write-ipu-policy.patch @@ -0,0 +1,230 @@ +From 73bca3a74464453640ea4a8e3aff8f475fcfdbab Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 4 Feb 2022 15:19:46 +0800 +Subject: f2fs: introduce F2FS_IPU_HONOR_OPU_WRITE ipu policy + +From: Chao Yu + +[ Upstream commit 1018a5463a063715365784704c4e8cdf2eec4b04 ] + +Once F2FS_IPU_FORCE policy is enabled in some cases: +a) f2fs forces to use F2FS_IPU_FORCE in a small-sized volume +b) user sets F2FS_IPU_FORCE policy via sysfs + +Then we may fail to defragment file due to IPU policy check, it doesn't +make sense, let's introduce a new IPU policy to allow OPU during file +defragmentation. + +In small-sized volume, let's enable F2FS_IPU_HONOR_OPU_WRITE policy +by default. + +Signed-off-by: Chao Yu +Signed-off-by: Jaegeuk Kim +Stable-dep-of: 8cb1f4080dd9 ("f2fs: assign CURSEG_ALL_DATA_ATGC if blkaddr is valid") +Signed-off-by: Sasha Levin +--- + Documentation/ABI/testing/sysfs-fs-f2fs | 3 ++- + fs/f2fs/data.c | 18 +++++++++++++----- + fs/f2fs/f2fs.h | 3 ++- + fs/f2fs/file.c | 18 +++++++++++------- + fs/f2fs/segment.h | 5 ++++- + fs/f2fs/super.c | 3 ++- + 6 files changed, 34 insertions(+), 16 deletions(-) + +diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs +index 92bc2bdc8baf1..320fc162bcf15 100644 +--- a/Documentation/ABI/testing/sysfs-fs-f2fs ++++ b/Documentation/ABI/testing/sysfs-fs-f2fs +@@ -55,8 +55,9 @@ Description: Controls the in-place-update policy. + 0x04 F2FS_IPU_UTIL + 0x08 F2FS_IPU_SSR_UTIL + 0x10 F2FS_IPU_FSYNC +- 0x20 F2FS_IPU_ASYNC, ++ 0x20 F2FS_IPU_ASYNC + 0x40 F2FS_IPU_NOCACHE ++ 0x80 F2FS_IPU_HONOR_OPU_WRITE + ==== ================= + + Refer segment.h for details. 
+diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c +index fa86eaf1d6393..3f8dae229d422 100644 +--- a/fs/f2fs/data.c ++++ b/fs/f2fs/data.c +@@ -2567,6 +2567,9 @@ static inline bool check_inplace_update_policy(struct inode *inode, + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + unsigned int policy = SM_I(sbi)->ipu_policy; + ++ if (policy & (0x1 << F2FS_IPU_HONOR_OPU_WRITE) && ++ is_inode_flag_set(inode, FI_OPU_WRITE)) ++ return false; + if (policy & (0x1 << F2FS_IPU_FORCE)) + return true; + if (policy & (0x1 << F2FS_IPU_SSR) && f2fs_need_SSR(sbi)) +@@ -2637,6 +2640,9 @@ bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio) + if (is_inode_flag_set(inode, FI_ALIGNED_WRITE)) + return true; + ++ if (is_inode_flag_set(inode, FI_OPU_WRITE)) ++ return true; ++ + if (fio) { + if (page_private_gcing(fio->page)) + return true; +@@ -3263,8 +3269,8 @@ static int __f2fs_write_data_pages(struct address_space *mapping, + f2fs_available_free_memory(sbi, DIRTY_DENTS)) + goto skip_write; + +- /* skip writing during file defragment */ +- if (is_inode_flag_set(inode, FI_DO_DEFRAG)) ++ /* skip writing in file defragment preparing stage */ ++ if (is_inode_flag_set(inode, FI_SKIP_WRITES)) + goto skip_write; + + trace_f2fs_writepages(mapping->host, wbc, DATA); +@@ -3998,6 +4004,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, + filemap_invalidate_lock(inode->i_mapping); + + set_inode_flag(inode, FI_ALIGNED_WRITE); ++ set_inode_flag(inode, FI_OPU_WRITE); + + for (; secidx < end_sec; secidx++) { + down_write(&sbi->pin_sem); +@@ -4006,7 +4013,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, + f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false); + f2fs_unlock_op(sbi); + +- set_inode_flag(inode, FI_DO_DEFRAG); ++ set_inode_flag(inode, FI_SKIP_WRITES); + + for (blkofs = 0; blkofs < blk_per_sec; blkofs++) { + struct page *page; +@@ -4023,7 +4030,7 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t 
start_blk, + f2fs_put_page(page, 1); + } + +- clear_inode_flag(inode, FI_DO_DEFRAG); ++ clear_inode_flag(inode, FI_SKIP_WRITES); + + ret = filemap_fdatawrite(inode->i_mapping); + +@@ -4034,7 +4041,8 @@ static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk, + } + + done: +- clear_inode_flag(inode, FI_DO_DEFRAG); ++ clear_inode_flag(inode, FI_SKIP_WRITES); ++ clear_inode_flag(inode, FI_OPU_WRITE); + clear_inode_flag(inode, FI_ALIGNED_WRITE); + + filemap_invalidate_unlock(inode->i_mapping); +diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h +index fb1422a81d382..62390632db401 100644 +--- a/fs/f2fs/f2fs.h ++++ b/fs/f2fs/f2fs.h +@@ -714,7 +714,8 @@ enum { + FI_DROP_CACHE, /* drop dirty page cache */ + FI_DATA_EXIST, /* indicate data exists */ + FI_INLINE_DOTS, /* indicate inline dot dentries */ +- FI_DO_DEFRAG, /* indicate defragment is running */ ++ FI_SKIP_WRITES, /* should skip data page writeback */ ++ FI_OPU_WRITE, /* used for opu per file */ + FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ + FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_HOT_DATA, /* indicate file is hot */ +diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c +index be9536815e50d..0e6766d4eff57 100644 +--- a/fs/f2fs/file.c ++++ b/fs/f2fs/file.c +@@ -2576,10 +2576,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, + bool fragmented = false; + int err; + +- /* if in-place-update policy is enabled, don't waste time here */ +- if (f2fs_should_update_inplace(inode, NULL)) +- return -EINVAL; +- + pg_start = range->start >> PAGE_SHIFT; + pg_end = (range->start + range->len) >> PAGE_SHIFT; + +@@ -2587,6 +2583,13 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, + + inode_lock(inode); + ++ /* if in-place-update policy is enabled, don't waste time here */ ++ set_inode_flag(inode, FI_OPU_WRITE); ++ if (f2fs_should_update_inplace(inode, NULL)) { ++ err = -EINVAL; ++ goto out; ++ } ++ + /* writeback all dirty pages in the range */ + err = 
filemap_write_and_wait_range(inode->i_mapping, range->start, + range->start + range->len - 1); +@@ -2668,7 +2671,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, + goto check; + } + +- set_inode_flag(inode, FI_DO_DEFRAG); ++ set_inode_flag(inode, FI_SKIP_WRITES); + + idx = map.m_lblk; + while (idx < map.m_lblk + map.m_len && cnt < blk_per_seg) { +@@ -2694,15 +2697,16 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, + if (map.m_lblk < pg_end && cnt < blk_per_seg) + goto do_map; + +- clear_inode_flag(inode, FI_DO_DEFRAG); ++ clear_inode_flag(inode, FI_SKIP_WRITES); + + err = filemap_fdatawrite(inode->i_mapping); + if (err) + goto out; + } + clear_out: +- clear_inode_flag(inode, FI_DO_DEFRAG); ++ clear_inode_flag(inode, FI_SKIP_WRITES); + out: ++ clear_inode_flag(inode, FI_OPU_WRITE); + inode_unlock(inode); + if (!err) + range->len = (u64)total << PAGE_SHIFT; +diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h +index d1c0c8732c4fd..6d3a48407e1ba 100644 +--- a/fs/f2fs/segment.h ++++ b/fs/f2fs/segment.h +@@ -667,7 +667,9 @@ static inline int utilization(struct f2fs_sb_info *sbi) + * pages over min_fsync_blocks. (=default option) + * F2FS_IPU_ASYNC - do IPU given by asynchronous write requests. + * F2FS_IPU_NOCACHE - disable IPU bio cache. +- * F2FS_IPUT_DISABLE - disable IPU. (=default option in LFS mode) ++ * F2FS_IPU_HONOR_OPU_WRITE - use OPU write prior to IPU write if inode has ++ * FI_OPU_WRITE flag. ++ * F2FS_IPU_DISABLE - disable IPU. 
(=default option in LFS mode) + */ + #define DEF_MIN_IPU_UTIL 70 + #define DEF_MIN_FSYNC_BLOCKS 8 +@@ -683,6 +685,7 @@ enum { + F2FS_IPU_FSYNC, + F2FS_IPU_ASYNC, + F2FS_IPU_NOCACHE, ++ F2FS_IPU_HONOR_OPU_WRITE, + }; + + static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, +diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c +index 144c35b2760f4..5ba244d01c6ea 100644 +--- a/fs/f2fs/super.c ++++ b/fs/f2fs/super.c +@@ -3992,7 +3992,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi) + F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE; + if (f2fs_block_unit_discard(sbi)) + sm_i->dcc_info->discard_granularity = 1; +- sm_i->ipu_policy = 1 << F2FS_IPU_FORCE; ++ sm_i->ipu_policy = 1 << F2FS_IPU_FORCE | ++ 1 << F2FS_IPU_HONOR_OPU_WRITE; + } + + sbi->readdir_ra = 1; +-- +2.43.0 + diff --git a/queue-5.15/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch b/queue-5.15/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch new file mode 100644 index 00000000000..1c4777d99e5 --- /dev/null +++ b/queue-5.15/genirq-allow-the-pm-device-to-originate-from-irq-dom.patch @@ -0,0 +1,122 @@ +From a7312a0112078620cb81a7e8d2bca62b73624847 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 1 Feb 2022 12:02:59 +0000 +Subject: genirq: Allow the PM device to originate from irq domain + +From: Marc Zyngier + +[ Upstream commit 1f8863bfb5ca500ea1c7669b16b1931ba27fce20 ] + +As a preparation to moving the reference to the device used for +runtime power management, add a new 'dev' field to the irqdomain +structure for that exact purpose. + +The irq_chip_pm_{get,put}() helpers are made aware of the dual +location via a new private helper. + +No functional change intended. 
+ +Signed-off-by: Marc Zyngier +Reviewed-by: Geert Uytterhoeven +Tested-by: Geert Uytterhoeven +Tested-by: Tony Lindgren +Acked-by: Bartosz Golaszewski +Link: https://lore.kernel.org/r/20220201120310.878267-2-maz@kernel.org +Stable-dep-of: 33b1c47d1fc0 ("irqchip/imx-irqsteer: Handle runtime power management correctly") +Signed-off-by: Sasha Levin +--- + include/linux/irqdomain.h | 10 ++++++++++ + kernel/irq/chip.c | 23 ++++++++++++++++++----- + 2 files changed, 28 insertions(+), 5 deletions(-) + +diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h +index 9ee238ad29ce9..a7c80bd4b45b6 100644 +--- a/include/linux/irqdomain.h ++++ b/include/linux/irqdomain.h +@@ -147,6 +147,8 @@ struct irq_domain_chip_generic; + * @gc: Pointer to a list of generic chips. There is a helper function for + * setting up one or more generic chips for interrupt controllers + * drivers using the generic chip library which uses this pointer. ++ * @dev: Pointer to a device that the domain represent, and that will be ++ * used for power management purposes. 
+ * @parent: Pointer to parent irq_domain to support hierarchy irq_domains + * + * Revmap data, used internally by irq_domain +@@ -167,6 +169,7 @@ struct irq_domain { + struct fwnode_handle *fwnode; + enum irq_domain_bus_token bus_token; + struct irq_domain_chip_generic *gc; ++ struct device *dev; + #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY + struct irq_domain *parent; + #endif +@@ -222,6 +225,13 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) + return to_of_node(d->fwnode); + } + ++static inline void irq_domain_set_pm_device(struct irq_domain *d, ++ struct device *dev) ++{ ++ if (d) ++ d->dev = dev; ++} ++ + #ifdef CONFIG_IRQ_DOMAIN + struct fwnode_handle *__irq_domain_alloc_fwnode(unsigned int type, int id, + const char *name, phys_addr_t *pa); +diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c +index f3920374f71ce..7ea66e55ef86b 100644 +--- a/kernel/irq/chip.c ++++ b/kernel/irq/chip.c +@@ -1559,6 +1559,17 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) + return 0; + } + ++static struct device *irq_get_parent_device(struct irq_data *data) ++{ ++ if (data->chip->parent_device) ++ return data->chip->parent_device; ++ ++ if (data->domain) ++ return data->domain->dev; ++ ++ return NULL; ++} ++ + /** + * irq_chip_pm_get - Enable power for an IRQ chip + * @data: Pointer to interrupt specific data +@@ -1568,12 +1579,13 @@ int irq_chip_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) + */ + int irq_chip_pm_get(struct irq_data *data) + { ++ struct device *dev = irq_get_parent_device(data); + int retval; + +- if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) { +- retval = pm_runtime_get_sync(data->chip->parent_device); ++ if (IS_ENABLED(CONFIG_PM) && dev) { ++ retval = pm_runtime_get_sync(dev); + if (retval < 0) { +- pm_runtime_put_noidle(data->chip->parent_device); ++ pm_runtime_put_noidle(dev); + return retval; + } + } +@@ -1591,10 +1603,11 @@ int irq_chip_pm_get(struct irq_data *data) + */ + int 
irq_chip_pm_put(struct irq_data *data) + { ++ struct device *dev = irq_get_parent_device(data); + int retval = 0; + +- if (IS_ENABLED(CONFIG_PM) && data->chip->parent_device) +- retval = pm_runtime_put(data->chip->parent_device); ++ if (IS_ENABLED(CONFIG_PM) && dev) ++ retval = pm_runtime_put(dev); + + return (retval < 0) ? retval : 0; + } +-- +2.43.0 + diff --git a/queue-5.15/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch b/queue-5.15/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch new file mode 100644 index 00000000000..f61d4ff709f --- /dev/null +++ b/queue-5.15/ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch @@ -0,0 +1,110 @@ +From efc647f45293868fee5381817dc674cf2d997844 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 8 Nov 2021 18:35:59 -0800 +Subject: ipc: check checkpoint_restore_ns_capable() to modify C/R proc files + +From: Michal Clapinski + +[ Upstream commit 5563cabdde7ee53c34ec7e5e0283bfcc9a1bc893 ] + +This commit removes the requirement to be root to modify sem_next_id, +msg_next_id and shm_next_id and checks checkpoint_restore_ns_capable +instead. + +Since those files are specific to the IPC namespace, there is no reason +they should require root privileges. This is similar to ns_last_pid, +which also only checks checkpoint_restore_ns_capable. + +[akpm@linux-foundation.org: ipc/ipc_sysctl.c needs capability.h for checkpoint_restore_ns_capable()] + +Link: https://lkml.kernel.org/r/20210916163717.3179496-1-mclapinski@google.com +Signed-off-by: Michal Clapinski +Reviewed-by: Davidlohr Bueso +Reviewed-by: Manfred Spraul +Cc: "Eric W. 
Biederman" +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/ipc_sysctl.c | 29 +++++++++++++++++++++++------ + 1 file changed, 23 insertions(+), 6 deletions(-) + +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index 3f312bf2b1163..345e4d673e61e 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -10,6 +10,7 @@ + #include <linux/nsproxy.h> + #include <linux/sysctl.h> + #include <linux/uaccess.h> ++#include <linux/capability.h> + #include <linux/ipc_namespace.h> + #include <linux/msg.h> + #include "util.h" +@@ -104,6 +105,19 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + return ret; + } + ++#ifdef CONFIG_CHECKPOINT_RESTORE ++static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, ++ int write, void *buffer, size_t *lenp, loff_t *ppos) ++{ ++ struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; ++ ++ if (write && !checkpoint_restore_ns_capable(user_ns)) ++ return -EPERM; ++ ++ return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); ++} ++#endif ++ + #else + #define proc_ipc_doulongvec_minmax NULL + #define proc_ipc_dointvec NULL +@@ -111,6 +125,9 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + #define proc_ipc_dointvec_minmax_orphans NULL + #define proc_ipc_auto_msgmni NULL + #define proc_ipc_sem_dointvec NULL ++#ifdef CONFIG_CHECKPOINT_RESTORE ++#define proc_ipc_dointvec_minmax_checkpoint_restore NULL ++#endif /* CONFIG_CHECKPOINT_RESTORE */ + #endif + + int ipc_mni = IPCMNI; +@@ -198,8 +215,8 @@ static struct ctl_table ipc_kern_table[] = { + .procname = "sem_next_id", + .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), +- .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .mode = 0666, ++ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +@@ -207,8 +224,8 @@ static struct ctl_table ipc_kern_table[] = { + .procname
= "msg_next_id", + .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), +- .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .mode = 0666, ++ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +@@ -216,8 +233,8 @@ static struct ctl_table ipc_kern_table[] = { + .procname = "shm_next_id", + .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), +- .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .mode = 0666, ++ .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +-- +2.43.0 + diff --git a/queue-5.15/ipc-check-permissions-for-checkpoint_restart-sysctls.patch b/queue-5.15/ipc-check-permissions-for-checkpoint_restart-sysctls.patch new file mode 100644 index 00000000000..4a5838425a6 --- /dev/null +++ b/queue-5.15/ipc-check-permissions-for-checkpoint_restart-sysctls.patch @@ -0,0 +1,137 @@ +From 6bf48e86990d981eec1125c59ee6a81a3ff9bf05 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 3 May 2022 15:39:56 +0200 +Subject: ipc: Check permissions for checkpoint_restart sysctls at open time + +From: Alexey Gladkov + +[ Upstream commit 0889f44e281034e180daa6daf3e2d57c012452d4 ] + +As Eric Biederman pointed out, it is possible not to use a custom +proc_handler and check permissions for every write, but to use a +.permission handler. That will allow the checkpoint_restart sysctls to +perform all of their permission checks at open time, and not need any +other special code. + +Link: https://lore.kernel.org/lkml/87czib9g38.fsf@email.froward.int.ebiederm.org/ +Fixes: 1f5c135ee509 ("ipc: Store ipc sysctls in the ipc namespace") +Signed-off-by: Eric W. Biederman +Signed-off-by: Alexey Gladkov +Link: https://lkml.kernel.org/r/65fa8459803830608da4610a39f33c76aa933eb9.1651584847.git.legion@kernel.org +Signed-off-by: Eric W. 
Biederman +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/ipc_sysctl.c | 57 ++++++++++++++++++++++++------------------------ + 1 file changed, 29 insertions(+), 28 deletions(-) + +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index 15210ac47e9e1..a2b871d006da7 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -78,25 +78,6 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + return ret; + } + +-#ifdef CONFIG_CHECKPOINT_RESTORE +-static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, +- int write, void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ipc_namespace *ns = table->extra1; +- struct ctl_table ipc_table; +- +- if (write && !checkpoint_restore_ns_capable(ns->user_ns)) +- return -EPERM; +- +- memcpy(&ipc_table, table, sizeof(ipc_table)); +- +- ipc_table.extra1 = SYSCTL_ZERO; +- ipc_table.extra2 = SYSCTL_INT_MAX; +- +- return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); +-} +-#endif +- + int ipc_mni = IPCMNI; + int ipc_mni_shift = IPCMNI_SHIFT; + int ipc_min_cycle = RADIX_TREE_MAP_SIZE; +@@ -180,22 +161,28 @@ static struct ctl_table ipc_sysctls[] = { + .procname = "sem_next_id", + .data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), +- .mode = 0666, +- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, ++ .mode = 0444, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "msg_next_id", + .data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id, + .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), +- .mode = 0666, +- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, ++ .mode = 0444, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "shm_next_id", + .data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id, + .maxlen = 
sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), +- .mode = 0666, +- .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, ++ .mode = 0444, ++ .proc_handler = proc_dointvec_minmax, ++ .extra1 = SYSCTL_ZERO, ++ .extra2 = SYSCTL_INT_MAX, + }, + #endif + {} +@@ -211,8 +198,25 @@ static int set_is_seen(struct ctl_table_set *set) + return &current->nsproxy->ipc_ns->ipc_set == set; + } + ++static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) ++{ ++ int mode = table->mode; ++ ++#ifdef CONFIG_CHECKPOINT_RESTORE ++ struct ipc_namespace *ns = current->nsproxy->ipc_ns; ++ ++ if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || ++ (table->data == &ns->ids[IPC_MSG_IDS].next_id) || ++ (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && ++ checkpoint_restore_ns_capable(ns->user_ns)) ++ mode = 0666; ++#endif ++ return mode; ++} ++ + static struct ctl_table_root set_root = { + .lookup = set_lookup, ++ .permissions = ipc_permissions, + }; + + bool setup_ipc_sysctls(struct ipc_namespace *ns) +@@ -254,15 +258,12 @@ bool setup_ipc_sysctls(struct ipc_namespace *ns) + #ifdef CONFIG_CHECKPOINT_RESTORE + } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) { + tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; +- tbl[i].extra1 = ns; + + } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) { + tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; +- tbl[i].extra1 = ns; + + } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) { + tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; +- tbl[i].extra1 = ns; + #endif + } else { + tbl[i].data = NULL; +-- +2.43.0 + diff --git a/queue-5.15/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch b/queue-5.15/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch new file mode 100644 index 00000000000..c1140b64f19 --- /dev/null +++ b/queue-5.15/ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch @@ -0,0 +1,69 @@ +From 25f6606b9e49210b3beb55303a36275e7c923b29 Mon Sep 17 00:00:00 2001 +From: Sasha 
Levin +Date: Mon, 8 Nov 2021 18:36:02 -0800 +Subject: ipc/ipc_sysctl.c: remove fallback for !CONFIG_PROC_SYSCTL + +From: Manfred Spraul + +[ Upstream commit 0e9beb8a96f21a6df1579cb3a679e150e3269d80 ] + +Compilation of ipc/ipc_sysctl.c is controlled by +obj-$(CONFIG_SYSVIPC_SYSCTL) +[see ipc/Makefile] + +And CONFIG_SYSVIPC_SYSCTL depends on SYSCTL +[see init/Kconfig] + +An SYSCTL is selected by PROC_SYSCTL. +[see fs/proc/Kconfig] + +Thus: #ifndef CONFIG_PROC_SYSCTL in ipc/ipc_sysctl.c is impossible, the +fallback can be removed. + +Link: https://lkml.kernel.org/r/20210918145337.3369-1-manfred@colorfullife.com +Signed-off-by: Manfred Spraul +Reviewed-by: "Eric W. Biederman" +Acked-by: Davidlohr Bueso +Cc: Manfred Spraul +Signed-off-by: Andrew Morton +Signed-off-by: Linus Torvalds +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/ipc_sysctl.c | 13 ------------- + 1 file changed, 13 deletions(-) + +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index 345e4d673e61e..f101c171753f6 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -23,7 +23,6 @@ static void *get_ipc(struct ctl_table *table) + return which; + } + +-#ifdef CONFIG_PROC_SYSCTL + static int proc_ipc_dointvec(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { +@@ -118,18 +117,6 @@ static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, + } + #endif + +-#else +-#define proc_ipc_doulongvec_minmax NULL +-#define proc_ipc_dointvec NULL +-#define proc_ipc_dointvec_minmax NULL +-#define proc_ipc_dointvec_minmax_orphans NULL +-#define proc_ipc_auto_msgmni NULL +-#define proc_ipc_sem_dointvec NULL +-#ifdef CONFIG_CHECKPOINT_RESTORE +-#define proc_ipc_dointvec_minmax_checkpoint_restore NULL +-#endif /* CONFIG_CHECKPOINT_RESTORE */ +-#endif +- + int ipc_mni = IPCMNI; + int ipc_mni_shift = IPCMNI_SHIFT; + int ipc_min_cycle = RADIX_TREE_MAP_SIZE; +-- +2.43.0 + diff --git 
a/queue-5.15/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch b/queue-5.15/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch new file mode 100644 index 00000000000..f94d17b32f5 --- /dev/null +++ b/queue-5.15/ipc-store-ipc-sysctls-in-the-ipc-namespace.patch @@ -0,0 +1,406 @@ +From 9b69bb3c9c4743bd5ae28e9a658635b06408cd0a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Feb 2022 19:18:15 +0100 +Subject: ipc: Store ipc sysctls in the ipc namespace + +From: Alexey Gladkov + +[ Upstream commit 1f5c135ee509e89e0cc274333a65f73c62cb16e5 ] + +The ipc sysctls are not available for modification inside the user +namespace. Following the mqueue sysctls, we changed the implementation +to be more userns friendly. + +So far, the changes do not provide additional access to files. This +will be done in a future patch. + +Signed-off-by: Alexey Gladkov +Link: https://lkml.kernel.org/r/be6f9d014276f4dddd0c3aa05a86052856c1c555.1644862280.git.legion@kernel.org +Signed-off-by: Eric W. Biederman +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + include/linux/ipc_namespace.h | 21 ++++ + ipc/ipc_sysctl.c | 189 ++++++++++++++++++++++------------ + ipc/namespace.c | 4 + + 3 files changed, 147 insertions(+), 67 deletions(-) + +diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h +index fa787d97d60a9..e3e8c8662b490 100644 +--- a/include/linux/ipc_namespace.h ++++ b/include/linux/ipc_namespace.h +@@ -67,6 +67,9 @@ struct ipc_namespace { + struct ctl_table_set mq_set; + struct ctl_table_header *mq_sysctls; + ++ struct ctl_table_set ipc_set; ++ struct ctl_table_header *ipc_sysctls; ++ + /* user_ns which owns the ipc ns */ + struct user_namespace *user_ns; + struct ucounts *ucounts; +@@ -188,4 +191,22 @@ static inline bool setup_mq_sysctls(struct ipc_namespace *ns) + } + + #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ ++ ++#ifdef CONFIG_SYSVIPC_SYSCTL ++ ++bool setup_ipc_sysctls(struct ipc_namespace *ns); ++void 
retire_ipc_sysctls(struct ipc_namespace *ns); ++ ++#else /* CONFIG_SYSVIPC_SYSCTL */ ++ ++static inline void retire_ipc_sysctls(struct ipc_namespace *ns) ++{ ++} ++ ++static inline bool setup_ipc_sysctls(struct ipc_namespace *ns) ++{ ++ return true; ++} ++ ++#endif /* CONFIG_SYSVIPC_SYSCTL */ + #endif +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index f101c171753f6..15210ac47e9e1 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -13,43 +13,22 @@ + #include + #include + #include ++#include + #include "util.h" + +-static void *get_ipc(struct ctl_table *table) +-{ +- char *which = table->data; +- struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; +- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; +- return which; +-} +- +-static int proc_ipc_dointvec(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ctl_table ipc_table; +- +- memcpy(&ipc_table, table, sizeof(ipc_table)); +- ipc_table.data = get_ipc(table); +- +- return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); +-} +- +-static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write, ++static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { ++ struct ipc_namespace *ns = table->extra1; + struct ctl_table ipc_table; ++ int err; + + memcpy(&ipc_table, table, sizeof(ipc_table)); +- ipc_table.data = get_ipc(table); + +- return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); +-} ++ ipc_table.extra1 = SYSCTL_ZERO; ++ ipc_table.extra2 = SYSCTL_ONE; + +-static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ipc_namespace *ns = current->nsproxy->ipc_ns; +- int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); ++ err = proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); + + if (err < 0) + return err; +@@ -58,17 +37,6 @@ static int 
proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, + return err; + } + +-static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ctl_table ipc_table; +- memcpy(&ipc_table, table, sizeof(ipc_table)); +- ipc_table.data = get_ipc(table); +- +- return proc_doulongvec_minmax(&ipc_table, write, buffer, +- lenp, ppos); +-} +- + static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { +@@ -87,11 +55,17 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, + static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) + { ++ struct ipc_namespace *ns = table->extra1; ++ struct ctl_table ipc_table; + int ret, semmni; +- struct ipc_namespace *ns = current->nsproxy->ipc_ns; ++ ++ memcpy(&ipc_table, table, sizeof(ipc_table)); ++ ++ ipc_table.extra1 = NULL; ++ ipc_table.extra2 = NULL; + + semmni = ns->sem_ctls[3]; +- ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos); ++ ret = proc_dointvec(table, write, buffer, lenp, ppos); + + if (!ret) + ret = sem_check_semmni(current->nsproxy->ipc_ns); +@@ -108,12 +82,18 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write, + static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table, + int write, void *buffer, size_t *lenp, loff_t *ppos) + { +- struct user_namespace *user_ns = current->nsproxy->ipc_ns->user_ns; ++ struct ipc_namespace *ns = table->extra1; ++ struct ctl_table ipc_table; + +- if (write && !checkpoint_restore_ns_capable(user_ns)) ++ if (write && !checkpoint_restore_ns_capable(ns->user_ns)) + return -EPERM; + +- return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); ++ memcpy(&ipc_table, table, sizeof(ipc_table)); ++ ++ ipc_table.extra1 = SYSCTL_ZERO; ++ ipc_table.extra2 = SYSCTL_INT_MAX; ++ ++ return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, 
ppos); + } + #endif + +@@ -121,27 +101,27 @@ int ipc_mni = IPCMNI; + int ipc_mni_shift = IPCMNI_SHIFT; + int ipc_min_cycle = RADIX_TREE_MAP_SIZE; + +-static struct ctl_table ipc_kern_table[] = { ++static struct ctl_table ipc_sysctls[] = { + { + .procname = "shmmax", + .data = &init_ipc_ns.shm_ctlmax, + .maxlen = sizeof(init_ipc_ns.shm_ctlmax), + .mode = 0644, +- .proc_handler = proc_ipc_doulongvec_minmax, ++ .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "shmall", + .data = &init_ipc_ns.shm_ctlall, + .maxlen = sizeof(init_ipc_ns.shm_ctlall), + .mode = 0644, +- .proc_handler = proc_ipc_doulongvec_minmax, ++ .proc_handler = proc_doulongvec_minmax, + }, + { + .procname = "shmmni", + .data = &init_ipc_ns.shm_ctlmni, + .maxlen = sizeof(init_ipc_ns.shm_ctlmni), + .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &ipc_mni, + }, +@@ -151,15 +131,13 @@ static struct ctl_table ipc_kern_table[] = { + .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax_orphans, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_ONE, + }, + { + .procname = "msgmax", + .data = &init_ipc_ns.msg_ctlmax, + .maxlen = sizeof(init_ipc_ns.msg_ctlmax), + .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +@@ -168,7 +146,7 @@ static struct ctl_table ipc_kern_table[] = { + .data = &init_ipc_ns.msg_ctlmni, + .maxlen = sizeof(init_ipc_ns.msg_ctlmni), + .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &ipc_mni, + }, +@@ -186,7 +164,7 @@ static struct ctl_table ipc_kern_table[] = { + .data = &init_ipc_ns.msg_ctlmnb, + .maxlen = sizeof(init_ipc_ns.msg_ctlmnb), + .mode = 0644, +- .proc_handler = proc_ipc_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 
= SYSCTL_ZERO, + .extra2 = SYSCTL_INT_MAX, + }, +@@ -204,8 +182,6 @@ static struct ctl_table ipc_kern_table[] = { + .maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id), + .mode = 0666, + .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "msg_next_id", +@@ -213,8 +189,6 @@ static struct ctl_table ipc_kern_table[] = { + .maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id), + .mode = 0666, + .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_INT_MAX, + }, + { + .procname = "shm_next_id", +@@ -222,25 +196,106 @@ static struct ctl_table ipc_kern_table[] = { + .maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id), + .mode = 0666, + .proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore, +- .extra1 = SYSCTL_ZERO, +- .extra2 = SYSCTL_INT_MAX, + }, + #endif + {} + }; + +-static struct ctl_table ipc_root_table[] = { +- { +- .procname = "kernel", +- .mode = 0555, +- .child = ipc_kern_table, +- }, +- {} ++static struct ctl_table_set *set_lookup(struct ctl_table_root *root) ++{ ++ return &current->nsproxy->ipc_ns->ipc_set; ++} ++ ++static int set_is_seen(struct ctl_table_set *set) ++{ ++ return &current->nsproxy->ipc_ns->ipc_set == set; ++} ++ ++static struct ctl_table_root set_root = { ++ .lookup = set_lookup, + }; + ++bool setup_ipc_sysctls(struct ipc_namespace *ns) ++{ ++ struct ctl_table *tbl; ++ ++ setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen); ++ ++ tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL); ++ if (tbl) { ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) { ++ if (tbl[i].data == &init_ipc_ns.shm_ctlmax) { ++ tbl[i].data = &ns->shm_ctlmax; ++ ++ } else if (tbl[i].data == &init_ipc_ns.shm_ctlall) { ++ tbl[i].data = &ns->shm_ctlall; ++ ++ } else if (tbl[i].data == &init_ipc_ns.shm_ctlmni) { ++ tbl[i].data = &ns->shm_ctlmni; ++ ++ } else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced) { ++ 
tbl[i].data = &ns->shm_rmid_forced; ++ tbl[i].extra1 = ns; ++ ++ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmax) { ++ tbl[i].data = &ns->msg_ctlmax; ++ ++ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmni) { ++ tbl[i].data = &ns->msg_ctlmni; ++ ++ } else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb) { ++ tbl[i].data = &ns->msg_ctlmnb; ++ ++ } else if (tbl[i].data == &init_ipc_ns.sem_ctls) { ++ tbl[i].data = &ns->sem_ctls; ++ tbl[i].extra1 = ns; ++#ifdef CONFIG_CHECKPOINT_RESTORE ++ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id) { ++ tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id; ++ tbl[i].extra1 = ns; ++ ++ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id) { ++ tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id; ++ tbl[i].extra1 = ns; ++ ++ } else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id) { ++ tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id; ++ tbl[i].extra1 = ns; ++#endif ++ } else { ++ tbl[i].data = NULL; ++ } ++ } ++ ++ ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl); ++ } ++ if (!ns->ipc_sysctls) { ++ kfree(tbl); ++ retire_sysctl_set(&ns->ipc_set); ++ return false; ++ } ++ ++ return true; ++} ++ ++void retire_ipc_sysctls(struct ipc_namespace *ns) ++{ ++ struct ctl_table *tbl; ++ ++ tbl = ns->ipc_sysctls->ctl_table_arg; ++ unregister_sysctl_table(ns->ipc_sysctls); ++ retire_sysctl_set(&ns->ipc_set); ++ kfree(tbl); ++} ++ + static int __init ipc_sysctl_init(void) + { +- register_sysctl_table(ipc_root_table); ++ if (!setup_ipc_sysctls(&init_ipc_ns)) { ++ pr_warn("ipc sysctl registration failed\n"); ++ return -ENOMEM; ++ } + return 0; + } + +diff --git a/ipc/namespace.c b/ipc/namespace.c +index f760243ca685c..754f3237194aa 100644 +--- a/ipc/namespace.c ++++ b/ipc/namespace.c +@@ -63,6 +63,9 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, + if (!setup_mq_sysctls(ns)) + goto fail_put; + ++ if (!setup_ipc_sysctls(ns)) ++ goto fail_put; ++ + sem_init_ns(ns); + msg_init_ns(ns); + 
shm_init_ns(ns); +@@ -130,6 +133,7 @@ static void free_ipc_ns(struct ipc_namespace *ns) + shm_exit_ns(ns); + + retire_mq_sysctls(ns); ++ retire_ipc_sysctls(ns); + + dec_ipc_namespaces(ns->ucounts); + put_user_ns(ns->user_ns); +-- +2.43.0 + diff --git a/queue-5.15/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch b/queue-5.15/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch new file mode 100644 index 00000000000..58a55cbcf92 --- /dev/null +++ b/queue-5.15/ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch @@ -0,0 +1,323 @@ +From 7ec01f48cee5af0c38b69108ec374e095a80b742 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 14 Feb 2022 19:18:14 +0100 +Subject: ipc: Store mqueue sysctls in the ipc namespace + +From: Alexey Gladkov + +[ Upstream commit dc55e35f9e810f23dd69cfdc91a3d636023f57a2 ] + +Right now, the mqueue sysctls take ipc namespaces into account in a +rather hacky way. This works in most cases, but does not respect the +user namespace. + +Within the user namespace, the user cannot change the /proc/sys/fs/mqueue/* +parameters. This poses a problem in the rootless containers. + +To solve this I changed the implementation of the mqueue sysctls just +like some other sysctls. + +So far, the changes do not provide additional access to files. This will +be done in a future patch. + +v3: +* Don't implement set_permissions to keep the current behavior. + +v2: +* Fixed compilation problem if CONFIG_POSIX_MQUEUE_SYSCTL is not + specified. + +Reported-by: kernel test robot +Signed-off-by: Alexey Gladkov +Link: https://lkml.kernel.org/r/b0ccbb2489119f1f20c737cf1930c3a9c4e4243a.1644862280.git.legion@kernel.org +Signed-off-by: Eric W. 
Biederman +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + include/linux/ipc_namespace.h | 16 +++-- + ipc/mq_sysctl.c | 121 ++++++++++++++++++---------------- + ipc/mqueue.c | 10 ++- + ipc/namespace.c | 6 ++ + 4 files changed, 88 insertions(+), 65 deletions(-) + +diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h +index b75395ec8d521..fa787d97d60a9 100644 +--- a/include/linux/ipc_namespace.h ++++ b/include/linux/ipc_namespace.h +@@ -10,6 +10,7 @@ + #include + #include + #include ++#include + + struct user_namespace; + +@@ -63,6 +64,9 @@ struct ipc_namespace { + unsigned int mq_msg_default; + unsigned int mq_msgsize_default; + ++ struct ctl_table_set mq_set; ++ struct ctl_table_header *mq_sysctls; ++ + /* user_ns which owns the ipc ns */ + struct user_namespace *user_ns; + struct ucounts *ucounts; +@@ -169,14 +173,18 @@ static inline void put_ipc_ns(struct ipc_namespace *ns) + + #ifdef CONFIG_POSIX_MQUEUE_SYSCTL + +-struct ctl_table_header; +-extern struct ctl_table_header *mq_register_sysctl_table(void); ++void retire_mq_sysctls(struct ipc_namespace *ns); ++bool setup_mq_sysctls(struct ipc_namespace *ns); + + #else /* CONFIG_POSIX_MQUEUE_SYSCTL */ + +-static inline struct ctl_table_header *mq_register_sysctl_table(void) ++static inline void retire_mq_sysctls(struct ipc_namespace *ns) + { +- return NULL; ++} ++ ++static inline bool setup_mq_sysctls(struct ipc_namespace *ns) ++{ ++ return true; + } + + #endif /* CONFIG_POSIX_MQUEUE_SYSCTL */ +diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c +index 72a92a08c848e..fbf6a8b93a265 100644 +--- a/ipc/mq_sysctl.c ++++ b/ipc/mq_sysctl.c +@@ -9,39 +9,9 @@ + #include + #include + +-#ifdef CONFIG_PROC_SYSCTL +-static void *get_mq(struct ctl_table *table) +-{ +- char *which = table->data; +- struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; +- which = (which - (char *)&init_ipc_ns) + (char *)ipc_ns; +- return which; +-} +- +-static int 
proc_mq_dointvec(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ctl_table mq_table; +- memcpy(&mq_table, table, sizeof(mq_table)); +- mq_table.data = get_mq(table); +- +- return proc_dointvec(&mq_table, write, buffer, lenp, ppos); +-} +- +-static int proc_mq_dointvec_minmax(struct ctl_table *table, int write, +- void *buffer, size_t *lenp, loff_t *ppos) +-{ +- struct ctl_table mq_table; +- memcpy(&mq_table, table, sizeof(mq_table)); +- mq_table.data = get_mq(table); +- +- return proc_dointvec_minmax(&mq_table, write, buffer, +- lenp, ppos); +-} +-#else +-#define proc_mq_dointvec NULL +-#define proc_mq_dointvec_minmax NULL +-#endif ++#include ++#include ++#include + + static int msg_max_limit_min = MIN_MSGMAX; + static int msg_max_limit_max = HARD_MSGMAX; +@@ -55,14 +25,14 @@ static struct ctl_table mq_sysctls[] = { + .data = &init_ipc_ns.mq_queues_max, + .maxlen = sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec, ++ .proc_handler = proc_dointvec, + }, + { + .procname = "msg_max", + .data = &init_ipc_ns.mq_msg_max, + .maxlen = sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = &msg_max_limit_min, + .extra2 = &msg_max_limit_max, + }, +@@ -71,7 +41,7 @@ static struct ctl_table mq_sysctls[] = { + .data = &init_ipc_ns.mq_msgsize_max, + .maxlen = sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = &msg_maxsize_limit_min, + .extra2 = &msg_maxsize_limit_max, + }, +@@ -80,7 +50,7 @@ static struct ctl_table mq_sysctls[] = { + .data = &init_ipc_ns.mq_msg_default, + .maxlen = sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = &msg_max_limit_min, + .extra2 = &msg_max_limit_max, + }, +@@ -89,32 +59,73 @@ static struct ctl_table mq_sysctls[] = { + .data = &init_ipc_ns.mq_msgsize_default, + .maxlen 
= sizeof(int), + .mode = 0644, +- .proc_handler = proc_mq_dointvec_minmax, ++ .proc_handler = proc_dointvec_minmax, + .extra1 = &msg_maxsize_limit_min, + .extra2 = &msg_maxsize_limit_max, + }, + {} + }; + +-static struct ctl_table mq_sysctl_dir[] = { +- { +- .procname = "mqueue", +- .mode = 0555, +- .child = mq_sysctls, +- }, +- {} +-}; ++static struct ctl_table_set *set_lookup(struct ctl_table_root *root) ++{ ++ return &current->nsproxy->ipc_ns->mq_set; ++} + +-static struct ctl_table mq_sysctl_root[] = { +- { +- .procname = "fs", +- .mode = 0555, +- .child = mq_sysctl_dir, +- }, +- {} ++static int set_is_seen(struct ctl_table_set *set) ++{ ++ return &current->nsproxy->ipc_ns->mq_set == set; ++} ++ ++static struct ctl_table_root set_root = { ++ .lookup = set_lookup, + }; + +-struct ctl_table_header *mq_register_sysctl_table(void) ++bool setup_mq_sysctls(struct ipc_namespace *ns) + { +- return register_sysctl_table(mq_sysctl_root); ++ struct ctl_table *tbl; ++ ++ setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen); ++ ++ tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL); ++ if (tbl) { ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(mq_sysctls); i++) { ++ if (tbl[i].data == &init_ipc_ns.mq_queues_max) ++ tbl[i].data = &ns->mq_queues_max; ++ ++ else if (tbl[i].data == &init_ipc_ns.mq_msg_max) ++ tbl[i].data = &ns->mq_msg_max; ++ ++ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_max) ++ tbl[i].data = &ns->mq_msgsize_max; ++ ++ else if (tbl[i].data == &init_ipc_ns.mq_msg_default) ++ tbl[i].data = &ns->mq_msg_default; ++ ++ else if (tbl[i].data == &init_ipc_ns.mq_msgsize_default) ++ tbl[i].data = &ns->mq_msgsize_default; ++ else ++ tbl[i].data = NULL; ++ } ++ ++ ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl); ++ } ++ if (!ns->mq_sysctls) { ++ kfree(tbl); ++ retire_sysctl_set(&ns->mq_set); ++ return false; ++ } ++ ++ return true; ++} ++ ++void retire_mq_sysctls(struct ipc_namespace *ns) ++{ ++ struct ctl_table *tbl; ++ ++ tbl = 
ns->mq_sysctls->ctl_table_arg; ++ unregister_sysctl_table(ns->mq_sysctls); ++ retire_sysctl_set(&ns->mq_set); ++ kfree(tbl); + } +diff --git a/ipc/mqueue.c b/ipc/mqueue.c +index 089c34d0732cf..79b0079ee1acb 100644 +--- a/ipc/mqueue.c ++++ b/ipc/mqueue.c +@@ -164,8 +164,6 @@ static void remove_notification(struct mqueue_inode_info *info); + + static struct kmem_cache *mqueue_inode_cachep; + +-static struct ctl_table_header *mq_sysctl_table; +- + static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode) + { + return container_of(inode, struct mqueue_inode_info, vfs_inode); +@@ -1727,8 +1725,10 @@ static int __init init_mqueue_fs(void) + if (mqueue_inode_cachep == NULL) + return -ENOMEM; + +- /* ignore failures - they are not fatal */ +- mq_sysctl_table = mq_register_sysctl_table(); ++ if (!setup_mq_sysctls(&init_ipc_ns)) { ++ pr_warn("sysctl registration failed\n"); ++ return -ENOMEM; ++ } + + error = register_filesystem(&mqueue_fs_type); + if (error) +@@ -1745,8 +1745,6 @@ static int __init init_mqueue_fs(void) + out_filesystem: + unregister_filesystem(&mqueue_fs_type); + out_sysctl: +- if (mq_sysctl_table) +- unregister_sysctl_table(mq_sysctl_table); + kmem_cache_destroy(mqueue_inode_cachep); + return error; + } +diff --git a/ipc/namespace.c b/ipc/namespace.c +index ae83f0f2651b7..f760243ca685c 100644 +--- a/ipc/namespace.c ++++ b/ipc/namespace.c +@@ -59,6 +59,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns, + if (err) + goto fail_put; + ++ err = -ENOMEM; ++ if (!setup_mq_sysctls(ns)) ++ goto fail_put; ++ + sem_init_ns(ns); + msg_init_ns(ns); + shm_init_ns(ns); +@@ -125,6 +129,8 @@ static void free_ipc_ns(struct ipc_namespace *ns) + msg_exit_ns(ns); + shm_exit_ns(ns); + ++ retire_mq_sysctls(ns); ++ + dec_ipc_namespaces(ns->ucounts); + put_user_ns(ns->user_ns); + ns_free_inum(&ns->ns); +-- +2.43.0 + diff --git a/queue-5.15/ipv4-fix-source-address-selection-with-route-leak.patch 
b/queue-5.15/ipv4-fix-source-address-selection-with-route-leak.patch new file mode 100644 index 00000000000..cee7af756bc --- /dev/null +++ b/queue-5.15/ipv4-fix-source-address-selection-with-route-leak.patch @@ -0,0 +1,53 @@ +From dfd009372d960dc1ccf694e7369d58e63cd133c4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 10 Jul 2024 10:14:27 +0200 +Subject: ipv4: fix source address selection with route leak + +From: Nicolas Dichtel + +[ Upstream commit 6807352353561187a718e87204458999dbcbba1b ] + +By default, an address assigned to the output interface is selected when +the source address is not specified. This is problematic when a route, +configured in a vrf, uses an interface from another vrf (aka route leak). +The original vrf does not own the selected source address. + +Let's add a check against the output interface and call the appropriate +function to select the source address. + +CC: stable@vger.kernel.org +Fixes: 8cbb512c923d ("net: Add source address lookup op for VRF") +Signed-off-by: Nicolas Dichtel +Reviewed-by: David Ahern +Link: https://patch.msgid.link/20240710081521.3809742-2-nicolas.dichtel@6wind.com +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/ipv4/fib_semantics.c | 13 +++++++++++-- + 1 file changed, 11 insertions(+), 2 deletions(-) + +diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c +index 3d00253afbb8d..4f1236458c214 100644 +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -2286,6 +2286,15 @@ void fib_select_path(struct net *net, struct fib_result *res, + fib_select_default(fl4, res); + + check_saddr: +- if (!fl4->saddr) +- fl4->saddr = fib_result_prefsrc(net, res); ++ if (!fl4->saddr) { ++ struct net_device *l3mdev; ++ ++ l3mdev = dev_get_by_index_rcu(net, fl4->flowi4_l3mdev); ++ ++ if (!l3mdev || ++ l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) == l3mdev) ++ fl4->saddr = fib_result_prefsrc(net, res); ++ else ++ fl4->saddr = inet_select_addr(l3mdev, 0, RT_SCOPE_LINK); ++ } + } +-- 
+2.43.0 + diff --git a/queue-5.15/irqchip-imx-irqsteer-add-runtime-pm-support.patch b/queue-5.15/irqchip-imx-irqsteer-add-runtime-pm-support.patch new file mode 100644 index 00000000000..40d3be43200 --- /dev/null +++ b/queue-5.15/irqchip-imx-irqsteer-add-runtime-pm-support.patch @@ -0,0 +1,85 @@ +From 351c9d063c713bd97552bb9f9945783def3576cc Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Apr 2022 18:37:01 +0200 +Subject: irqchip/imx-irqsteer: Add runtime PM support + +From: Lucas Stach + +[ Upstream commit 4730d2233311d86cad9dc510318d1b40e4b53cf2 ] + +There are now SoCs that integrate the irqsteer controller within +a separate power domain. In order to allow this domain to be +powered down when not needed, add runtime PM support to the driver. + +Signed-off-by: Lucas Stach +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20220406163701.1277930-2-l.stach@pengutronix.de +Stable-dep-of: 33b1c47d1fc0 ("irqchip/imx-irqsteer: Handle runtime power management correctly") +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-imx-irqsteer.c | 14 +++++++++++--- + 1 file changed, 11 insertions(+), 3 deletions(-) + +diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c +index e286e7c5ccbfb..96230a04ec238 100644 +--- a/drivers/irqchip/irq-imx-irqsteer.c ++++ b/drivers/irqchip/irq-imx-irqsteer.c +@@ -12,6 +12,7 @@ + #include + #include + #include ++#include + #include + + #define CTRL_STRIDE_OFF(_t, _r) (_t * 4 * _r) +@@ -175,7 +176,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev) + data->irq_count = DIV_ROUND_UP(irqs_num, 64); + data->reg_num = irqs_num / 32; + +- if (IS_ENABLED(CONFIG_PM_SLEEP)) { ++ if (IS_ENABLED(CONFIG_PM)) { + data->saved_reg = devm_kzalloc(&pdev->dev, + sizeof(u32) * data->reg_num, + GFP_KERNEL); +@@ -199,6 +200,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev) + ret = -ENOMEM; + goto out; + } ++ irq_domain_set_pm_device(data->domain, &pdev->dev); + + if 
(!data->irq_count || data->irq_count > CHAN_MAX_OUTPUT_INT) { + ret = -EINVAL; +@@ -219,6 +221,9 @@ static int imx_irqsteer_probe(struct platform_device *pdev) + + platform_set_drvdata(pdev, data); + ++ pm_runtime_set_active(&pdev->dev); ++ pm_runtime_enable(&pdev->dev); ++ + return 0; + out: + clk_disable_unprepare(data->ipg_clk); +@@ -241,7 +246,7 @@ static int imx_irqsteer_remove(struct platform_device *pdev) + return 0; + } + +-#ifdef CONFIG_PM_SLEEP ++#ifdef CONFIG_PM + static void imx_irqsteer_save_regs(struct irqsteer_data *data) + { + int i; +@@ -288,7 +293,10 @@ static int imx_irqsteer_resume(struct device *dev) + #endif + + static const struct dev_pm_ops imx_irqsteer_pm_ops = { +- SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(imx_irqsteer_suspend, imx_irqsteer_resume) ++ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, ++ pm_runtime_force_resume) ++ SET_RUNTIME_PM_OPS(imx_irqsteer_suspend, ++ imx_irqsteer_resume, NULL) + }; + + static const struct of_device_id imx_irqsteer_dt_ids[] = { +-- +2.43.0 + diff --git a/queue-5.15/irqchip-imx-irqsteer-constify-irq_chip-struct.patch b/queue-5.15/irqchip-imx-irqsteer-constify-irq_chip-struct.patch new file mode 100644 index 00000000000..233fa262a2d --- /dev/null +++ b/queue-5.15/irqchip-imx-irqsteer-constify-irq_chip-struct.patch @@ -0,0 +1,36 @@ +From 1a0462f9a40d9c586ef89a02e6cc249c92dd0348 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 6 Apr 2022 18:37:00 +0200 +Subject: irqchip/imx-irqsteer: Constify irq_chip struct + +From: Lucas Stach + +[ Upstream commit e9a50f12e579a48e124ac5adb93dafc35f0a46b8 ] + +The imx_irqsteer_irq_chip struct is constant data. 
+ +Signed-off-by: Lucas Stach +Signed-off-by: Marc Zyngier +Link: https://lore.kernel.org/r/20220406163701.1277930-1-l.stach@pengutronix.de +Stable-dep-of: 33b1c47d1fc0 ("irqchip/imx-irqsteer: Handle runtime power management correctly") +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-imx-irqsteer.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c +index 8d91a02593fc2..e286e7c5ccbfb 100644 +--- a/drivers/irqchip/irq-imx-irqsteer.c ++++ b/drivers/irqchip/irq-imx-irqsteer.c +@@ -70,7 +70,7 @@ static void imx_irqsteer_irq_mask(struct irq_data *d) + raw_spin_unlock_irqrestore(&data->lock, flags); + } + +-static struct irq_chip imx_irqsteer_irq_chip = { ++static const struct irq_chip imx_irqsteer_irq_chip = { + .name = "irqsteer", + .irq_mask = imx_irqsteer_irq_mask, + .irq_unmask = imx_irqsteer_irq_unmask, +-- +2.43.0 + diff --git a/queue-5.15/irqchip-imx-irqsteer-handle-runtime-power-management.patch b/queue-5.15/irqchip-imx-irqsteer-handle-runtime-power-management.patch new file mode 100644 index 00000000000..28345256988 --- /dev/null +++ b/queue-5.15/irqchip-imx-irqsteer-handle-runtime-power-management.patch @@ -0,0 +1,107 @@ +From c4dd509c3ae6d48d4cceda130d567c9424531e72 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 3 Jul 2024 11:32:50 -0500 +Subject: irqchip/imx-irqsteer: Handle runtime power management correctly + +From: Shenwei Wang + +[ Upstream commit 33b1c47d1fc0b5f06a393bb915db85baacba18ea ] + +The power domain is automatically activated from clk_prepare(). 
However, on +certain platforms like i.MX8QM and i.MX8QXP, the power-on handling invokes +sleeping functions, which triggers the 'scheduling while atomic' bug in the +context switch path during device probing: + + BUG: scheduling while atomic: kworker/u13:1/48/0x00000002 + Call trace: + __schedule_bug+0x54/0x6c + __schedule+0x7f0/0xa94 + schedule+0x5c/0xc4 + schedule_preempt_disabled+0x24/0x40 + __mutex_lock.constprop.0+0x2c0/0x540 + __mutex_lock_slowpath+0x14/0x20 + mutex_lock+0x48/0x54 + clk_prepare_lock+0x44/0xa0 + clk_prepare+0x20/0x44 + imx_irqsteer_resume+0x28/0xe0 + pm_generic_runtime_resume+0x2c/0x44 + __genpd_runtime_resume+0x30/0x80 + genpd_runtime_resume+0xc8/0x2c0 + __rpm_callback+0x48/0x1d8 + rpm_callback+0x6c/0x78 + rpm_resume+0x490/0x6b4 + __pm_runtime_resume+0x50/0x94 + irq_chip_pm_get+0x2c/0xa0 + __irq_do_set_handler+0x178/0x24c + irq_set_chained_handler_and_data+0x60/0xa4 + mxc_gpio_probe+0x160/0x4b0 + +Cure this by implementing the irq_bus_lock/sync_unlock() interrupt chip +callbacks and handle power management in them as they are invoked from +non-atomic context. 
+ +[ tglx: Rewrote change log, added Fixes tag ] + +Fixes: 0136afa08967 ("irqchip: Add driver for imx-irqsteer controller") +Signed-off-by: Shenwei Wang +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240703163250.47887-1-shenwei.wang@nxp.com +Signed-off-by: Sasha Levin +--- + drivers/irqchip/irq-imx-irqsteer.c | 24 +++++++++++++++++++++--- + 1 file changed, 21 insertions(+), 3 deletions(-) + +diff --git a/drivers/irqchip/irq-imx-irqsteer.c b/drivers/irqchip/irq-imx-irqsteer.c +index 96230a04ec238..44ce85c27f57a 100644 +--- a/drivers/irqchip/irq-imx-irqsteer.c ++++ b/drivers/irqchip/irq-imx-irqsteer.c +@@ -35,6 +35,7 @@ struct irqsteer_data { + int channel; + struct irq_domain *domain; + u32 *saved_reg; ++ struct device *dev; + }; + + static int imx_irqsteer_get_reg_index(struct irqsteer_data *data, +@@ -71,10 +72,26 @@ static void imx_irqsteer_irq_mask(struct irq_data *d) + raw_spin_unlock_irqrestore(&data->lock, flags); + } + ++static void imx_irqsteer_irq_bus_lock(struct irq_data *d) ++{ ++ struct irqsteer_data *data = d->chip_data; ++ ++ pm_runtime_get_sync(data->dev); ++} ++ ++static void imx_irqsteer_irq_bus_sync_unlock(struct irq_data *d) ++{ ++ struct irqsteer_data *data = d->chip_data; ++ ++ pm_runtime_put_autosuspend(data->dev); ++} ++ + static const struct irq_chip imx_irqsteer_irq_chip = { +- .name = "irqsteer", +- .irq_mask = imx_irqsteer_irq_mask, +- .irq_unmask = imx_irqsteer_irq_unmask, ++ .name = "irqsteer", ++ .irq_mask = imx_irqsteer_irq_mask, ++ .irq_unmask = imx_irqsteer_irq_unmask, ++ .irq_bus_lock = imx_irqsteer_irq_bus_lock, ++ .irq_bus_sync_unlock = imx_irqsteer_irq_bus_sync_unlock, + }; + + static int imx_irqsteer_irq_map(struct irq_domain *h, unsigned int irq, +@@ -149,6 +166,7 @@ static int imx_irqsteer_probe(struct platform_device *pdev) + if (!data) + return -ENOMEM; + ++ data->dev = &pdev->dev; + data->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(data->regs)) { + 
dev_err(&pdev->dev, "failed to initialize reg\n"); +-- +2.43.0 + diff --git a/queue-5.15/irqdomain-fixed-unbalanced-fwnode-get-and-put.patch b/queue-5.15/irqdomain-fixed-unbalanced-fwnode-get-and-put.patch new file mode 100644 index 00000000000..ebb4d7c0119 --- /dev/null +++ b/queue-5.15/irqdomain-fixed-unbalanced-fwnode-get-and-put.patch @@ -0,0 +1,70 @@ +From e63bdbbdc2f445f70035b6d188456819a5e70cf1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jun 2024 19:32:04 +0200 +Subject: irqdomain: Fixed unbalanced fwnode get and put + +From: Herve Codina + +[ Upstream commit 6ce3e98184b625d2870991880bf9586ded7ea7f9 ] + +fwnode_handle_get(fwnode) is called when a domain is created with fwnode +passed as a function parameter. fwnode_handle_put(domain->fwnode) is called +when the domain is destroyed but during the creation a path exists that +does not set domain->fwnode. + +If this path is taken, the fwnode get will never be put. + +To avoid the unbalanced get and put, set domain->fwnode unconditionally. 
+ +Fixes: d59f6617eef0 ("genirq: Allow fwnode to carry name information only") +Signed-off-by: Herve Codina +Signed-off-by: Thomas Gleixner +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20240614173232.1184015-4-herve.codina@bootlin.com +Signed-off-by: Sasha Levin +--- + kernel/irq/irqdomain.c | 7 ++----- + 1 file changed, 2 insertions(+), 5 deletions(-) + +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index 966ee3a79ce9c..b533b5b166444 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -153,7 +153,6 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, + switch (fwid->type) { + case IRQCHIP_FWNODE_NAMED: + case IRQCHIP_FWNODE_NAMED_ID: +- domain->fwnode = fwnode; + domain->name = kstrdup(fwid->name, GFP_KERNEL); + if (!domain->name) { + kfree(domain); +@@ -162,7 +161,6 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + break; + default: +- domain->fwnode = fwnode; + domain->name = fwid->name; + break; + } +@@ -182,7 +180,6 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, + } + + domain->name = strreplace(name, '/', ':'); +- domain->fwnode = fwnode; + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + } + +@@ -198,8 +195,8 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + } + +- fwnode_handle_get(fwnode); +- fwnode_dev_initialized(fwnode, true); ++ domain->fwnode = fwnode_handle_get(fwnode); ++ fwnode_dev_initialized(domain->fwnode, true); + + /* Fill structure */ + INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL); +-- +2.43.0 + diff --git a/queue-5.15/irqdomain-use-return-value-of-strreplace.patch b/queue-5.15/irqdomain-use-return-value-of-strreplace.patch new file mode 100644 index 00000000000..7c8af869250 --- /dev/null +++ b/queue-5.15/irqdomain-use-return-value-of-strreplace.patch @@ -0,0 +1,39 @@ +From 
bae7226ab8ad5c0968380581b021e4ddf7cbe622 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 28 Jun 2023 18:02:51 +0300 +Subject: irqdomain: Use return value of strreplace() + +From: Andy Shevchenko + +[ Upstream commit 67a4e1a3bf7c68ed3fbefc4213648165d912cabb ] + +Since strreplace() returns the pointer to the string itself, use it +directly. + +Signed-off-by: Andy Shevchenko +Signed-off-by: Thomas Gleixner +Link: https://lore.kernel.org/r/20230628150251.17832-1-andriy.shevchenko@linux.intel.com +Stable-dep-of: 6ce3e98184b6 ("irqdomain: Fixed unbalanced fwnode get and put") +Signed-off-by: Sasha Levin +--- + kernel/irq/irqdomain.c | 4 +--- + 1 file changed, 1 insertion(+), 3 deletions(-) + +diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c +index e0b67784ac1e0..966ee3a79ce9c 100644 +--- a/kernel/irq/irqdomain.c ++++ b/kernel/irq/irqdomain.c +@@ -181,9 +181,7 @@ static struct irq_domain *__irq_domain_create(struct fwnode_handle *fwnode, + return NULL; + } + +- strreplace(name, '/', ':'); +- +- domain->name = name; ++ domain->name = strreplace(name, '/', ':'); + domain->fwnode = fwnode; + domain->flags |= IRQ_DOMAIN_NAME_ALLOCATED; + } +-- +2.43.0 + diff --git a/queue-5.15/leds-trigger-call-synchronize_rcu-before-calling-tri.patch b/queue-5.15/leds-trigger-call-synchronize_rcu-before-calling-tri.patch new file mode 100644 index 00000000000..d8e3ed6879b --- /dev/null +++ b/queue-5.15/leds-trigger-call-synchronize_rcu-before-calling-tri.patch @@ -0,0 +1,60 @@ +From b7491e79898964a7dd2fce93defd35f1c950fa10 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 31 May 2024 14:01:24 +0200 +Subject: leds: trigger: Call synchronize_rcu() before calling trig->activate() + +From: Hans de Goede + +[ Upstream commit b1bbd20f35e19774ea01989320495e09ac44fba3 ] + +Some triggers call led_trigger_event() from their activate() callback +to initialize the brightness of the LED for which the trigger is being +activated. 
+ +In order for the LED's initial state to be set correctly this requires that +the led_trigger_event() call uses the new version of trigger->led_cdevs, +which has the new LED. + +AFAICT led_trigger_event() will always use the new version when it is +running on the same CPU as where the list_add_tail_rcu() call was made, +which is why the missing synchronize_rcu() has not lead to bug reports. +But if activate() is pre-empted, sleeps or uses a worker then +the led_trigger_event() call may run on another CPU which may still use +the old trigger->led_cdevs list. + +Add a synchronize_rcu() call to ensure that any led_trigger_event() calls +done from activate() always use the new list. + +Triggers using led_trigger_event() from their activate() callback are: +net/bluetooth/leds.c, net/rfkill/core.c and drivers/tty/vt/keyboard.c. + +Signed-off-by: Hans de Goede +Link: https://lore.kernel.org/r/20240531120124.75662-1-hdegoede@redhat.com +Signed-off-by: Lee Jones +Stable-dep-of: ab477b766edd ("leds: triggers: Flush pending brightness before activating trigger") +Signed-off-by: Sasha Levin +--- + drivers/leds/led-triggers.c | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c +index cdb446cb84af2..fe7fb2e7149c5 100644 +--- a/drivers/leds/led-triggers.c ++++ b/drivers/leds/led-triggers.c +@@ -193,6 +193,13 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) + spin_unlock(&trig->leddev_list_lock); + led_cdev->trigger = trig; + ++ /* ++ * Some activate() calls use led_trigger_event() to initialize ++ * the brightness of the LED for which the trigger is being set. ++ * Ensure the led_cdev is visible on trig->led_cdevs for this. 
++ */ ++ synchronize_rcu(); ++ + ret = 0; + if (trig->activate) + ret = trig->activate(led_cdev); +-- +2.43.0 + diff --git a/queue-5.15/leds-trigger-remove-unused-function-led_trigger_rena.patch b/queue-5.15/leds-trigger-remove-unused-function-led_trigger_rena.patch new file mode 100644 index 00000000000..664a0a588e5 --- /dev/null +++ b/queue-5.15/leds-trigger-remove-unused-function-led_trigger_rena.patch @@ -0,0 +1,77 @@ +From feedebc328768990cb3a8433a7e08e9299c5def7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 8 Dec 2023 23:56:41 +0100 +Subject: leds: trigger: Remove unused function led_trigger_rename_static() + +From: Heiner Kallweit + +[ Upstream commit c82a1662d4548c454de5343b88f69b9fc82266b3 ] + +This function was added with a8df7b1ab70b ("leds: add led_trigger_rename +function") 11 yrs ago, but it has no users. So remove it. + +Signed-off-by: Heiner Kallweit +Link: https://lore.kernel.org/r/d90f30be-f661-4db7-b0b5-d09d07a78a68@gmail.com +Signed-off-by: Lee Jones +Stable-dep-of: ab477b766edd ("leds: triggers: Flush pending brightness before activating trigger") +Signed-off-by: Sasha Levin +--- + drivers/leds/led-triggers.c | 13 ------------- + include/linux/leds.h | 17 ----------------- + 2 files changed, 30 deletions(-) + +diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c +index 024b73f84ce0c..dddfc301d3414 100644 +--- a/drivers/leds/led-triggers.c ++++ b/drivers/leds/led-triggers.c +@@ -268,19 +268,6 @@ void led_trigger_set_default(struct led_classdev *led_cdev) + } + EXPORT_SYMBOL_GPL(led_trigger_set_default); + +-void led_trigger_rename_static(const char *name, struct led_trigger *trig) +-{ +- /* new name must be on a temporary string to prevent races */ +- BUG_ON(name == trig->name); +- +- down_write(&triggers_list_lock); +- /* this assumes that trig->name was originaly allocated to +- * non constant storage */ +- strcpy((char *)trig->name, name); +- up_write(&triggers_list_lock); +-} 
+-EXPORT_SYMBOL_GPL(led_trigger_rename_static); +- + /* LED Trigger Interface */ + + int led_trigger_register(struct led_trigger *trig) +diff --git a/include/linux/leds.h b/include/linux/leds.h +index ba4861ec73d30..2bbff7519b731 100644 +--- a/include/linux/leds.h ++++ b/include/linux/leds.h +@@ -409,23 +409,6 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) + return led_cdev->trigger_data; + } + +-/** +- * led_trigger_rename_static - rename a trigger +- * @name: the new trigger name +- * @trig: the LED trigger to rename +- * +- * Change a LED trigger name by copying the string passed in +- * name into current trigger name, which MUST be large +- * enough for the new string. +- * +- * Note that name must NOT point to the same string used +- * during LED registration, as that could lead to races. +- * +- * This is meant to be used on triggers with statically +- * allocated name. +- */ +-void led_trigger_rename_static(const char *name, struct led_trigger *trig); +- + #define module_led_trigger(__led_trigger) \ + module_driver(__led_trigger, led_trigger_register, \ + led_trigger_unregister) +-- +2.43.0 + diff --git a/queue-5.15/leds-trigger-store-brightness-set-by-led_trigger_eve.patch b/queue-5.15/leds-trigger-store-brightness-set-by-led_trigger_eve.patch new file mode 100644 index 00000000000..2bf38c69cbf --- /dev/null +++ b/queue-5.15/leds-trigger-store-brightness-set-by-led_trigger_eve.patch @@ -0,0 +1,98 @@ +From 25f994fb471bf3fa777063d0db72beb7a9c36513 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 4 Mar 2024 21:57:30 +0100 +Subject: leds: trigger: Store brightness set by led_trigger_event() + +From: Heiner Kallweit + +[ Upstream commit 822c91e72eac568ed8d83765634f00decb45666c ] + +If a simple trigger is assigned to a LED, then the LED may be off until +the next led_trigger_event() call. This may be an issue for simple +triggers with rare led_trigger_event() calls, e.g. 
power supply +charging indicators (drivers/power/supply/power_supply_leds.c). +Therefore persist the brightness value of the last led_trigger_event() +call and use this value if the trigger is assigned to a LED. +In addition add a getter for the trigger brightness value. + +Signed-off-by: Heiner Kallweit +Reviewed-by: Takashi Iwai +Link: https://lore.kernel.org/r/b1358b25-3f30-458d-8240-5705ae007a8a@gmail.com +Signed-off-by: Lee Jones +Stable-dep-of: ab477b766edd ("leds: triggers: Flush pending brightness before activating trigger") +Signed-off-by: Sasha Levin +--- + drivers/leds/led-triggers.c | 6 ++++-- + include/linux/leds.h | 15 +++++++++++++++ + 2 files changed, 19 insertions(+), 2 deletions(-) + +diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c +index dddfc301d3414..cdb446cb84af2 100644 +--- a/drivers/leds/led-triggers.c ++++ b/drivers/leds/led-triggers.c +@@ -193,11 +193,11 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) + spin_unlock(&trig->leddev_list_lock); + led_cdev->trigger = trig; + ++ ret = 0; + if (trig->activate) + ret = trig->activate(led_cdev); + else +- ret = 0; +- ++ led_set_brightness(led_cdev, trig->brightness); + if (ret) + goto err_activate; + +@@ -372,6 +372,8 @@ void led_trigger_event(struct led_trigger *trig, + if (!trig) + return; + ++ trig->brightness = brightness; ++ + rcu_read_lock(); + list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) + led_set_brightness(led_cdev, brightness); +diff --git a/include/linux/leds.h b/include/linux/leds.h +index 2bbff7519b731..79ab2dfd3c72f 100644 +--- a/include/linux/leds.h ++++ b/include/linux/leds.h +@@ -356,6 +356,9 @@ struct led_trigger { + int (*activate)(struct led_classdev *led_cdev); + void (*deactivate)(struct led_classdev *led_cdev); + ++ /* Brightness set by led_trigger_event */ ++ enum led_brightness brightness; ++ + /* LED-private triggers have this set */ + struct led_hw_trigger_type *trigger_type; + +@@ -409,6 +412,12 @@ 
static inline void *led_get_trigger_data(struct led_classdev *led_cdev) + return led_cdev->trigger_data; + } + ++static inline enum led_brightness ++led_trigger_get_brightness(const struct led_trigger *trigger) ++{ ++ return trigger ? trigger->brightness : LED_OFF; ++} ++ + #define module_led_trigger(__led_trigger) \ + module_driver(__led_trigger, led_trigger_register, \ + led_trigger_unregister) +@@ -445,6 +454,12 @@ static inline void *led_get_trigger_data(struct led_classdev *led_cdev) + return NULL; + } + ++static inline enum led_brightness ++led_trigger_get_brightness(const struct led_trigger *trigger) ++{ ++ return LED_OFF; ++} ++ + #endif /* CONFIG_LEDS_TRIGGERS */ + + /* Trigger specific functions */ +-- +2.43.0 + diff --git a/queue-5.15/leds-trigger-use-rcu-to-protect-the-led_cdevs-list.patch b/queue-5.15/leds-trigger-use-rcu-to-protect-the-led_cdevs-list.patch new file mode 100644 index 00000000000..9412135a823 --- /dev/null +++ b/queue-5.15/leds-trigger-use-rcu-to-protect-the-led_cdevs-list.patch @@ -0,0 +1,166 @@ +From 602b8fda838413ddea84a78759164be8c1f344a8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 15 Sep 2021 18:16:01 +0200 +Subject: leds: trigger: use RCU to protect the led_cdevs list + +From: Johannes Berg + +[ Upstream commit 2a5a8fa8b23144d14567d6f8293dd6fbeecee393 ] + +Even with the previous commit 27af8e2c90fb +("leds: trigger: fix potential deadlock with libata") +to this file, we still get lockdep unhappy, and Boqun +explained the report here: +https://lore.kernel.org/r/YNA+d1X4UkoQ7g8a@boqun-archlinux + +Effectively, this means that the read_lock_irqsave() isn't +enough here because another CPU might be trying to do a +write lock, and thus block the readers. 
+ +This is all pretty messy, but it doesn't seem right that +the LEDs framework imposes some locking requirements on +users, in particular we'd have to make the spinlock in the +iwlwifi driver always disable IRQs, even if we don't need +that for any other reason, just to avoid this deadlock. + +Since writes to the led_cdevs list are rare (and are done +by userspace), just switch the list to RCU. This costs a +synchronize_rcu() at removal time so we can ensure things +are correct, but that seems like a small price to pay for +getting lock-free iterations and no deadlocks (nor any +locking requirements imposed on users.) + +Signed-off-by: Johannes Berg +Signed-off-by: Pavel Machek +Stable-dep-of: ab477b766edd ("leds: triggers: Flush pending brightness before activating trigger") +Signed-off-by: Sasha Levin +--- + drivers/leds/led-triggers.c | 41 +++++++++++++++++++------------------ + include/linux/leds.h | 2 +- + 2 files changed, 22 insertions(+), 21 deletions(-) + +diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c +index cbe70f38cb572..024b73f84ce0c 100644 +--- a/drivers/leds/led-triggers.c ++++ b/drivers/leds/led-triggers.c +@@ -157,7 +157,6 @@ EXPORT_SYMBOL_GPL(led_trigger_read); + /* Caller must ensure led_cdev->trigger_lock held */ + int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) + { +- unsigned long flags; + char *event = NULL; + char *envp[2]; + const char *name; +@@ -171,10 +170,13 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) + + /* Remove any existing trigger */ + if (led_cdev->trigger) { +- write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags); +- list_del(&led_cdev->trig_list); +- write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock, +- flags); ++ spin_lock(&led_cdev->trigger->leddev_list_lock); ++ list_del_rcu(&led_cdev->trig_list); ++ spin_unlock(&led_cdev->trigger->leddev_list_lock); ++ ++ /* ensure it's no longer visible on the led_cdevs list */ ++ 
synchronize_rcu(); ++ + cancel_work_sync(&led_cdev->set_brightness_work); + led_stop_software_blink(led_cdev); + device_remove_groups(led_cdev->dev, led_cdev->trigger->groups); +@@ -186,9 +188,9 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) + led_set_brightness(led_cdev, LED_OFF); + } + if (trig) { +- write_lock_irqsave(&trig->leddev_list_lock, flags); +- list_add_tail(&led_cdev->trig_list, &trig->led_cdevs); +- write_unlock_irqrestore(&trig->leddev_list_lock, flags); ++ spin_lock(&trig->leddev_list_lock); ++ list_add_tail_rcu(&led_cdev->trig_list, &trig->led_cdevs); ++ spin_unlock(&trig->leddev_list_lock); + led_cdev->trigger = trig; + + if (trig->activate) +@@ -223,9 +225,10 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) + trig->deactivate(led_cdev); + err_activate: + +- write_lock_irqsave(&led_cdev->trigger->leddev_list_lock, flags); +- list_del(&led_cdev->trig_list); +- write_unlock_irqrestore(&led_cdev->trigger->leddev_list_lock, flags); ++ spin_lock(&led_cdev->trigger->leddev_list_lock); ++ list_del_rcu(&led_cdev->trig_list); ++ spin_unlock(&led_cdev->trigger->leddev_list_lock); ++ synchronize_rcu(); + led_cdev->trigger = NULL; + led_cdev->trigger_data = NULL; + led_set_brightness(led_cdev, LED_OFF); +@@ -285,7 +288,7 @@ int led_trigger_register(struct led_trigger *trig) + struct led_classdev *led_cdev; + struct led_trigger *_trig; + +- rwlock_init(&trig->leddev_list_lock); ++ spin_lock_init(&trig->leddev_list_lock); + INIT_LIST_HEAD(&trig->led_cdevs); + + down_write(&triggers_list_lock); +@@ -378,15 +381,14 @@ void led_trigger_event(struct led_trigger *trig, + enum led_brightness brightness) + { + struct led_classdev *led_cdev; +- unsigned long flags; + + if (!trig) + return; + +- read_lock_irqsave(&trig->leddev_list_lock, flags); +- list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) ++ rcu_read_lock(); ++ list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) + 
led_set_brightness(led_cdev, brightness); +- read_unlock_irqrestore(&trig->leddev_list_lock, flags); ++ rcu_read_unlock(); + } + EXPORT_SYMBOL_GPL(led_trigger_event); + +@@ -397,20 +399,19 @@ static void led_trigger_blink_setup(struct led_trigger *trig, + int invert) + { + struct led_classdev *led_cdev; +- unsigned long flags; + + if (!trig) + return; + +- read_lock_irqsave(&trig->leddev_list_lock, flags); +- list_for_each_entry(led_cdev, &trig->led_cdevs, trig_list) { ++ rcu_read_lock(); ++ list_for_each_entry_rcu(led_cdev, &trig->led_cdevs, trig_list) { + if (oneshot) + led_blink_set_oneshot(led_cdev, delay_on, delay_off, + invert); + else + led_blink_set(led_cdev, delay_on, delay_off); + } +- read_unlock_irqrestore(&trig->leddev_list_lock, flags); ++ rcu_read_unlock(); + } + + void led_trigger_blink(struct led_trigger *trig, +diff --git a/include/linux/leds.h b/include/linux/leds.h +index a0b730be40ad2..ba4861ec73d30 100644 +--- a/include/linux/leds.h ++++ b/include/linux/leds.h +@@ -360,7 +360,7 @@ struct led_trigger { + struct led_hw_trigger_type *trigger_type; + + /* LEDs under control by this trigger (for simple triggers) */ +- rwlock_t leddev_list_lock; ++ spinlock_t leddev_list_lock; + struct list_head led_cdevs; + + /* Link to next registered trigger */ +-- +2.43.0 + diff --git a/queue-5.15/leds-triggers-flush-pending-brightness-before-activa.patch b/queue-5.15/leds-triggers-flush-pending-brightness-before-activa.patch new file mode 100644 index 00000000000..fe4b0983467 --- /dev/null +++ b/queue-5.15/leds-triggers-flush-pending-brightness-before-activa.patch @@ -0,0 +1,66 @@ +From 941fc9e84f7f3cdf4384b5c14b62fb7e7a953dc4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 13 Jun 2024 17:24:51 +0200 +Subject: leds: triggers: Flush pending brightness before activating trigger +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Weißschuh + +[ Upstream commit ab477b766edd3bfb6321a6e3df4c790612613fae 
] + +The race fixed in timer_trig_activate() between a blocking +set_brightness() call and trigger->activate() can affect any trigger. +So move the call to flush_work() into led_trigger_set() where it can +avoid the race for all triggers. + +Fixes: 0db37915d912 ("leds: avoid races with workqueue") +Fixes: 8c0f693c6eff ("leds: avoid flush_work in atomic context") +Cc: stable@vger.kernel.org +Tested-by: Dustin L. Howett +Signed-off-by: Thomas Weißschuh +Link: https://lore.kernel.org/r/20240613-led-trigger-flush-v2-1-f4f970799d77@weissschuh.net +Signed-off-by: Lee Jones +Signed-off-by: Sasha Levin +--- + drivers/leds/led-triggers.c | 6 ++++++ + drivers/leds/trigger/ledtrig-timer.c | 5 ----- + 2 files changed, 6 insertions(+), 5 deletions(-) + +diff --git a/drivers/leds/led-triggers.c b/drivers/leds/led-triggers.c +index fe7fb2e7149c5..3d3673c197e38 100644 +--- a/drivers/leds/led-triggers.c ++++ b/drivers/leds/led-triggers.c +@@ -200,6 +200,12 @@ int led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trig) + */ + synchronize_rcu(); + ++ /* ++ * If "set brightness to 0" is pending in workqueue, ++ * we don't want that to be reordered after ->activate() ++ */ ++ flush_work(&led_cdev->set_brightness_work); ++ + ret = 0; + if (trig->activate) + ret = trig->activate(led_cdev); +diff --git a/drivers/leds/trigger/ledtrig-timer.c b/drivers/leds/trigger/ledtrig-timer.c +index b4688d1d9d2b2..1d213c999d40a 100644 +--- a/drivers/leds/trigger/ledtrig-timer.c ++++ b/drivers/leds/trigger/ledtrig-timer.c +@@ -110,11 +110,6 @@ static int timer_trig_activate(struct led_classdev *led_cdev) + led_cdev->flags &= ~LED_INIT_DEFAULT_TRIGGER; + } + +- /* +- * If "set brightness to 0" is pending in workqueue, we don't +- * want that to be reordered after blink_set() +- */ +- flush_work(&led_cdev->set_brightness_work); + led_blink_set(led_cdev, &led_cdev->blink_delay_on, + &led_cdev->blink_delay_off); + +-- +2.43.0 + diff --git 
a/queue-5.15/mips-dts-loongson-fix-liointc-irq-polarity.patch b/queue-5.15/mips-dts-loongson-fix-liointc-irq-polarity.patch new file mode 100644 index 00000000000..ec7fe155f63 --- /dev/null +++ b/queue-5.15/mips-dts-loongson-fix-liointc-irq-polarity.patch @@ -0,0 +1,172 @@ +From 02034e3f2a019d6a96e6c68edcb5669b08c0fa12 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jun 2024 16:40:10 +0100 +Subject: MIPS: dts: loongson: Fix liointc IRQ polarity + +From: Jiaxun Yang + +[ Upstream commit dbb69b9d6234aad23b3ecd33e5bc8a8ae1485b7d ] + +All internal liointc interrupts are high level triggered. + +Fixes: b1a792601f26 ("MIPS: Loongson64: DeviceTree for Loongson-2K1000") +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Sasha Levin +--- + .../boot/dts/loongson/loongson64-2k1000.dtsi | 42 +++++++++---------- + 1 file changed, 21 insertions(+), 21 deletions(-) + +diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +index 969e142584f28..6ad771768dae1 100644 +--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi ++++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +@@ -88,7 +88,7 @@ rtc0: rtc@1fe07800 { + compatible = "loongson,ls2k1000-rtc"; + reg = <0 0x1fe07800 0 0x78>; + interrupt-parent = <&liointc0>; +- interrupts = <60 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <60 IRQ_TYPE_LEVEL_HIGH>; + }; + + uart0: serial@1fe00000 { +@@ -96,7 +96,7 @@ uart0: serial@1fe00000 { + reg = <0 0x1fe00000 0 0x8>; + clock-frequency = <125000000>; + interrupt-parent = <&liointc0>; +- interrupts = <0 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + no-loopback-test; + }; + +@@ -119,8 +119,8 @@ gmac@3,0 { + "pciclass0c03"; + + reg = <0x1800 0x0 0x0 0x0 0x0>; +- interrupts = <12 IRQ_TYPE_LEVEL_LOW>, +- <13 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, ++ <13 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; + 
interrupt-parent = <&liointc0>; + phy-mode = "rgmii-id"; +@@ -143,8 +143,8 @@ gmac@3,1 { + "loongson, pci-gmac"; + + reg = <0x1900 0x0 0x0 0x0 0x0>; +- interrupts = <14 IRQ_TYPE_LEVEL_LOW>, +- <15 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <14 IRQ_TYPE_LEVEL_HIGH>, ++ <15 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; + interrupt-parent = <&liointc0>; + phy-mode = "rgmii-id"; +@@ -166,7 +166,7 @@ ehci@4,1 { + "pciclass0c03"; + + reg = <0x2100 0x0 0x0 0x0 0x0>; +- interrupts = <18 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <18 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc1>; + }; + +@@ -177,7 +177,7 @@ ohci@4,2 { + "pciclass0c03"; + + reg = <0x2200 0x0 0x0 0x0 0x0>; +- interrupts = <19 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc1>; + }; + +@@ -188,7 +188,7 @@ sata@8,0 { + "pciclass0106"; + + reg = <0x4000 0x0 0x0 0x0 0x0>; +- interrupts = <19 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <19 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc0>; + }; + +@@ -203,10 +203,10 @@ pcie@9,0 { + #size-cells = <2>; + device_type = "pci"; + #interrupt-cells = <1>; +- interrupts = <0 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; +- interrupt-map = <0 0 0 0 &liointc1 0 IRQ_TYPE_LEVEL_LOW>; ++ interrupt-map = <0 0 0 0 &liointc1 0 IRQ_TYPE_LEVEL_HIGH>; + ranges; + external-facing; + }; +@@ -222,10 +222,10 @@ pcie@a,0 { + #size-cells = <2>; + device_type = "pci"; + #interrupt-cells = <1>; +- interrupts = <1 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <1 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; +- interrupt-map = <0 0 0 0 &liointc1 1 IRQ_TYPE_LEVEL_LOW>; ++ interrupt-map = <0 0 0 0 &liointc1 1 IRQ_TYPE_LEVEL_HIGH>; + ranges; + external-facing; + }; +@@ -241,10 +241,10 @@ pcie@b,0 { + #size-cells = <2>; + device_type = "pci"; + #interrupt-cells = <1>; +- interrupts = <2 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = 
<2 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; +- interrupt-map = <0 0 0 0 &liointc1 2 IRQ_TYPE_LEVEL_LOW>; ++ interrupt-map = <0 0 0 0 &liointc1 2 IRQ_TYPE_LEVEL_HIGH>; + ranges; + external-facing; + }; +@@ -260,10 +260,10 @@ pcie@c,0 { + #size-cells = <2>; + device_type = "pci"; + #interrupt-cells = <1>; +- interrupts = <3 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <3 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; +- interrupt-map = <0 0 0 0 &liointc1 3 IRQ_TYPE_LEVEL_LOW>; ++ interrupt-map = <0 0 0 0 &liointc1 3 IRQ_TYPE_LEVEL_HIGH>; + ranges; + external-facing; + }; +@@ -279,10 +279,10 @@ pcie@d,0 { + #size-cells = <2>; + device_type = "pci"; + #interrupt-cells = <1>; +- interrupts = <4 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <4 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; +- interrupt-map = <0 0 0 0 &liointc1 4 IRQ_TYPE_LEVEL_LOW>; ++ interrupt-map = <0 0 0 0 &liointc1 4 IRQ_TYPE_LEVEL_HIGH>; + ranges; + external-facing; + }; +@@ -298,10 +298,10 @@ pcie@e,0 { + #size-cells = <2>; + device_type = "pci"; + #interrupt-cells = <1>; +- interrupts = <5 IRQ_TYPE_LEVEL_LOW>; ++ interrupts = <5 IRQ_TYPE_LEVEL_HIGH>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; +- interrupt-map = <0 0 0 0 &liointc1 5 IRQ_TYPE_LEVEL_LOW>; ++ interrupt-map = <0 0 0 0 &liointc1 5 IRQ_TYPE_LEVEL_HIGH>; + ranges; + external-facing; + }; +-- +2.43.0 + diff --git a/queue-5.15/mips-dts-loongson-fix-ls2k1000-rtc-interrupt.patch b/queue-5.15/mips-dts-loongson-fix-ls2k1000-rtc-interrupt.patch new file mode 100644 index 00000000000..7c1bef93c2b --- /dev/null +++ b/queue-5.15/mips-dts-loongson-fix-ls2k1000-rtc-interrupt.patch @@ -0,0 +1,38 @@ +From 99b2c1a3f9c50940b892df591c437567d92f9ba3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 14 Jun 2024 16:40:11 +0100 +Subject: MIPS: dts: loongson: Fix ls2k1000-rtc interrupt + +From: Jiaxun Yang + +[ 
Upstream commit f70fd92df7529e7283e02a6c3a2510075f13ba30 ] + +The correct interrupt line for RTC is line 8 on liointc1. + +Fixes: e47084e116fc ("MIPS: Loongson64: DTS: Add RTC support to Loongson-2K1000") +Cc: stable@vger.kernel.org +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer +Signed-off-by: Sasha Levin +--- + arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +index 6ad771768dae1..f3fad477ddce2 100644 +--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi ++++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +@@ -87,8 +87,8 @@ liointc1: interrupt-controller@1fe11440 { + rtc0: rtc@1fe07800 { + compatible = "loongson,ls2k1000-rtc"; + reg = <0 0x1fe07800 0 0x78>; +- interrupt-parent = <&liointc0>; +- interrupts = <60 IRQ_TYPE_LEVEL_HIGH>; ++ interrupt-parent = <&liointc1>; ++ interrupts = <8 IRQ_TYPE_LEVEL_HIGH>; + }; + + uart0: serial@1fe00000 { +-- +2.43.0 + diff --git a/queue-5.15/mips-loongson64-dts-add-rtc-support-to-loongson-2k10.patch b/queue-5.15/mips-loongson64-dts-add-rtc-support-to-loongson-2k10.patch new file mode 100644 index 00000000000..bcf8aefab8e --- /dev/null +++ b/queue-5.15/mips-loongson64-dts-add-rtc-support-to-loongson-2k10.patch @@ -0,0 +1,42 @@ +From 78b1e5a20134fb0681543c72f51e3bf79ebee09e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Jun 2023 17:50:50 +0800 +Subject: MIPS: Loongson64: DTS: Add RTC support to Loongson-2K1000 + +From: Binbin Zhou + +[ Upstream commit e47084e116fccaa43644360d7c0b997979abce3e ] + +The module is now supported, enable it. 
+ +Acked-by: Jiaxun Yang +Signed-off-by: Binbin Zhou +Signed-off-by: WANG Xuerui +Signed-off-by: Thomas Bogendoerfer +Stable-dep-of: dbb69b9d6234 ("MIPS: dts: loongson: Fix liointc IRQ polarity") +Signed-off-by: Sasha Levin +--- + arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +index b44aedba350a6..03abda568aa60 100644 +--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi ++++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +@@ -84,6 +84,13 @@ liointc1: interrupt-controller@1fe11440 { + <0x00000000>; /* int3 */ + }; + ++ rtc0: rtc@1fe07800 { ++ compatible = "loongson,ls2k1000-rtc"; ++ reg = <0 0x1fe07800 0 0x78>; ++ interrupt-parent = <&liointc0>; ++ interrupts = <60 IRQ_TYPE_LEVEL_LOW>; ++ }; ++ + uart0: serial@1fe00000 { + compatible = "ns16550a"; + reg = <0 0x1fe00000 0 0x8>; +-- +2.43.0 + diff --git a/queue-5.15/mips-loongson64-dts-fix-pcie-port-nodes-for-ls7a.patch b/queue-5.15/mips-loongson64-dts-fix-pcie-port-nodes-for-ls7a.patch new file mode 100644 index 00000000000..d9117fbaabc --- /dev/null +++ b/queue-5.15/mips-loongson64-dts-fix-pcie-port-nodes-for-ls7a.patch @@ -0,0 +1,161 @@ +From 38e3bff0e15f788e7e9469540882fc2a26d440f4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 7 May 2024 19:51:22 +0100 +Subject: MIPS: Loongson64: DTS: Fix PCIe port nodes for ls7a + +From: Jiaxun Yang + +[ Upstream commit d89a415ff8d5e0aad4963f2d8ebb0f9e8110b7fa ] + +Add various required properties to silent warnings: + +arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi:116.16-297.5: Warning (interrupt_provider): /bus@10000000/pci@1a000000: '#interrupt-cells' found, but node is not an interrupt provider +arch/mips/boot/dts/loongson/loongson64_2core_2k1000.dtb: Warning (interrupt_map): Failed prerequisite 'interrupt_provider' + +Signed-off-by: Jiaxun Yang +Signed-off-by: Thomas Bogendoerfer 
+Stable-dep-of: dbb69b9d6234 ("MIPS: dts: loongson: Fix liointc IRQ polarity") +Signed-off-by: Sasha Levin +--- + .../boot/dts/loongson/loongson64-2k1000.dtsi | 37 +++++++++++++++---- + 1 file changed, 30 insertions(+), 7 deletions(-) + +diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +index 03abda568aa60..969e142584f28 100644 +--- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi ++++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +@@ -105,7 +105,6 @@ pci@1a000000 { + device_type = "pci"; + #address-cells = <3>; + #size-cells = <2>; +- #interrupt-cells = <2>; + + reg = <0 0x1a000000 0 0x02000000>, + <0xfe 0x00000000 0 0x20000000>; +@@ -193,93 +192,117 @@ sata@8,0 { + interrupt-parent = <&liointc0>; + }; + +- pci_bridge@9,0 { ++ pcie@9,0 { + compatible = "pci0014,7a19.0", + "pci0014,7a19", + "pciclass060400", + "pciclass0604"; + + reg = <0x4800 0x0 0x0 0x0 0x0>; ++ #address-cells = <3>; ++ #size-cells = <2>; ++ device_type = "pci"; + #interrupt-cells = <1>; + interrupts = <0 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &liointc1 0 IRQ_TYPE_LEVEL_LOW>; ++ ranges; + external-facing; + }; + +- pci_bridge@a,0 { ++ pcie@a,0 { + compatible = "pci0014,7a09.0", + "pci0014,7a09", + "pciclass060400", + "pciclass0604"; + + reg = <0x5000 0x0 0x0 0x0 0x0>; ++ #address-cells = <3>; ++ #size-cells = <2>; ++ device_type = "pci"; + #interrupt-cells = <1>; + interrupts = <1 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &liointc1 1 IRQ_TYPE_LEVEL_LOW>; ++ ranges; + external-facing; + }; + +- pci_bridge@b,0 { ++ pcie@b,0 { + compatible = "pci0014,7a09.0", + "pci0014,7a09", + "pciclass060400", + "pciclass0604"; + + reg = <0x5800 0x0 0x0 0x0 0x0>; ++ #address-cells = <3>; ++ #size-cells = <2>; ++ device_type = "pci"; + #interrupt-cells = <1>; + interrupts = <2 
IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &liointc1 2 IRQ_TYPE_LEVEL_LOW>; ++ ranges; + external-facing; + }; + +- pci_bridge@c,0 { ++ pcie@c,0 { + compatible = "pci0014,7a09.0", + "pci0014,7a09", + "pciclass060400", + "pciclass0604"; + + reg = <0x6000 0x0 0x0 0x0 0x0>; ++ #address-cells = <3>; ++ #size-cells = <2>; ++ device_type = "pci"; + #interrupt-cells = <1>; + interrupts = <3 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &liointc1 3 IRQ_TYPE_LEVEL_LOW>; ++ ranges; + external-facing; + }; + +- pci_bridge@d,0 { ++ pcie@d,0 { + compatible = "pci0014,7a19.0", + "pci0014,7a19", + "pciclass060400", + "pciclass0604"; + + reg = <0x6800 0x0 0x0 0x0 0x0>; ++ #address-cells = <3>; ++ #size-cells = <2>; ++ device_type = "pci"; + #interrupt-cells = <1>; + interrupts = <4 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &liointc1 4 IRQ_TYPE_LEVEL_LOW>; ++ ranges; + external-facing; + }; + +- pci_bridge@e,0 { ++ pcie@e,0 { + compatible = "pci0014,7a09.0", + "pci0014,7a09", + "pciclass060400", + "pciclass0604"; + + reg = <0x7000 0x0 0x0 0x0 0x0>; ++ #address-cells = <3>; ++ #size-cells = <2>; ++ device_type = "pci"; + #interrupt-cells = <1>; + interrupts = <5 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&liointc1>; + interrupt-map-mask = <0 0 0 0>; + interrupt-map = <0 0 0 0 &liointc1 5 IRQ_TYPE_LEVEL_LOW>; ++ ranges; + external-facing; + }; + +-- +2.43.0 + diff --git a/queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch b/queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch new file mode 100644 index 00000000000..b2687491f2a --- /dev/null +++ b/queue-5.15/net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch @@ -0,0 +1,419 @@ +From 16e9d306e5b7b1f72a5ad15fef79d17b4b72c9fd Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 
14 Mar 2022 14:45:51 -0600 +Subject: net: Add l3mdev index to flow struct and avoid oif reset for port + devices + +From: David Ahern + +[ Upstream commit 40867d74c374b235e14d839f3a77f26684feefe5 ] + +The fundamental premise of VRF and l3mdev core code is binding a socket +to a device (l3mdev or netdev with an L3 domain) to indicate L3 scope. +Legacy code resets flowi_oif to the l3mdev losing any original port +device binding. Ben (among others) has demonstrated use cases where the +original port device binding is important and needs to be retained. +This patch handles that by adding a new entry to the common flow struct +that can indicate the l3mdev index for later rule and table matching +avoiding the need to reset flowi_oif. + +In addition to allowing more use cases that require port device binds, +this patch brings a few datapath simplications: + +1. l3mdev_fib_rule_match is only called when walking fib rules and + always after l3mdev_update_flow. That allows an optimization to bail + early for non-VRF type uses cases when flowi_l3mdev is not set. Also, + only that index needs to be checked for the FIB table id. + +2. l3mdev_update_flow can be called with flowi_oif set to a l3mdev + (e.g., VRF) device. By resetting flowi_oif only for this case the + FLOWI_FLAG_SKIP_NH_OIF flag is not longer needed and can be removed, + removing several checks in the datapath. The flowi_iif path can be + simplified to only be called if the it is not loopback (loopback can + not be assigned to an L3 domain) and the l3mdev index is not already + set. + +3. Avoid another device lookup in the output path when the fib lookup + returns a reject failure. + +Note: 2 functional tests for local traffic with reject fib rules are +updated to reflect the new direct failure at FIB lookup time for ping +rather than the failure on packet path. 
The current code fails like this: + + HINT: Fails since address on vrf device is out of device scope + COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 + ping: Warning: source address might be selected on device other than: eth1 + PING 172.16.3.1 (172.16.3.1) from 172.16.3.1 eth1: 56(84) bytes of data. + + --- 172.16.3.1 ping statistics --- + 1 packets transmitted, 0 received, 100% packet loss, time 0ms + +where the test now directly fails: + + HINT: Fails since address on vrf device is out of device scope + COMMAND: ip netns exec ns-A ping -c1 -w1 -I eth1 172.16.3.1 + ping: connect: No route to host + +Signed-off-by: David Ahern +Tested-by: Ben Greear +Link: https://lore.kernel.org/r/20220314204551.16369-1-dsahern@kernel.org +Signed-off-by: Jakub Kicinski +Stable-dep-of: 680735235356 ("ipv4: fix source address selection with route leak") +Signed-off-by: Sasha Levin +--- + drivers/net/vrf.c | 7 ++-- + include/net/flow.h | 6 +++- + net/ipv4/fib_frontend.c | 7 ++-- + net/ipv4/fib_semantics.c | 2 +- + net/ipv4/fib_trie.c | 7 ++-- + net/ipv4/route.c | 4 +-- + net/ipv4/xfrm4_policy.c | 4 +-- + net/ipv6/ip6_output.c | 3 +- + net/ipv6/route.c | 12 ------- + net/ipv6/xfrm6_policy.c | 3 +- + net/l3mdev/l3mdev.c | 43 +++++++++-------------- + tools/testing/selftests/net/fcnal-test.sh | 2 +- + 12 files changed, 37 insertions(+), 63 deletions(-) + +diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c +index 091dd7caf10cc..85f5d78ff9ac0 100644 +--- a/drivers/net/vrf.c ++++ b/drivers/net/vrf.c +@@ -471,14 +471,13 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, + + memset(&fl6, 0, sizeof(fl6)); + /* needed to match OIF rule */ +- fl6.flowi6_oif = dev->ifindex; ++ fl6.flowi6_l3mdev = dev->ifindex; + fl6.flowi6_iif = LOOPBACK_IFINDEX; + fl6.daddr = iph->daddr; + fl6.saddr = iph->saddr; + fl6.flowlabel = ip6_flowinfo(iph); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = iph->nexthdr; +- fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; + + dst = 
ip6_dst_lookup_flow(net, NULL, &fl6, NULL); + if (IS_ERR(dst) || dst == dst_null) +@@ -550,10 +549,10 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, + + memset(&fl4, 0, sizeof(fl4)); + /* needed to match OIF rule */ +- fl4.flowi4_oif = vrf_dev->ifindex; ++ fl4.flowi4_l3mdev = vrf_dev->ifindex; + fl4.flowi4_iif = LOOPBACK_IFINDEX; + fl4.flowi4_tos = RT_TOS(ip4h->tos); +- fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF; ++ fl4.flowi4_flags = FLOWI_FLAG_ANYSRC; + fl4.flowi4_proto = ip4h->protocol; + fl4.daddr = ip4h->daddr; + fl4.saddr = ip4h->saddr; +diff --git a/include/net/flow.h b/include/net/flow.h +index 776bacc96242a..079cc493fe67d 100644 +--- a/include/net/flow.h ++++ b/include/net/flow.h +@@ -29,6 +29,7 @@ struct flowi_tunnel { + struct flowi_common { + int flowic_oif; + int flowic_iif; ++ int flowic_l3mdev; + __u32 flowic_mark; + __u8 flowic_tos; + __u8 flowic_scope; +@@ -36,7 +37,6 @@ struct flowi_common { + __u8 flowic_flags; + #define FLOWI_FLAG_ANYSRC 0x01 + #define FLOWI_FLAG_KNOWN_NH 0x02 +-#define FLOWI_FLAG_SKIP_NH_OIF 0x04 + __u32 flowic_secid; + kuid_t flowic_uid; + __u32 flowic_multipath_hash; +@@ -65,6 +65,7 @@ struct flowi4 { + struct flowi_common __fl_common; + #define flowi4_oif __fl_common.flowic_oif + #define flowi4_iif __fl_common.flowic_iif ++#define flowi4_l3mdev __fl_common.flowic_l3mdev + #define flowi4_mark __fl_common.flowic_mark + #define flowi4_tos __fl_common.flowic_tos + #define flowi4_scope __fl_common.flowic_scope +@@ -97,6 +98,7 @@ static inline void flowi4_init_output(struct flowi4 *fl4, int oif, + { + fl4->flowi4_oif = oif; + fl4->flowi4_iif = LOOPBACK_IFINDEX; ++ fl4->flowi4_l3mdev = 0; + fl4->flowi4_mark = mark; + fl4->flowi4_tos = tos; + fl4->flowi4_scope = scope; +@@ -127,6 +129,7 @@ struct flowi6 { + struct flowi_common __fl_common; + #define flowi6_oif __fl_common.flowic_oif + #define flowi6_iif __fl_common.flowic_iif ++#define flowi6_l3mdev __fl_common.flowic_l3mdev + #define 
flowi6_mark __fl_common.flowic_mark + #define flowi6_scope __fl_common.flowic_scope + #define flowi6_proto __fl_common.flowic_proto +@@ -156,6 +159,7 @@ struct flowi { + } u; + #define flowi_oif u.__fl_common.flowic_oif + #define flowi_iif u.__fl_common.flowic_iif ++#define flowi_l3mdev u.__fl_common.flowic_l3mdev + #define flowi_mark u.__fl_common.flowic_mark + #define flowi_tos u.__fl_common.flowic_tos + #define flowi_scope u.__fl_common.flowic_scope +diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c +index c21d57f02c651..5a3af86ee417a 100644 +--- a/net/ipv4/fib_frontend.c ++++ b/net/ipv4/fib_frontend.c +@@ -290,7 +290,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) + bool vmark = in_dev && IN_DEV_SRC_VMARK(in_dev); + struct flowi4 fl4 = { + .flowi4_iif = LOOPBACK_IFINDEX, +- .flowi4_oif = l3mdev_master_ifindex_rcu(dev), ++ .flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev), + .daddr = ip_hdr(skb)->saddr, + .flowi4_tos = ip_hdr(skb)->tos & IPTOS_RT_MASK, + .flowi4_scope = scope, +@@ -352,9 +352,8 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, + bool dev_match; + + fl4.flowi4_oif = 0; +- fl4.flowi4_iif = l3mdev_master_ifindex_rcu(dev); +- if (!fl4.flowi4_iif) +- fl4.flowi4_iif = oif ? : LOOPBACK_IFINDEX; ++ fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(dev); ++ fl4.flowi4_iif = oif ? 
: LOOPBACK_IFINDEX; + fl4.daddr = src; + fl4.saddr = dst; + fl4.flowi4_tos = tos; +diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c +index 735901b8c9f69..3d00253afbb8d 100644 +--- a/net/ipv4/fib_semantics.c ++++ b/net/ipv4/fib_semantics.c +@@ -2269,7 +2269,7 @@ void fib_select_multipath(struct fib_result *res, int hash) + void fib_select_path(struct net *net, struct fib_result *res, + struct flowi4 *fl4, const struct sk_buff *skb) + { +- if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) ++ if (fl4->flowi4_oif) + goto check_saddr; + + #ifdef CONFIG_IP_ROUTE_MULTIPATH +diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c +index 0b74debeecbb1..ec0113ecf3949 100644 +--- a/net/ipv4/fib_trie.c ++++ b/net/ipv4/fib_trie.c +@@ -1428,11 +1428,8 @@ bool fib_lookup_good_nhc(const struct fib_nh_common *nhc, int fib_flags, + !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) + return false; + +- if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { +- if (flp->flowi4_oif && +- flp->flowi4_oif != nhc->nhc_oif) +- return false; +- } ++ if (flp->flowi4_oif && flp->flowi4_oif != nhc->nhc_oif) ++ return false; + + return true; + } +diff --git a/net/ipv4/route.c b/net/ipv4/route.c +index 60fc35defdf8b..3522801885787 100644 +--- a/net/ipv4/route.c ++++ b/net/ipv4/route.c +@@ -2285,6 +2285,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, + /* + * Now we are ready to route packet. + */ ++ fl4.flowi4_l3mdev = 0; + fl4.flowi4_oif = 0; + fl4.flowi4_iif = dev->ifindex; + fl4.flowi4_mark = skb->mark; +@@ -2761,8 +2762,7 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, + res->fi = NULL; + res->table = NULL; + if (fl4->flowi4_oif && +- (ipv4_is_multicast(fl4->daddr) || +- !netif_index_is_l3_master(net, fl4->flowi4_oif))) { ++ (ipv4_is_multicast(fl4->daddr) || !fl4->flowi4_l3mdev)) { + /* Apparently, routing tables are wrong. Assume, + * that the destination is on link. 
+ * +diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c +index 9ebd54752e03b..4548a91acdc89 100644 +--- a/net/ipv4/xfrm4_policy.c ++++ b/net/ipv4/xfrm4_policy.c +@@ -28,13 +28,11 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, + memset(fl4, 0, sizeof(*fl4)); + fl4->daddr = daddr->a4; + fl4->flowi4_tos = tos; +- fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); ++ fl4->flowi4_l3mdev = l3mdev_master_ifindex_by_index(net, oif); + fl4->flowi4_mark = mark; + if (saddr) + fl4->saddr = saddr->a4; + +- fl4->flowi4_flags = FLOWI_FLAG_SKIP_NH_OIF; +- + rt = __ip_route_output_key(net, fl4); + if (!IS_ERR(rt)) + return &rt->dst; +diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c +index ce37c83455796..afcc3c44d87cf 100644 +--- a/net/ipv6/ip6_output.c ++++ b/net/ipv6/ip6_output.c +@@ -1058,8 +1058,7 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, + #ifdef CONFIG_IPV6_SUBTREES + ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || + #endif +- (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) && +- (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) { ++ (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { + dst_release(dst); + dst = NULL; + } +diff --git a/net/ipv6/route.c b/net/ipv6/route.c +index d937ee942a4fc..35d3f02ddf163 100644 +--- a/net/ipv6/route.c ++++ b/net/ipv6/route.c +@@ -1209,9 +1209,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_lookup(struct net *net, + struct fib6_node *fn; + struct rt6_info *rt; + +- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) +- flags &= ~RT6_LOOKUP_F_IFACE; +- + rcu_read_lock(); + fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + restart: +@@ -2182,9 +2179,6 @@ int fib6_table_lookup(struct net *net, struct fib6_table *table, int oif, + fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); + saved_fn = fn; + +- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) +- oif = 0; +- + redo_rt6_select: + 
rt6_select(net, fn, oif, res, strict); + if (res->f6i == net->ipv6.fib6_null_entry) { +@@ -3060,12 +3054,6 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *__ip6_route_redirect(struct net *net, + struct fib6_info *rt; + struct fib6_node *fn; + +- /* l3mdev_update_flow overrides oif if the device is enslaved; in +- * this case we must match on the real ingress device, so reset it +- */ +- if (fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) +- fl6->flowi6_oif = skb->dev->ifindex; +- + /* Get the "current" route for this destination and + * check if the redirect has come from appropriate router. + * +diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c +index 7c903e0e446cb..492b9692c0dc0 100644 +--- a/net/ipv6/xfrm6_policy.c ++++ b/net/ipv6/xfrm6_policy.c +@@ -33,8 +33,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, + int err; + + memset(&fl6, 0, sizeof(fl6)); +- fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif); +- fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; ++ fl6.flowi6_l3mdev = l3mdev_master_ifindex_by_index(net, oif); + fl6.flowi6_mark = mark; + memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); + if (saddr) +diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c +index 8b14a24f10404..ca10916340b09 100644 +--- a/net/l3mdev/l3mdev.c ++++ b/net/l3mdev/l3mdev.c +@@ -250,25 +250,19 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + struct net_device *dev; + int rc = 0; + +- rcu_read_lock(); ++ /* update flow ensures flowi_l3mdev is set when relevant */ ++ if (!fl->flowi_l3mdev) ++ return 0; + +- dev = dev_get_by_index_rcu(net, fl->flowi_oif); +- if (dev && netif_is_l3_master(dev) && +- dev->l3mdev_ops->l3mdev_fib_table) { +- arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); +- rc = 1; +- goto out; +- } ++ rcu_read_lock(); + +- dev = dev_get_by_index_rcu(net, fl->flowi_iif); ++ dev = dev_get_by_index_rcu(net, fl->flowi_l3mdev); + if (dev && netif_is_l3_master(dev) && + dev->l3mdev_ops->l3mdev_fib_table) { + 
arg->table = dev->l3mdev_ops->l3mdev_fib_table(dev); + rc = 1; +- goto out; + } + +-out: + rcu_read_unlock(); + + return rc; +@@ -277,31 +271,28 @@ int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, + void l3mdev_update_flow(struct net *net, struct flowi *fl) + { + struct net_device *dev; +- int ifindex; + + rcu_read_lock(); + + if (fl->flowi_oif) { + dev = dev_get_by_index_rcu(net, fl->flowi_oif); + if (dev) { +- ifindex = l3mdev_master_ifindex_rcu(dev); +- if (ifindex) { +- fl->flowi_oif = ifindex; +- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; +- goto out; +- } ++ if (!fl->flowi_l3mdev) ++ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); ++ ++ /* oif set to L3mdev directs lookup to its table; ++ * reset to avoid oif match in fib_lookup ++ */ ++ if (netif_is_l3_master(dev)) ++ fl->flowi_oif = 0; ++ goto out; + } + } + +- if (fl->flowi_iif) { ++ if (fl->flowi_iif > LOOPBACK_IFINDEX && !fl->flowi_l3mdev) { + dev = dev_get_by_index_rcu(net, fl->flowi_iif); +- if (dev) { +- ifindex = l3mdev_master_ifindex_rcu(dev); +- if (ifindex) { +- fl->flowi_iif = ifindex; +- fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; +- } +- } ++ if (dev) ++ fl->flowi_l3mdev = l3mdev_master_ifindex_rcu(dev); + } + + out: +diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh +index 6ecdbbe1b54fb..bed85001da735 100755 +--- a/tools/testing/selftests/net/fcnal-test.sh ++++ b/tools/testing/selftests/net/fcnal-test.sh +@@ -750,7 +750,7 @@ ipv4_ping_vrf() + log_start + show_hint "Fails since address on vrf device is out of device scope" + run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a} +- log_test_addr ${a} $? 1 "ping local, device bind" ++ log_test_addr ${a} $? 
2 "ping local, device bind" + done + + # +-- +2.43.0 + diff --git a/queue-5.15/remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rpr.patch b/queue-5.15/remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rpr.patch new file mode 100644 index 00000000000..2a01c5ea53b --- /dev/null +++ b/queue-5.15/remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rpr.patch @@ -0,0 +1,64 @@ +From cdad322d19592d7bad837d59f5083dfca421533d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 12 Jun 2024 16:17:14 +0300 +Subject: remoteproc: imx_rproc: Fix refcount mistake in imx_rproc_addr_init + +From: Aleksandr Mishin + +[ Upstream commit dce68a49be26abf52712e0ee452a45fa01ab4624 ] + +In imx_rproc_addr_init() strcmp() is performed over the node after the +of_node_put() is performed over it. +Fix this error by moving of_node_put() calls. + +Found by Linux Verification Center (linuxtesting.org) with SVACE. + +Fixes: 5e4c1243071d ("remoteproc: imx_rproc: support remote cores booted before Linux Kernel") +Cc: stable@vger.kernel.org +Signed-off-by: Aleksandr Mishin +Link: https://lore.kernel.org/r/20240612131714.12907-1-amishin@t-argos.ru +Signed-off-by: Mathieu Poirier +Signed-off-by: Sasha Levin +--- + drivers/remoteproc/imx_rproc.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/drivers/remoteproc/imx_rproc.c b/drivers/remoteproc/imx_rproc.c +index cfdb96cafc7b9..d5ce97e75f027 100644 +--- a/drivers/remoteproc/imx_rproc.c ++++ b/drivers/remoteproc/imx_rproc.c +@@ -596,25 +596,29 @@ static int imx_rproc_addr_init(struct imx_rproc *priv, + continue; + } + err = of_address_to_resource(node, 0, &res); +- of_node_put(node); + if (err) { + dev_err(dev, "unable to resolve memory region\n"); ++ of_node_put(node); + return err; + } + +- if (b >= IMX_RPROC_MEM_MAX) ++ if (b >= IMX_RPROC_MEM_MAX) { ++ of_node_put(node); + break; ++ } + + /* Not use resource version, because we might share region */ + priv->mem[b].cpu_addr = devm_ioremap(&pdev->dev, res.start, 
resource_size(&res)); + if (!priv->mem[b].cpu_addr) { + dev_err(dev, "failed to remap %pr\n", &res); ++ of_node_put(node); + return -ENOMEM; + } + priv->mem[b].sys_addr = res.start; + priv->mem[b].size = resource_size(&res); + if (!strcmp(node->name, "rsc-table")) + priv->rsc_table = priv->mem[b].cpu_addr; ++ of_node_put(node); + b++; + } + +-- +2.43.0 + diff --git a/queue-5.15/series b/queue-5.15/series index 7811890a15b..2e3d4da5100 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -303,3 +303,46 @@ nvme-pci-add-missing-condition-check-for-existence-o.patch fs-don-t-allow-non-init-s_user_ns-for-filesystems-wi.patch powerpc-configs-update-defconfig-with-now-user-visible-config_fsl_ifc.patch f2fs-fix-wrong-continue-condition-in-gc.patch +arm64-dts-qcom-msm8996-move-clock-cells-to-qmp-phy-c.patch +arm64-dts-qcom-msm8998-drop-usb-phy-clock-index.patch +arm64-dts-qcom-msm8998-switch-usb-qmp-phy-to-new-sty.patch +arm64-dts-qcom-msm8998-disable-ss-instance-in-parkmo.patch +arm64-dts-qcom-ipq8074-disable-ss-instance-in-parkmo.patch +net-add-l3mdev-index-to-flow-struct-and-avoid-oif-re.patch +ipv4-fix-source-address-selection-with-route-leak.patch +ipc-check-checkpoint_restore_ns_capable-to-modify-c-.patch +ipc-ipc_sysctl.c-remove-fallback-for-config_proc_sys.patch +ipc-store-mqueue-sysctls-in-the-ipc-namespace.patch +ipc-store-ipc-sysctls-in-the-ipc-namespace.patch +ipc-check-permissions-for-checkpoint_restart-sysctls.patch +sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch +sysctl-allow-to-change-limits-for-posix-messages-que.patch +sysctl-treewide-drop-unused-argument-ctl_table_root-.patch +sysctl-always-initialize-i_uid-i_gid.patch +ext4-make-ext4_es_insert_extent-return-void.patch +ext4-refactor-ext4_da_map_blocks.patch +ext4-convert-to-exclusive-lock-while-inserting-delal.patch +ext4-factor-out-a-common-helper-to-query-extent-map.patch +ext4-check-the-extent-status-again-before-inserting-.patch 
+soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch +drivers-soc-xilinx-check-return-status-of-get_api_ve.patch +leds-trigger-use-rcu-to-protect-the-led_cdevs-list.patch +leds-trigger-remove-unused-function-led_trigger_rena.patch +leds-trigger-store-brightness-set-by-led_trigger_eve.patch +leds-trigger-call-synchronize_rcu-before-calling-tri.patch +leds-triggers-flush-pending-brightness-before-activa.patch +f2fs-introduce-f2fs_ipu_honor_opu_write-ipu-policy.patch +f2fs-fix-to-avoid-use-ssr-allocate-when-do-defragmen.patch +f2fs-assign-curseg_all_data_atgc-if-blkaddr-is-valid.patch +irqdomain-use-return-value-of-strreplace.patch +irqdomain-fixed-unbalanced-fwnode-get-and-put.patch +genirq-allow-the-pm-device-to-originate-from-irq-dom.patch +irqchip-imx-irqsteer-constify-irq_chip-struct.patch +irqchip-imx-irqsteer-add-runtime-pm-support.patch +irqchip-imx-irqsteer-handle-runtime-power-management.patch +drm-dp_mst-fix-all-mstb-marked-as-not-probed-after-s.patch +remoteproc-imx_rproc-fix-refcount-mistake-in-imx_rpr.patch +mips-loongson64-dts-add-rtc-support-to-loongson-2k10.patch +mips-loongson64-dts-fix-pcie-port-nodes-for-ls7a.patch +mips-dts-loongson-fix-liointc-irq-polarity.patch +mips-dts-loongson-fix-ls2k1000-rtc-interrupt.patch diff --git a/queue-5.15/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch b/queue-5.15/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch new file mode 100644 index 00000000000..5f4163244ab --- /dev/null +++ b/queue-5.15/soc-xilinx-move-pm_init_finalize-to-zynqmp_pm_domain.patch @@ -0,0 +1,92 @@ +From f7d72a3595eb916bdc5b65cd8a30efc41c32555a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 25 Aug 2021 17:03:10 +0200 +Subject: soc: xilinx: move PM_INIT_FINALIZE to zynqmp_pm_domains driver + +From: Michael Tretter + +[ Upstream commit 7fd890b89dea55eb5866640eb8befad26d558161 ] + +PM_INIT_FINALIZE tells the PMU FW that Linux is able to handle the power +management nodes that are provided by the PMU FW. 
Nodes that are not +requested are shut down after this call. + +Calling PM_INIT_FINALIZE from the zynqmp_power driver is wrong. The PM +node request mechanism is implemented in the zynqmp_pm_domains driver, +which must also call PM_INIT_FINALIZE. + +Due to the behavior of the PMU FW, all devices must be powered up before +PM_INIT_FINALIZE is called, because otherwise the devices might +misbehave. Calling PM_INIT_FINALIZE from the sync_state device callback +ensures that all users probed successfully before the PMU FW is allowed +to power off unused domains. + +Signed-off-by: Michael Tretter +Acked-by: Michal Simek +Acked-by: Rajan Vaja +Link: https://lore.kernel.org/r/20210825150313.4033156-2-m.tretter@pengutronix.de +Signed-off-by: Michal Simek +Stable-dep-of: 9b003e14801c ("drivers: soc: xilinx: check return status of get_api_version()") +Signed-off-by: Sasha Levin +--- + drivers/soc/xilinx/zynqmp_pm_domains.c | 16 ++++++++++++++++ + drivers/soc/xilinx/zynqmp_power.c | 1 - + 2 files changed, 16 insertions(+), 1 deletion(-) + +diff --git a/drivers/soc/xilinx/zynqmp_pm_domains.c b/drivers/soc/xilinx/zynqmp_pm_domains.c +index 226d343f0a6a5..81e8e10f10929 100644 +--- a/drivers/soc/xilinx/zynqmp_pm_domains.c ++++ b/drivers/soc/xilinx/zynqmp_pm_domains.c +@@ -152,11 +152,17 @@ static int zynqmp_gpd_power_off(struct generic_pm_domain *domain) + static int zynqmp_gpd_attach_dev(struct generic_pm_domain *domain, + struct device *dev) + { ++ struct device_link *link; + int ret; + struct zynqmp_pm_domain *pd; + + pd = container_of(domain, struct zynqmp_pm_domain, gpd); + ++ link = device_link_add(dev, &domain->dev, DL_FLAG_SYNC_STATE_ONLY); ++ if (!link) ++ dev_dbg(&domain->dev, "failed to create device link for %s\n", ++ dev_name(dev)); ++ + /* If this is not the first device to attach there is nothing to do */ + if (domain->device_count) + return 0; +@@ -299,9 +305,19 @@ static int zynqmp_gpd_remove(struct platform_device *pdev) + return 0; + } + ++static void 
zynqmp_gpd_sync_state(struct device *dev) ++{ ++ int ret; ++ ++ ret = zynqmp_pm_init_finalize(); ++ if (ret) ++ dev_warn(dev, "failed to release power management to firmware\n"); ++} ++ + static struct platform_driver zynqmp_power_domain_driver = { + .driver = { + .name = "zynqmp_power_controller", ++ .sync_state = zynqmp_gpd_sync_state, + }, + .probe = zynqmp_gpd_probe, + .remove = zynqmp_gpd_remove, +diff --git a/drivers/soc/xilinx/zynqmp_power.c b/drivers/soc/xilinx/zynqmp_power.c +index c556623dae024..f8c301984d4f9 100644 +--- a/drivers/soc/xilinx/zynqmp_power.c ++++ b/drivers/soc/xilinx/zynqmp_power.c +@@ -178,7 +178,6 @@ static int zynqmp_pm_probe(struct platform_device *pdev) + u32 pm_api_version; + struct mbox_client *client; + +- zynqmp_pm_init_finalize(); + zynqmp_pm_get_api_version(&pm_api_version); + + /* Check PM API version number */ +-- +2.43.0 + diff --git a/queue-5.15/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch b/queue-5.15/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch new file mode 100644 index 00000000000..d92f228b1c4 --- /dev/null +++ b/queue-5.15/sysctl-allow-change-system-v-ipc-sysctls-inside-ipc-.patch @@ -0,0 +1,140 @@ +From 696cacd9d0e086e88f59a5d41fbe5a7e64e2d281 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 15 Jan 2024 15:46:41 +0000 +Subject: sysctl: allow change system v ipc sysctls inside ipc namespace + +From: Alexey Gladkov + +[ Upstream commit 50ec499b9a43e46200c9f7b7d723ab2e4af540b3 ] + +Patch series "Allow to change ipc/mq sysctls inside ipc namespace", v3. + +Right now ipc and mq limits count as per ipc namespace, but only real root +can change them. By default, the current values of these limits are such +that it can only be reduced. Since only root can change the values, it is +impossible to reduce these limits in the rootless container. 
+ +We can allow limit changes within ipc namespace because mq parameters are +limited by RLIMIT_MSGQUEUE and ipc parameters are not limited to anything +other than cgroups. + +This patch (of 3): + +Rootless containers are not allowed to modify kernel IPC parameters. + +All default limits are set to such high values that in fact there are no +limits at all. All limits are not inherited and are initialized to +default values when a new ipc_namespace is created. + +For new ipc_namespace: + +size_t ipc_ns.shm_ctlmax = SHMMAX; // (ULONG_MAX - (1UL << 24)) +size_t ipc_ns.shm_ctlall = SHMALL; // (ULONG_MAX - (1UL << 24)) +int ipc_ns.shm_ctlmni = IPCMNI; // (1 << 15) +int ipc_ns.shm_rmid_forced = 0; +unsigned int ipc_ns.msg_ctlmax = MSGMAX; // 8192 +unsigned int ipc_ns.msg_ctlmni = MSGMNI; // 32000 +unsigned int ipc_ns.msg_ctlmnb = MSGMNB; // 16384 + +The shm_tot (total amount of shared pages) has also ceased to be global, +it is located in ipc_namespace and is not inherited from anywhere. + +In such conditions, it cannot be said that these limits limit anything. +The real limiter for them is cgroups. + +If we allow rootless containers to change these parameters, then it can +only be reduced. + +Link: https://lkml.kernel.org/r/cover.1705333426.git.legion@kernel.org +Link: https://lkml.kernel.org/r/d2f4603305cbfed58a24755aa61d027314b73a45.1705333426.git.legion@kernel.org +Signed-off-by: Alexey Gladkov +Signed-off-by: Eric W. 
Biederman +Link: https://lkml.kernel.org/r/e2d84d3ec0172cfff759e6065da84ce0cc2736f8.1663756794.git.legion@kernel.org +Cc: Christian Brauner +Cc: Joel Granados +Cc: Kees Cook +Cc: Luis Chamberlain +Cc: Manfred Spraul +Cc: Davidlohr Bueso +Signed-off-by: Andrew Morton +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/ipc_sysctl.c | 37 +++++++++++++++++++++++++++++++++++-- + 1 file changed, 35 insertions(+), 2 deletions(-) + +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index a2b871d006da7..2864fd7fafaac 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -14,6 +14,7 @@ + #include <linux/ipc_namespace.h> + #include <linux/msg.h> + #include <linux/slab.h> ++#include <linux/cred.h> + #include "util.h" + + static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, +@@ -198,25 +199,57 @@ static int set_is_seen(struct ctl_table_set *set) + return &current->nsproxy->ipc_ns->ipc_set == set; + } + ++static void ipc_set_ownership(struct ctl_table_header *head, ++ struct ctl_table *table, ++ kuid_t *uid, kgid_t *gid) ++{ ++ struct ipc_namespace *ns = ++ container_of(head->set, struct ipc_namespace, ipc_set); ++ ++ kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); ++ kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); ++ ++ *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; ++ *gid = gid_valid(ns_root_gid) ? 
ns_root_gid : GLOBAL_ROOT_GID; ++} ++ + static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table) + { + int mode = table->mode; + + #ifdef CONFIG_CHECKPOINT_RESTORE +- struct ipc_namespace *ns = current->nsproxy->ipc_ns; ++ struct ipc_namespace *ns = ++ container_of(head->set, struct ipc_namespace, ipc_set); + + if (((table->data == &ns->ids[IPC_SEM_IDS].next_id) || + (table->data == &ns->ids[IPC_MSG_IDS].next_id) || + (table->data == &ns->ids[IPC_SHM_IDS].next_id)) && + checkpoint_restore_ns_capable(ns->user_ns)) + mode = 0666; ++ else + #endif +- return mode; ++ { ++ kuid_t ns_root_uid; ++ kgid_t ns_root_gid; ++ ++ ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); ++ ++ if (uid_eq(current_euid(), ns_root_uid)) ++ mode >>= 6; ++ ++ else if (in_egroup_p(ns_root_gid)) ++ mode >>= 3; ++ } ++ ++ mode &= 7; ++ ++ return (mode << 6) | (mode << 3) | mode; + } + + static struct ctl_table_root set_root = { + .lookup = set_lookup, + .permissions = ipc_permissions, ++ .set_ownership = ipc_set_ownership, + }; + + bool setup_ipc_sysctls(struct ipc_namespace *ns) +-- +2.43.0 + diff --git a/queue-5.15/sysctl-allow-to-change-limits-for-posix-messages-que.patch b/queue-5.15/sysctl-allow-to-change-limits-for-posix-messages-que.patch new file mode 100644 index 00000000000..af54c4d2b02 --- /dev/null +++ b/queue-5.15/sysctl-allow-to-change-limits-for-posix-messages-que.patch @@ -0,0 +1,95 @@ +From bf1288be3757c0a1ba8305c9c39990c649695786 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 15 Jan 2024 15:46:43 +0000 +Subject: sysctl: allow to change limits for posix messages queues + +From: Alexey Gladkov + +[ Upstream commit f9436a5d0497f759330d07e1189565edd4456be8 ] + +All parameters of posix messages queues (queues_max/msg_max/msgsize_max) +end up being limited by RLIMIT_MSGQUEUE. The code in mqueue_get_inode is +where that limiting happens. + +The RLIMIT_MSGQUEUE is bound to the user namespace and is counted +hierarchically. 
+ +We can allow root in the user namespace to modify the posix messages +queues parameters. + +Link: https://lkml.kernel.org/r/6ad67f23d1459a4f4339f74aa73bac0ecf3995e1.1705333426.git.legion@kernel.org +Signed-off-by: Alexey Gladkov +Signed-off-by: Eric W. Biederman +Link: https://lkml.kernel.org/r/7eb21211c8622e91d226e63416b1b93c079f60ee.1663756794.git.legion@kernel.org +Cc: Christian Brauner +Cc: Davidlohr Bueso +Cc: Joel Granados +Cc: Kees Cook +Cc: Luis Chamberlain +Cc: Manfred Spraul +Signed-off-by: Andrew Morton +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + ipc/mq_sysctl.c | 36 ++++++++++++++++++++++++++++++++++++ + 1 file changed, 36 insertions(+) + +diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c +index fbf6a8b93a265..ce03930aced55 100644 +--- a/ipc/mq_sysctl.c ++++ b/ipc/mq_sysctl.c +@@ -12,6 +12,7 @@ + #include <linux/stat.h> + #include <linux/capability.h> + #include <linux/slab.h> ++#include <linux/cred.h> + + static int msg_max_limit_min = MIN_MSGMAX; + static int msg_max_limit_max = HARD_MSGMAX; +@@ -76,8 +77,43 @@ static int set_is_seen(struct ctl_table_set *set) + return &current->nsproxy->ipc_ns->mq_set == set; + } + ++static void mq_set_ownership(struct ctl_table_header *head, ++ struct ctl_table *table, ++ kuid_t *uid, kgid_t *gid) ++{ ++ struct ipc_namespace *ns = ++ container_of(head->set, struct ipc_namespace, mq_set); ++ ++ kuid_t ns_root_uid = make_kuid(ns->user_ns, 0); ++ kgid_t ns_root_gid = make_kgid(ns->user_ns, 0); ++ ++ *uid = uid_valid(ns_root_uid) ? ns_root_uid : GLOBAL_ROOT_UID; ++ *gid = gid_valid(ns_root_gid) ? 
ns_root_gid : GLOBAL_ROOT_GID; ++} ++ ++static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table) ++{ ++ int mode = table->mode; ++ kuid_t ns_root_uid; ++ kgid_t ns_root_gid; ++ ++ mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); ++ ++ if (uid_eq(current_euid(), ns_root_uid)) ++ mode >>= 6; ++ ++ else if (in_egroup_p(ns_root_gid)) ++ mode >>= 3; ++ ++ mode &= 7; ++ ++ return (mode << 6) | (mode << 3) | mode; ++} ++ + static struct ctl_table_root set_root = { + .lookup = set_lookup, ++ .permissions = mq_permissions, ++ .set_ownership = mq_set_ownership, + }; + + bool setup_mq_sysctls(struct ipc_namespace *ns) +-- +2.43.0 + diff --git a/queue-5.15/sysctl-always-initialize-i_uid-i_gid.patch b/queue-5.15/sysctl-always-initialize-i_uid-i_gid.patch new file mode 100644 index 00000000000..f9aead22f23 --- /dev/null +++ b/queue-5.15/sysctl-always-initialize-i_uid-i_gid.patch @@ -0,0 +1,52 @@ +From 692f0a1c18b96128339a77efdc7c9f533740bcd1 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 2 Apr 2024 23:10:34 +0200 +Subject: sysctl: always initialize i_uid/i_gid +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Weißschuh + +[ Upstream commit 98ca62ba9e2be5863c7d069f84f7166b45a5b2f4 ] + +Always initialize i_uid/i_gid inside the sysfs core so set_ownership() +can safely skip setting them. + +Commit 5ec27ec735ba ("fs/proc/proc_sysctl.c: fix the default values of +i_uid/i_gid on /proc/sys inodes.") added defaults for i_uid/i_gid when +set_ownership() was not implemented. It also missed adjusting +net_ctl_set_ownership() to use the same default values in case the +computation of a better value failed. 
+ +Fixes: 5ec27ec735ba ("fs/proc/proc_sysctl.c: fix the default values of i_uid/i_gid on /proc/sys inodes.") +Cc: stable@vger.kernel.org +Signed-off-by: Thomas Weißschuh +Signed-off-by: Joel Granados +Signed-off-by: Sasha Levin +--- + fs/proc/proc_sysctl.c | 6 ++---- + 1 file changed, 2 insertions(+), 4 deletions(-) + +diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c +index 4288fa4614eb2..6dd7efd8562e2 100644 +--- a/fs/proc/proc_sysctl.c ++++ b/fs/proc/proc_sysctl.c +@@ -466,12 +466,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, + make_empty_dir_inode(inode); + } + ++ inode->i_uid = GLOBAL_ROOT_UID; ++ inode->i_gid = GLOBAL_ROOT_GID; + if (root->set_ownership) + root->set_ownership(head, &inode->i_uid, &inode->i_gid); +- else { +- inode->i_uid = GLOBAL_ROOT_UID; +- inode->i_gid = GLOBAL_ROOT_GID; +- } + + return inode; + } +-- +2.43.0 + diff --git a/queue-5.15/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch b/queue-5.15/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch new file mode 100644 index 00000000000..fed43d6fac9 --- /dev/null +++ b/queue-5.15/sysctl-treewide-drop-unused-argument-ctl_table_root-.patch @@ -0,0 +1,127 @@ +From d734e3ca18c278370bbab4db7f20821f5fbba65b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 15 Mar 2024 19:11:30 +0100 +Subject: sysctl: treewide: drop unused argument + ctl_table_root::set_ownership(table) +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Weißschuh + +[ Upstream commit 520713a93d550406dae14d49cdb8778d70cecdfd ] + +Remove the 'table' argument from set_ownership as it is never used. This +change is a step towards putting "struct ctl_table" into .rodata and +eventually having sysctl core only use "const struct ctl_table". 
+ +The patch was created with the following coccinelle script: + + @@ + identifier func, head, table, uid, gid; + @@ + + void func( + struct ctl_table_header *head, + - struct ctl_table *table, + kuid_t *uid, kgid_t *gid) + { ... } + +No additional occurrences of 'set_ownership' were found after doing a +tree-wide search. + +Reviewed-by: Joel Granados +Signed-off-by: Thomas Weißschuh +Signed-off-by: Joel Granados +Stable-dep-of: 98ca62ba9e2b ("sysctl: always initialize i_uid/i_gid") +Signed-off-by: Sasha Levin +--- + fs/proc/proc_sysctl.c | 2 +- + include/linux/sysctl.h | 1 - + ipc/ipc_sysctl.c | 3 +-- + ipc/mq_sysctl.c | 3 +-- + net/sysctl_net.c | 1 - + 5 files changed, 3 insertions(+), 7 deletions(-) + +diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c +index 4192fe6ec3da2..4288fa4614eb2 100644 +--- a/fs/proc/proc_sysctl.c ++++ b/fs/proc/proc_sysctl.c +@@ -467,7 +467,7 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, + } + + if (root->set_ownership) +- root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); ++ root->set_ownership(head, &inode->i_uid, &inode->i_gid); + else { + inode->i_uid = GLOBAL_ROOT_UID; + inode->i_gid = GLOBAL_ROOT_GID; +diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h +index 32d79ef906e51..153755e07017f 100644 +--- a/include/linux/sysctl.h ++++ b/include/linux/sysctl.h +@@ -173,7 +173,6 @@ struct ctl_table_root { + struct ctl_table_set default_set; + struct ctl_table_set *(*lookup)(struct ctl_table_root *root); + void (*set_ownership)(struct ctl_table_header *head, +- struct ctl_table *table, + kuid_t *uid, kgid_t *gid); + int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); + }; +diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c +index 2864fd7fafaac..c118d8293d3b6 100644 +--- a/ipc/ipc_sysctl.c ++++ b/ipc/ipc_sysctl.c +@@ -200,7 +200,6 @@ static int set_is_seen(struct ctl_table_set *set) + } + + static void ipc_set_ownership(struct ctl_table_header *head, +- struct 
ctl_table *table, + kuid_t *uid, kgid_t *gid) + { + struct ipc_namespace *ns = +@@ -232,7 +231,7 @@ static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *tabl + kuid_t ns_root_uid; + kgid_t ns_root_gid; + +- ipc_set_ownership(head, table, &ns_root_uid, &ns_root_gid); ++ ipc_set_ownership(head, &ns_root_uid, &ns_root_gid); + + if (uid_eq(current_euid(), ns_root_uid)) + mode >>= 6; +diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c +index ce03930aced55..c960691fc24d9 100644 +--- a/ipc/mq_sysctl.c ++++ b/ipc/mq_sysctl.c +@@ -78,7 +78,6 @@ static int set_is_seen(struct ctl_table_set *set) + } + + static void mq_set_ownership(struct ctl_table_header *head, +- struct ctl_table *table, + kuid_t *uid, kgid_t *gid) + { + struct ipc_namespace *ns = +@@ -97,7 +96,7 @@ static int mq_permissions(struct ctl_table_header *head, struct ctl_table *table + kuid_t ns_root_uid; + kgid_t ns_root_gid; + +- mq_set_ownership(head, table, &ns_root_uid, &ns_root_gid); ++ mq_set_ownership(head, &ns_root_uid, &ns_root_gid); + + if (uid_eq(current_euid(), ns_root_uid)) + mode >>= 6; +diff --git a/net/sysctl_net.c b/net/sysctl_net.c +index f6cb0d4d114cd..95e9f40aeff0b 100644 +--- a/net/sysctl_net.c ++++ b/net/sysctl_net.c +@@ -54,7 +54,6 @@ static int net_ctl_permissions(struct ctl_table_header *head, + } + + static void net_ctl_set_ownership(struct ctl_table_header *head, +- struct ctl_table *table, + kuid_t *uid, kgid_t *gid) + { + struct net *net = container_of(head->set, struct net, sysctls); +-- +2.43.0 + -- 2.47.3