From: Sasha Levin Date: Sat, 14 Jun 2025 13:34:10 +0000 (-0400) Subject: Fixes for 6.1 X-Git-Tag: v6.6.94~69 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=0209fec1fa18207b4e4fa7fe4571c947ab1c3a8e;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 6.1 Signed-off-by: Sasha Levin --- diff --git a/queue-6.1/arm64-dts-marvell-udpu-define-pinctrl-state-for-alar.patch b/queue-6.1/arm64-dts-marvell-udpu-define-pinctrl-state-for-alar.patch new file mode 100644 index 0000000000..a87ac00329 --- /dev/null +++ b/queue-6.1/arm64-dts-marvell-udpu-define-pinctrl-state-for-alar.patch @@ -0,0 +1,81 @@ +From 6685eba35044472752e5c19bda38dee2c4081678 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 9 May 2025 15:48:52 +0200 +Subject: arm64: dts: marvell: uDPU: define pinctrl state for alarm LEDs + +From: Gabor Juhos + +[ Upstream commit b04f0d89e880bc2cca6a5c73cf287082c91878da ] + +The two alarm LEDs on the uDPU board have stopped working since +commit 78efa53e715e ("leds: Init leds class earlier"). + +The LEDs are driven by the GPIO{15,16} pins of the North Bridge +GPIO controller. These pins are part of the 'spi_quad' pin group +for which the 'spi' function is selected via the default pinctrl +state of the 'spi' node. This is wrong however, since in order to +allow controlling the LEDs, the pins should use the 'gpio' function. + +Before the commit mentioned above, the 'spi' function is selected +first by the pinctrl core before probing the spi driver, but then +it gets overridden to 'gpio' implicitly via the +devm_gpiod_get_index_optional() call from the 'leds-gpio' driver. + +After the commit, the LED subsystem gets initialized before the +SPI subsystem, so the function of the pin group remains 'spi' +which in turn prevents controlling of the LEDs. 
+ +Despite the change of the initialization order, the root cause is +that the pinctrl state definition is wrong since its initial commit +0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board"), + +To fix the problem, override the function in the 'spi_quad_pins' +node to 'gpio' and move the pinctrl state definition from the +'spi' node into the 'leds' node. + +Cc: stable@vger.kernel.org # needs adjustment for < 6.1 +Fixes: 0d45062cfc89 ("arm64: dts: marvell: Add device tree for uDPU board") +Signed-off-by: Gabor Juhos +Signed-off-by: Imre Kaloz +Signed-off-by: Gregory CLEMENT +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi +index 3f79923376fb2..37244e8816d9e 100644 +--- a/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi ++++ b/arch/arm64/boot/dts/marvell/armada-3720-uDPU.dtsi +@@ -26,6 +26,8 @@ memory@0 { + + leds { + compatible = "gpio-leds"; ++ pinctrl-names = "default"; ++ pinctrl-0 = <&spi_quad_pins>; + + led-power1 { + label = "udpu:green:power"; +@@ -82,8 +84,6 @@ &sdhci0 { + + &spi0 { + status = "okay"; +- pinctrl-names = "default"; +- pinctrl-0 = <&spi_quad_pins>; + + flash@0 { + compatible = "jedec,spi-nor"; +@@ -108,6 +108,10 @@ partition@180000 { + }; + }; + ++&spi_quad_pins { ++ function = "gpio"; ++}; ++ + &pinctrl_nb { + i2c2_recovery_pins: i2c2-recovery-pins { + groups = "i2c2"; +-- +2.39.5 + diff --git a/queue-6.1/arm64-dts-ti-k3-am65-main-add-missing-taps-to-sdhci0.patch b/queue-6.1/arm64-dts-ti-k3-am65-main-add-missing-taps-to-sdhci0.patch new file mode 100644 index 0000000000..2a7808bc86 --- /dev/null +++ b/queue-6.1/arm64-dts-ti-k3-am65-main-add-missing-taps-to-sdhci0.patch @@ -0,0 +1,41 @@ +From b1a25cd45b030a88989e717d0e913c4303167b4a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 29 Apr 2025 12:30:08 
-0500 +Subject: arm64: dts: ti: k3-am65-main: Add missing taps to sdhci0 + +From: Judith Mendez + +[ Upstream commit f55c9f087cc2e2252d44ffd9d58def2066fc176e ] + +For am65x, add missing ITAPDLYSEL values for Default Speed and High +Speed SDR modes to sdhci0 node according to the device datasheet [0]. + +[0] https://www.ti.com/lit/gpn/am6548 + +Fixes: eac99d38f861 ("arm64: dts: ti: k3-am654-main: Update otap-del-sel values") +Cc: stable@vger.kernel.org +Signed-off-by: Judith Mendez +Reviewed-by: Moteen Shah +Link: https://lore.kernel.org/r/20250429173009.33994-1-jm@ti.com +Signed-off-by: Nishanth Menon +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +index c09457fef2db0..ebd8434c6b60b 100644 +--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi ++++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +@@ -274,6 +274,8 @@ sdhci0: mmc@4f80000 { + ti,otap-del-sel-ddr50 = <0x5>; + ti,otap-del-sel-ddr52 = <0x5>; + ti,otap-del-sel-hs200 = <0x5>; ++ ti,itap-del-sel-legacy = <0xa>; ++ ti,itap-del-sel-mmc-hs = <0x1>; + ti,itap-del-sel-ddr52 = <0x0>; + dma-coherent; + }; +-- +2.39.5 + diff --git a/queue-6.1/arm64-dts-ti-k3-am65-main-drop-deprecated-ti-otap-de.patch b/queue-6.1/arm64-dts-ti-k3-am65-main-drop-deprecated-ti-otap-de.patch new file mode 100644 index 0000000000..f0f2032939 --- /dev/null +++ b/queue-6.1/arm64-dts-ti-k3-am65-main-drop-deprecated-ti-otap-de.patch @@ -0,0 +1,38 @@ +From cdfa476be116fbca08349e623316c93d261d152e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 7 Jun 2023 08:20:42 -0500 +Subject: arm64: dts: ti: k3-am65-main: Drop deprecated ti,otap-del-sel + property + +From: Nishanth Menon + +[ Upstream commit 2b9bb988742d1794e78d4297a99658f38477eedd ] + +ti,otap-del-sel has been deprecated in favor of ti,otap-del-sel-legacy. + +Drop the duplicate and misleading ti,otap-del-sel property. 
+ +Signed-off-by: Nishanth Menon +Link: https://lore.kernel.org/r/20230607132043.3932726-3-nm@ti.com +Signed-off-by: Vignesh Raghavendra +Stable-dep-of: f55c9f087cc2 ("arm64: dts: ti: k3-am65-main: Add missing taps to sdhci0") +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +index 83dd8993027ab..9854cf0e7f7b4 100644 +--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi ++++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +@@ -295,7 +295,6 @@ sdhci1: mmc@4fa0000 { + ti,otap-del-sel-ddr52 = <0x4>; + ti,otap-del-sel-hs200 = <0x7>; + ti,clkbuf-sel = <0x7>; +- ti,otap-del-sel = <0x2>; + ti,trm-icp = <0x8>; + dma-coherent; + }; +-- +2.39.5 + diff --git a/queue-6.1/arm64-dts-ti-k3-am65-main-fix-sdhci-node-properties.patch b/queue-6.1/arm64-dts-ti-k3-am65-main-fix-sdhci-node-properties.patch new file mode 100644 index 0000000000..9a7880bc2f --- /dev/null +++ b/queue-6.1/arm64-dts-ti-k3-am65-main-fix-sdhci-node-properties.patch @@ -0,0 +1,83 @@ +From 44a4777b0a21a4e35476d799b9b48981909bf372 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 23 Apr 2024 10:17:28 -0500 +Subject: arm64: dts: ti: k3-am65-main: Fix sdhci node properties + +From: Judith Mendez + +[ Upstream commit 8ffe9cb889f2b831a9d5bbb1f7ad42d30e31170f ] + +Update otap-del-sel properties as per datasheet [0]. + +Add missing clkbuf-sel and itap-del-sel values also as per +datasheet [0]. + +Move clkbuf-sel and ti,trm-icp above the otap-del-sel properties +so the sdhci nodes could be more uniform across platforms. 
+ +[0] https://www.ti.com/lit/ds/symlink/am6548.pdf + +Fixes: eac99d38f861 ("arm64: dts: ti: k3-am654-main: Update otap-del-sel values") +Fixes: d7600d070fb0 ("arm64: dts: ti: k3-am65-main: Add support for sdhci1") +Signed-off-by: Judith Mendez +Link: https://lore.kernel.org/r/20240423151732.3541894-2-jm@ti.com +Signed-off-by: Nishanth Menon +Stable-dep-of: f55c9f087cc2 ("arm64: dts: ti: k3-am65-main: Add missing taps to sdhci0") +Signed-off-by: Sasha Levin +--- + arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 17 +++++++++++------ + 1 file changed, 11 insertions(+), 6 deletions(-) + +diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +index 9854cf0e7f7b4..c09457fef2db0 100644 +--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi ++++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi +@@ -262,6 +262,8 @@ sdhci0: mmc@4f80000 { + interrupts = ; + mmc-ddr-1_8v; + mmc-hs200-1_8v; ++ ti,clkbuf-sel = <0x7>; ++ ti,trm-icp = <0x8>; + ti,otap-del-sel-legacy = <0x0>; + ti,otap-del-sel-mmc-hs = <0x0>; + ti,otap-del-sel-sd-hs = <0x0>; +@@ -272,8 +274,7 @@ sdhci0: mmc@4f80000 { + ti,otap-del-sel-ddr50 = <0x5>; + ti,otap-del-sel-ddr52 = <0x5>; + ti,otap-del-sel-hs200 = <0x5>; +- ti,otap-del-sel-hs400 = <0x0>; +- ti,trm-icp = <0x8>; ++ ti,itap-del-sel-ddr52 = <0x0>; + dma-coherent; + }; + +@@ -284,18 +285,22 @@ sdhci1: mmc@4fa0000 { + clocks = <&k3_clks 48 0>, <&k3_clks 48 1>; + clock-names = "clk_ahb", "clk_xin"; + interrupts = ; ++ ti,clkbuf-sel = <0x7>; ++ ti,trm-icp = <0x8>; + ti,otap-del-sel-legacy = <0x0>; + ti,otap-del-sel-mmc-hs = <0x0>; + ti,otap-del-sel-sd-hs = <0x0>; +- ti,otap-del-sel-sdr12 = <0x0>; +- ti,otap-del-sel-sdr25 = <0x0>; ++ ti,otap-del-sel-sdr12 = <0xf>; ++ ti,otap-del-sel-sdr25 = <0xf>; + ti,otap-del-sel-sdr50 = <0x8>; + ti,otap-del-sel-sdr104 = <0x7>; + ti,otap-del-sel-ddr50 = <0x4>; + ti,otap-del-sel-ddr52 = <0x4>; + ti,otap-del-sel-hs200 = <0x7>; +- ti,clkbuf-sel = <0x7>; +- ti,trm-icp = <0x8>; ++ ti,itap-del-sel-legacy = 
<0xa>; ++ ti,itap-del-sel-sd-hs = <0x1>; ++ ti,itap-del-sel-sdr12 = <0xa>; ++ ti,itap-del-sel-sdr25 = <0x1>; + dma-coherent; + }; + +-- +2.39.5 + diff --git a/queue-6.1/input-synaptics-rmi-fix-crash-with-unsupported-versi.patch b/queue-6.1/input-synaptics-rmi-fix-crash-with-unsupported-versi.patch new file mode 100644 index 0000000000..e12a73ccea --- /dev/null +++ b/queue-6.1/input-synaptics-rmi-fix-crash-with-unsupported-versi.patch @@ -0,0 +1,262 @@ +From 89838f006e4ac5463b31b6855900353c8ff3d74b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 5 May 2025 15:49:59 -0700 +Subject: Input: synaptics-rmi - fix crash with unsupported versions of F34 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Dmitry Torokhov + +[ Upstream commit ca39500f6af9cfe6823dc5aa8fbaed788d6e35b2 ] + +Sysfs interface for updating firmware for RMI devices is available even +when F34 probe fails. The code checks for presence of F34 "container" +pointer and then tries to use the function data attached to the +sub-device. F34 assigns the function data early, before it knows if +probe will succeed, leaving behind a stale pointer. + +Fix this by expanding checks to not only test for presence of F34 +"container" but also check if there is driver data assigned to the +sub-device, and call dev_set_drvdata() only after we are certain that +probe is successful. + +This is not a complete fix, since F34 will be freed during firmware +update, so there is still a race when fetching and accessing this +pointer. This race will be addressed in follow-up changes. 
+ +Reported-by: Hanno Böck +Fixes: 29fd0ec2bdbe ("Input: synaptics-rmi4 - add support for F34 device reflash") +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/aBlAl6sGulam-Qcx@google.com +Signed-off-by: Dmitry Torokhov +Signed-off-by: Sasha Levin +--- + drivers/input/rmi4/rmi_f34.c | 135 ++++++++++++++++++++--------------- + 1 file changed, 76 insertions(+), 59 deletions(-) + +diff --git a/drivers/input/rmi4/rmi_f34.c b/drivers/input/rmi4/rmi_f34.c +index 0d9a5756e3f59..cae1e41664921 100644 +--- a/drivers/input/rmi4/rmi_f34.c ++++ b/drivers/input/rmi4/rmi_f34.c +@@ -4,6 +4,7 @@ + * Copyright (C) 2016 Zodiac Inflight Innovations + */ + ++#include "linux/device.h" + #include + #include + #include +@@ -298,39 +299,30 @@ static int rmi_f34_update_firmware(struct f34_data *f34, + return ret; + } + +-static int rmi_f34_status(struct rmi_function *fn) +-{ +- struct f34_data *f34 = dev_get_drvdata(&fn->dev); +- +- /* +- * The status is the percentage complete, or once complete, +- * zero for success or a negative return code. 
+- */ +- return f34->update_status; +-} +- + static ssize_t rmi_driver_bootloader_id_show(struct device *dev, + struct device_attribute *dattr, + char *buf) + { + struct rmi_driver_data *data = dev_get_drvdata(dev); +- struct rmi_function *fn = data->f34_container; ++ struct rmi_function *fn; + struct f34_data *f34; + +- if (fn) { +- f34 = dev_get_drvdata(&fn->dev); +- +- if (f34->bl_version == 5) +- return sysfs_emit(buf, "%c%c\n", +- f34->bootloader_id[0], +- f34->bootloader_id[1]); +- else +- return sysfs_emit(buf, "V%d.%d\n", +- f34->bootloader_id[1], +- f34->bootloader_id[0]); +- } ++ fn = data->f34_container; ++ if (!fn) ++ return -ENODEV; + +- return 0; ++ f34 = dev_get_drvdata(&fn->dev); ++ if (!f34) ++ return -ENODEV; ++ ++ if (f34->bl_version == 5) ++ return sysfs_emit(buf, "%c%c\n", ++ f34->bootloader_id[0], ++ f34->bootloader_id[1]); ++ else ++ return sysfs_emit(buf, "V%d.%d\n", ++ f34->bootloader_id[1], ++ f34->bootloader_id[0]); + } + + static DEVICE_ATTR(bootloader_id, 0444, rmi_driver_bootloader_id_show, NULL); +@@ -343,13 +335,16 @@ static ssize_t rmi_driver_configuration_id_show(struct device *dev, + struct rmi_function *fn = data->f34_container; + struct f34_data *f34; + +- if (fn) { +- f34 = dev_get_drvdata(&fn->dev); ++ fn = data->f34_container; ++ if (!fn) ++ return -ENODEV; + +- return sysfs_emit(buf, "%s\n", f34->configuration_id); +- } ++ f34 = dev_get_drvdata(&fn->dev); ++ if (!f34) ++ return -ENODEV; + +- return 0; ++ ++ return sysfs_emit(buf, "%s\n", f34->configuration_id); + } + + static DEVICE_ATTR(configuration_id, 0444, +@@ -365,10 +360,14 @@ static int rmi_firmware_update(struct rmi_driver_data *data, + + if (!data->f34_container) { + dev_warn(dev, "%s: No F34 present!\n", __func__); +- return -EINVAL; ++ return -ENODEV; + } + + f34 = dev_get_drvdata(&data->f34_container->dev); ++ if (!f34) { ++ dev_warn(dev, "%s: No valid F34 present!\n", __func__); ++ return -ENODEV; ++ } + + if (f34->bl_version >= 7) { + if (data->pdt_props & 
HAS_BSR) { +@@ -494,10 +493,18 @@ static ssize_t rmi_driver_update_fw_status_show(struct device *dev, + char *buf) + { + struct rmi_driver_data *data = dev_get_drvdata(dev); +- int update_status = 0; ++ struct f34_data *f34; ++ int update_status = -ENODEV; + +- if (data->f34_container) +- update_status = rmi_f34_status(data->f34_container); ++ /* ++ * The status is the percentage complete, or once complete, ++ * zero for success or a negative return code. ++ */ ++ if (data->f34_container) { ++ f34 = dev_get_drvdata(&data->f34_container->dev); ++ if (f34) ++ update_status = f34->update_status; ++ } + + return sysfs_emit(buf, "%d\n", update_status); + } +@@ -517,33 +524,21 @@ static const struct attribute_group rmi_firmware_attr_group = { + .attrs = rmi_firmware_attrs, + }; + +-static int rmi_f34_probe(struct rmi_function *fn) ++static int rmi_f34v5_probe(struct f34_data *f34) + { +- struct f34_data *f34; +- unsigned char f34_queries[9]; ++ struct rmi_function *fn = f34->fn; ++ u8 f34_queries[9]; + bool has_config_id; +- u8 version = fn->fd.function_version; +- int ret; +- +- f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL); +- if (!f34) +- return -ENOMEM; +- +- f34->fn = fn; +- dev_set_drvdata(&fn->dev, f34); +- +- /* v5 code only supported version 0, try V7 probe */ +- if (version > 0) +- return rmi_f34v7_probe(f34); ++ int error; + + f34->bl_version = 5; + +- ret = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, +- f34_queries, sizeof(f34_queries)); +- if (ret) { ++ error = rmi_read_block(fn->rmi_dev, fn->fd.query_base_addr, ++ f34_queries, sizeof(f34_queries)); ++ if (error) { + dev_err(&fn->dev, "%s: Failed to query properties\n", + __func__); +- return ret; ++ return error; + } + + snprintf(f34->bootloader_id, sizeof(f34->bootloader_id), +@@ -569,11 +564,11 @@ static int rmi_f34_probe(struct rmi_function *fn) + f34->v5.config_blocks); + + if (has_config_id) { +- ret = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr, +- f34_queries, 
sizeof(f34_queries)); +- if (ret) { ++ error = rmi_read_block(fn->rmi_dev, fn->fd.control_base_addr, ++ f34_queries, sizeof(f34_queries)); ++ if (error) { + dev_err(&fn->dev, "Failed to read F34 config ID\n"); +- return ret; ++ return error; + } + + snprintf(f34->configuration_id, sizeof(f34->configuration_id), +@@ -582,12 +577,34 @@ static int rmi_f34_probe(struct rmi_function *fn) + f34_queries[2], f34_queries[3]); + + rmi_dbg(RMI_DEBUG_FN, &fn->dev, "Configuration ID: %s\n", +- f34->configuration_id); ++ f34->configuration_id); + } + + return 0; + } + ++static int rmi_f34_probe(struct rmi_function *fn) ++{ ++ struct f34_data *f34; ++ u8 version = fn->fd.function_version; ++ int error; ++ ++ f34 = devm_kzalloc(&fn->dev, sizeof(struct f34_data), GFP_KERNEL); ++ if (!f34) ++ return -ENOMEM; ++ ++ f34->fn = fn; ++ ++ /* v5 code only supported version 0 */ ++ error = version == 0 ? rmi_f34v5_probe(f34) : rmi_f34v7_probe(f34); ++ if (error) ++ return error; ++ ++ dev_set_drvdata(&fn->dev, f34); ++ ++ return 0; ++} ++ + int rmi_f34_create_sysfs(struct rmi_device *rmi_dev) + { + return sysfs_create_group(&rmi_dev->dev.kobj, &rmi_firmware_attr_group); +-- +2.39.5 + diff --git a/queue-6.1/net-dsa-microchip-add-ksz9563-in-ksz_switch_ops-and-.patch b/queue-6.1/net-dsa-microchip-add-ksz9563-in-ksz_switch_ops-and-.patch new file mode 100644 index 0000000000..748d484eb1 --- /dev/null +++ b/queue-6.1/net-dsa-microchip-add-ksz9563-in-ksz_switch_ops-and-.patch @@ -0,0 +1,168 @@ +From f6678d46a8816b39812f7c1b0faf8e6b94024dc9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 7 Nov 2022 14:59:18 +0530 +Subject: net: dsa: microchip: add ksz9563 in ksz_switch_ops and select based + on compatible string + +From: Rakesh Sankaranarayanan + +[ Upstream commit ef912fe443adfa15b88645b3bf58c92e5a364167 ] + +Add KSZ9563 inside ksz_switch_chips structure with +port_nirq as 3. KSZ9563 use KSZ9893 switch parameters +but port_nirq count is 3 for KSZ9563 whereas 2 for +KSZ9893. 
Add KSZ9563 inside ksz_switch_chips as a separate +member and from device tree map compatible string into +KSZ9563 inside ksz_spi.c and ksz9477_i2c.c. +Global Chip ID 1 and 2 registers read value 9893, select +sku based on Global Chip ID 4 Register which read 0x1c +for KSZ9563. + +Signed-off-by: Rakesh Sankaranarayanan +Signed-off-by: David S. Miller +Stable-dep-of: ba54bce747fa ("net: dsa: microchip: linearize skb for tail-tagging switches") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz9477.c | 3 ++- + drivers/net/dsa/microchip/ksz9477_i2c.c | 2 +- + drivers/net/dsa/microchip/ksz_common.c | 33 +++++++++++++++++++++++-- + drivers/net/dsa/microchip/ksz_common.h | 3 +++ + drivers/net/dsa/microchip/ksz_spi.c | 2 +- + 5 files changed, 38 insertions(+), 5 deletions(-) + +diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c +index b854ee425fcdd..6858f96c6b85a 100644 +--- a/drivers/net/dsa/microchip/ksz9477.c ++++ b/drivers/net/dsa/microchip/ksz9477.c +@@ -193,7 +193,8 @@ int ksz9477_reset_switch(struct ksz_device *dev) + + /* KSZ9893 compatible chips do not support refclk configuration */ + if (dev->chip_id == KSZ9893_CHIP_ID || +- dev->chip_id == KSZ8563_CHIP_ID) ++ dev->chip_id == KSZ8563_CHIP_ID || ++ dev->chip_id == KSZ9563_CHIP_ID) + return 0; + + data8 = SW_ENABLE_REFCLKO; +diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c +index aae1dadef882d..7ea9803f7069a 100644 +--- a/drivers/net/dsa/microchip/ksz9477_i2c.c ++++ b/drivers/net/dsa/microchip/ksz9477_i2c.c +@@ -101,7 +101,7 @@ static const struct of_device_id ksz9477_dt_ids[] = { + }, + { + .compatible = "microchip,ksz9563", +- .data = &ksz_switch_chips[KSZ9893] ++ .data = &ksz_switch_chips[KSZ9563] + }, + { + .compatible = "microchip,ksz8563", +diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c +index 9dbe188f09c3c..effc66d1555da 100644 +--- 
a/drivers/net/dsa/microchip/ksz_common.c ++++ b/drivers/net/dsa/microchip/ksz_common.c +@@ -1277,6 +1277,31 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .gbit_capable = {true, true, true}, + }, + ++ [KSZ9563] = { ++ .chip_id = KSZ9563_CHIP_ID, ++ .dev_name = "KSZ9563", ++ .num_vlans = 4096, ++ .num_alus = 4096, ++ .num_statics = 16, ++ .cpu_ports = 0x07, /* can be configured as cpu port */ ++ .port_cnt = 3, /* total port count */ ++ .port_nirqs = 3, ++ .ops = &ksz9477_dev_ops, ++ .mib_names = ksz9477_mib_names, ++ .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), ++ .reg_mib_cnt = MIB_COUNTER_NUM, ++ .regs = ksz9477_regs, ++ .masks = ksz9477_masks, ++ .shifts = ksz9477_shifts, ++ .xmii_ctrl0 = ksz9477_xmii_ctrl0, ++ .xmii_ctrl1 = ksz8795_xmii_ctrl1, /* Same as ksz8795 */ ++ .supports_mii = {false, false, true}, ++ .supports_rmii = {false, false, true}, ++ .supports_rgmii = {false, false, true}, ++ .internal_phy = {true, true, false}, ++ .gbit_capable = {true, true, true}, ++ }, ++ + [KSZ9567] = { + .chip_id = KSZ9567_CHIP_ID, + .dev_name = "KSZ9567", +@@ -2383,7 +2408,8 @@ static enum dsa_tag_protocol ksz_get_tag_protocol(struct dsa_switch *ds, + + if (dev->chip_id == KSZ8830_CHIP_ID || + dev->chip_id == KSZ8563_CHIP_ID || +- dev->chip_id == KSZ9893_CHIP_ID) ++ dev->chip_id == KSZ9893_CHIP_ID || ++ dev->chip_id == KSZ9563_CHIP_ID) + proto = DSA_TAG_PROTO_KSZ9893; + + if (dev->chip_id == KSZ9477_CHIP_ID || +@@ -2503,7 +2529,8 @@ static void ksz_set_xmii(struct ksz_device *dev, int port, + data8 |= bitval[P_RGMII_SEL]; + /* On KSZ9893, disable RGMII in-band status support */ + if (dev->chip_id == KSZ9893_CHIP_ID || +- dev->chip_id == KSZ8563_CHIP_ID) ++ dev->chip_id == KSZ8563_CHIP_ID || ++ dev->chip_id == KSZ9563_CHIP_ID) + data8 &= ~P_MII_MAC_MODE; + break; + default: +@@ -2776,6 +2803,8 @@ static int ksz_switch_detect(struct ksz_device *dev) + + if (id4 == SKU_ID_KSZ8563) + dev->chip_id = KSZ8563_CHIP_ID; ++ else if (id4 == SKU_ID_KSZ9563) ++ dev->chip_id = 
KSZ9563_CHIP_ID; + else + dev->chip_id = KSZ9893_CHIP_ID; + +diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h +index a3a7a90dad967..6f263e36ee373 100644 +--- a/drivers/net/dsa/microchip/ksz_common.h ++++ b/drivers/net/dsa/microchip/ksz_common.h +@@ -154,6 +154,7 @@ enum ksz_model { + KSZ9896, + KSZ9897, + KSZ9893, ++ KSZ9563, + KSZ9567, + LAN9370, + LAN9371, +@@ -172,6 +173,7 @@ enum ksz_chip_id { + KSZ9896_CHIP_ID = 0x00989600, + KSZ9897_CHIP_ID = 0x00989700, + KSZ9893_CHIP_ID = 0x00989300, ++ KSZ9563_CHIP_ID = 0x00956300, + KSZ9567_CHIP_ID = 0x00956700, + LAN9370_CHIP_ID = 0x00937000, + LAN9371_CHIP_ID = 0x00937100, +@@ -563,6 +565,7 @@ static inline int is_lan937x(struct ksz_device *dev) + /* KSZ9893, KSZ9563, KSZ8563 specific register */ + #define REG_CHIP_ID4 0x0f + #define SKU_ID_KSZ8563 0x3c ++#define SKU_ID_KSZ9563 0x1c + + /* Driver set switch broadcast storm protection at 10% rate. */ + #define BROADCAST_STORM_PROT_RATE 10 +diff --git a/drivers/net/dsa/microchip/ksz_spi.c b/drivers/net/dsa/microchip/ksz_spi.c +index 1b6ab891b986e..4f2186779082a 100644 +--- a/drivers/net/dsa/microchip/ksz_spi.c ++++ b/drivers/net/dsa/microchip/ksz_spi.c +@@ -163,7 +163,7 @@ static const struct of_device_id ksz_dt_ids[] = { + }, + { + .compatible = "microchip,ksz9563", +- .data = &ksz_switch_chips[KSZ9893] ++ .data = &ksz_switch_chips[KSZ9563] + }, + { + .compatible = "microchip,ksz8563", +-- +2.39.5 + diff --git a/queue-6.1/net-dsa-microchip-enable-port-queues-for-tc-mqprio.patch b/queue-6.1/net-dsa-microchip-enable-port-queues-for-tc-mqprio.patch new file mode 100644 index 0000000000..c822886b15 --- /dev/null +++ b/queue-6.1/net-dsa-microchip-enable-port-queues-for-tc-mqprio.patch @@ -0,0 +1,350 @@ +From 8c099a416f000e137e926b20ea091cc0b3513a56 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 20 Jan 2023 10:51:34 +0530 +Subject: net: dsa: microchip: enable port queues for tc mqprio + +From: Arun Ramadoss + +[ Upstream 
commit e30f33a5f5c74f278feaa57517d851874dfc640f ] + +LAN937x family of switches has 8 queues per port where the KSZ switches +have 4 queues per port. By default, only one queue per port is enabled. +The queues are configurable in 2, 4 or 8. This patch adds 8 +queues for LAN937x and 4 for other switches. +In the tag_ksz.c file, priority of the packet is queried using the skb +buffer and the corresponding value is updated in the tag. + +Signed-off-by: Arun Ramadoss +Signed-off-by: Jakub Kicinski +Stable-dep-of: ba54bce747fa ("net: dsa: microchip: linearize skb for tail-tagging switches") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz9477.c | 18 ++++++++++++++++++ + drivers/net/dsa/microchip/ksz9477.h | 1 + + drivers/net/dsa/microchip/ksz9477_reg.h | 6 +++++- + drivers/net/dsa/microchip/ksz_common.c | 18 ++++++++++++++++++ + drivers/net/dsa/microchip/ksz_common.h | 1 + + drivers/net/dsa/microchip/lan937x_main.c | 4 ++++ + net/dsa/tag_ksz.c | 15 +++++++++++++++ + 7 files changed, 62 insertions(+), 1 deletion(-) + +diff --git a/drivers/net/dsa/microchip/ksz9477.c b/drivers/net/dsa/microchip/ksz9477.c +index 6858f96c6b85a..678d38b6f24e2 100644 +--- a/drivers/net/dsa/microchip/ksz9477.c ++++ b/drivers/net/dsa/microchip/ksz9477.c +@@ -1012,6 +1012,22 @@ int ksz9477_set_ageing_time(struct ksz_device *dev, unsigned int msecs) + return ksz_write8(dev, REG_SW_LUE_CTRL_3, value); + } + ++void ksz9477_port_queue_split(struct ksz_device *dev, int port) ++{ ++ u8 data; ++ ++ if (dev->info->num_tx_queues == 8) ++ data = PORT_EIGHT_QUEUE; ++ else if (dev->info->num_tx_queues == 4) ++ data = PORT_FOUR_QUEUE; ++ else if (dev->info->num_tx_queues == 2) ++ data = PORT_TWO_QUEUE; ++ else ++ data = PORT_SINGLE_QUEUE; ++ ++ ksz_prmw8(dev, port, REG_PORT_CTRL_0, PORT_QUEUE_SPLIT_MASK, data); ++} ++ + void ksz9477_port_setup(struct ksz_device *dev, int port, bool cpu_port) + { + struct dsa_switch *ds = dev->ds; +@@ -1023,6 +1039,8 @@ void ksz9477_port_setup(struct 
ksz_device *dev, int port, bool cpu_port) + ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_TAIL_TAG_ENABLE, + true); + ++ ksz9477_port_queue_split(dev, port); ++ + ksz_port_cfg(dev, port, REG_PORT_CTRL_0, PORT_MAC_LOOPBACK, false); + + /* set back pressure */ +diff --git a/drivers/net/dsa/microchip/ksz9477.h b/drivers/net/dsa/microchip/ksz9477.h +index 00862c4cfb7f1..beb9d73086526 100644 +--- a/drivers/net/dsa/microchip/ksz9477.h ++++ b/drivers/net/dsa/microchip/ksz9477.h +@@ -57,5 +57,6 @@ int ksz9477_reset_switch(struct ksz_device *dev); + int ksz9477_dsa_init(struct ksz_device *dev); + int ksz9477_switch_init(struct ksz_device *dev); + void ksz9477_switch_exit(struct ksz_device *dev); ++void ksz9477_port_queue_split(struct ksz_device *dev, int port); + + #endif +diff --git a/drivers/net/dsa/microchip/ksz9477_reg.h b/drivers/net/dsa/microchip/ksz9477_reg.h +index ffb9484018ed5..bf407cd6b51da 100644 +--- a/drivers/net/dsa/microchip/ksz9477_reg.h ++++ b/drivers/net/dsa/microchip/ksz9477_reg.h +@@ -849,7 +849,11 @@ + #define PORT_FORCE_TX_FLOW_CTRL BIT(4) + #define PORT_FORCE_RX_FLOW_CTRL BIT(3) + #define PORT_TAIL_TAG_ENABLE BIT(2) +-#define PORT_QUEUE_SPLIT_ENABLE 0x3 ++#define PORT_QUEUE_SPLIT_MASK GENMASK(1, 0) ++#define PORT_EIGHT_QUEUE 0x3 ++#define PORT_FOUR_QUEUE 0x2 ++#define PORT_TWO_QUEUE 0x1 ++#define PORT_SINGLE_QUEUE 0x0 + + #define REG_PORT_CTRL_1 0x0021 + +diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c +index 966990a34a073..d2b646dfeb623 100644 +--- a/drivers/net/dsa/microchip/ksz_common.c ++++ b/drivers/net/dsa/microchip/ksz_common.c +@@ -1035,6 +1035,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x07, /* can be configured as cpu port */ + .port_cnt = 3, /* total port count */ + .port_nirqs = 3, ++ .num_tx_queues = 4, + .ops = &ksz9477_dev_ops, + .mib_names = ksz9477_mib_names, + .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), +@@ -1061,6 +1062,7 @@ const struct ksz_chip_data 
ksz_switch_chips[] = { + .num_statics = 8, + .cpu_ports = 0x10, /* can be configured as cpu port */ + .port_cnt = 5, /* total cpu and user ports */ ++ .num_tx_queues = 4, + .ops = &ksz8_dev_ops, + .ksz87xx_eee_link_erratum = true, + .mib_names = ksz9477_mib_names, +@@ -1099,6 +1101,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .num_statics = 8, + .cpu_ports = 0x10, /* can be configured as cpu port */ + .port_cnt = 5, /* total cpu and user ports */ ++ .num_tx_queues = 4, + .ops = &ksz8_dev_ops, + .ksz87xx_eee_link_erratum = true, + .mib_names = ksz9477_mib_names, +@@ -1123,6 +1126,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .num_statics = 8, + .cpu_ports = 0x10, /* can be configured as cpu port */ + .port_cnt = 5, /* total cpu and user ports */ ++ .num_tx_queues = 4, + .ops = &ksz8_dev_ops, + .ksz87xx_eee_link_erratum = true, + .mib_names = ksz9477_mib_names, +@@ -1147,6 +1151,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .num_statics = 8, + .cpu_ports = 0x4, /* can be configured as cpu port */ + .port_cnt = 3, ++ .num_tx_queues = 4, + .ops = &ksz8_dev_ops, + .mib_names = ksz88xx_mib_names, + .mib_cnt = ARRAY_SIZE(ksz88xx_mib_names), +@@ -1168,6 +1173,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x7F, /* can be configured as cpu port */ + .port_cnt = 7, /* total physical port count */ + .port_nirqs = 4, ++ .num_tx_queues = 4, + .ops = &ksz9477_dev_ops, + .phy_errata_9477 = true, + .mib_names = ksz9477_mib_names, +@@ -1200,6 +1206,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x3F, /* can be configured as cpu port */ + .port_cnt = 6, /* total physical port count */ + .port_nirqs = 2, ++ .num_tx_queues = 4, + .ops = &ksz9477_dev_ops, + .phy_errata_9477 = true, + .mib_names = ksz9477_mib_names, +@@ -1232,6 +1239,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x7F, /* can be configured as cpu port */ + .port_cnt = 7, /* total physical port count */ + .port_nirqs = 
2, ++ .num_tx_queues = 4, + .ops = &ksz9477_dev_ops, + .phy_errata_9477 = true, + .mib_names = ksz9477_mib_names, +@@ -1262,6 +1270,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x07, /* can be configured as cpu port */ + .port_cnt = 3, /* total port count */ + .port_nirqs = 2, ++ .num_tx_queues = 4, + .ops = &ksz9477_dev_ops, + .mib_names = ksz9477_mib_names, + .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), +@@ -1287,6 +1296,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x07, /* can be configured as cpu port */ + .port_cnt = 3, /* total port count */ + .port_nirqs = 3, ++ .num_tx_queues = 4, + .ops = &ksz9477_dev_ops, + .mib_names = ksz9477_mib_names, + .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), +@@ -1312,6 +1322,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x7F, /* can be configured as cpu port */ + .port_cnt = 7, /* total physical port count */ + .port_nirqs = 3, ++ .num_tx_queues = 4, + .ops = &ksz9477_dev_ops, + .phy_errata_9477 = true, + .mib_names = ksz9477_mib_names, +@@ -1342,6 +1353,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x10, /* can be configured as cpu port */ + .port_cnt = 5, /* total physical port count */ + .port_nirqs = 6, ++ .num_tx_queues = 8, + .ops = &lan937x_dev_ops, + .mib_names = ksz9477_mib_names, + .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), +@@ -1366,6 +1378,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x30, /* can be configured as cpu port */ + .port_cnt = 6, /* total physical port count */ + .port_nirqs = 6, ++ .num_tx_queues = 8, + .ops = &lan937x_dev_ops, + .mib_names = ksz9477_mib_names, + .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), +@@ -1390,6 +1403,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x30, /* can be configured as cpu port */ + .port_cnt = 8, /* total physical port count */ + .port_nirqs = 6, ++ .num_tx_queues = 8, + .ops = &lan937x_dev_ops, + .mib_names = ksz9477_mib_names, + 
.mib_cnt = ARRAY_SIZE(ksz9477_mib_names), +@@ -1418,6 +1432,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x38, /* can be configured as cpu port */ + .port_cnt = 5, /* total physical port count */ + .port_nirqs = 6, ++ .num_tx_queues = 8, + .ops = &lan937x_dev_ops, + .mib_names = ksz9477_mib_names, + .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), +@@ -1446,6 +1461,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .cpu_ports = 0x30, /* can be configured as cpu port */ + .port_cnt = 8, /* total physical port count */ + .port_nirqs = 6, ++ .num_tx_queues = 8, + .ops = &lan937x_dev_ops, + .mib_names = ksz9477_mib_names, + .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), +@@ -1977,6 +1993,8 @@ static int ksz_setup(struct dsa_switch *ds) + + dev->dev_ops->enable_stp_addr(dev); + ++ ds->num_tx_queues = dev->info->num_tx_queues; ++ + regmap_update_bits(dev->regmap[0], regs[S_MULTICAST_CTRL], + MULTICAST_STORM_DISABLE, MULTICAST_STORM_DISABLE); + +diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h +index 6f263e36ee373..f887f264152b4 100644 +--- a/drivers/net/dsa/microchip/ksz_common.h ++++ b/drivers/net/dsa/microchip/ksz_common.h +@@ -46,6 +46,7 @@ struct ksz_chip_data { + int cpu_ports; + int port_cnt; + u8 port_nirqs; ++ u8 num_tx_queues; + const struct ksz_dev_ops *ops; + bool phy_errata_9477; + bool ksz87xx_eee_link_erratum; +diff --git a/drivers/net/dsa/microchip/lan937x_main.c b/drivers/net/dsa/microchip/lan937x_main.c +index 338eff0818dfd..9c54bd0c620c6 100644 +--- a/drivers/net/dsa/microchip/lan937x_main.c ++++ b/drivers/net/dsa/microchip/lan937x_main.c +@@ -15,6 +15,7 @@ + + #include "lan937x_reg.h" + #include "ksz_common.h" ++#include "ksz9477.h" + #include "lan937x.h" + + static int lan937x_cfg(struct ksz_device *dev, u32 addr, u8 bits, bool set) +@@ -180,6 +181,9 @@ void lan937x_port_setup(struct ksz_device *dev, int port, bool cpu_port) + lan937x_port_cfg(dev, port, REG_PORT_CTRL_0, + 
PORT_TAIL_TAG_ENABLE, true); + ++ /* Enable the Port Queue split */ ++ ksz9477_port_queue_split(dev, port); ++ + /* set back pressure for half duplex */ + lan937x_port_cfg(dev, port, REG_PORT_MAC_CTRL_1, PORT_BACK_PRESSURE, + true); +diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c +index 429250298ac4b..fa61b2bc79391 100644 +--- a/net/dsa/tag_ksz.c ++++ b/net/dsa/tag_ksz.c +@@ -105,12 +105,15 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795); + #define KSZ9477_PTP_TAG_LEN 4 + #define KSZ9477_PTP_TAG_INDICATION 0x80 + ++#define KSZ9477_TAIL_TAG_PRIO GENMASK(8, 7) + #define KSZ9477_TAIL_TAG_OVERRIDE BIT(9) + #define KSZ9477_TAIL_TAG_LOOKUP BIT(10) + + static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, + struct net_device *dev) + { ++ u16 queue_mapping = skb_get_queue_mapping(skb); ++ u8 prio = netdev_txq_to_tc(dev, queue_mapping); + struct dsa_port *dp = dsa_slave_to_port(dev); + __be16 *tag; + u8 *addr; +@@ -125,6 +128,8 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, + + val = BIT(dp->index); + ++ val |= FIELD_PREP(KSZ9477_TAIL_TAG_PRIO, prio); ++ + if (is_link_local_ether_addr(addr)) + val |= KSZ9477_TAIL_TAG_OVERRIDE; + +@@ -158,12 +163,15 @@ static const struct dsa_device_ops ksz9477_netdev_ops = { + DSA_TAG_DRIVER(ksz9477_netdev_ops); + MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9477); + ++#define KSZ9893_TAIL_TAG_PRIO GENMASK(4, 3) + #define KSZ9893_TAIL_TAG_OVERRIDE BIT(5) + #define KSZ9893_TAIL_TAG_LOOKUP BIT(6) + + static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, + struct net_device *dev) + { ++ u16 queue_mapping = skb_get_queue_mapping(skb); ++ u8 prio = netdev_txq_to_tc(dev, queue_mapping); + struct dsa_port *dp = dsa_slave_to_port(dev); + u8 *addr; + u8 *tag; +@@ -177,6 +185,8 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb, + + *tag = BIT(dp->index); + ++ *tag |= FIELD_PREP(KSZ9893_TAIL_TAG_PRIO, prio); ++ + if (is_link_local_ether_addr(addr)) + *tag |= KSZ9893_TAIL_TAG_OVERRIDE; + +@@ -213,11 +223,14 @@ 
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893); + #define LAN937X_TAIL_TAG_BLOCKING_OVERRIDE BIT(11) + #define LAN937X_TAIL_TAG_LOOKUP BIT(12) + #define LAN937X_TAIL_TAG_VALID BIT(13) ++#define LAN937X_TAIL_TAG_PRIO GENMASK(10, 8) + #define LAN937X_TAIL_TAG_PORT_MASK 7 + + static struct sk_buff *lan937x_xmit(struct sk_buff *skb, + struct net_device *dev) + { ++ u16 queue_mapping = skb_get_queue_mapping(skb); ++ u8 prio = netdev_txq_to_tc(dev, queue_mapping); + struct dsa_port *dp = dsa_slave_to_port(dev); + const struct ethhdr *hdr = eth_hdr(skb); + __be16 *tag; +@@ -230,6 +243,8 @@ static struct sk_buff *lan937x_xmit(struct sk_buff *skb, + + val = BIT(dp->index); + ++ val |= FIELD_PREP(LAN937X_TAIL_TAG_PRIO, prio); ++ + if (is_link_local_ether_addr(hdr->h_dest)) + val |= LAN937X_TAIL_TAG_BLOCKING_OVERRIDE; + +-- +2.39.5 + diff --git a/queue-6.1/net-dsa-microchip-ksz8563-add-number-of-port-irq.patch b/queue-6.1/net-dsa-microchip-ksz8563-add-number-of-port-irq.patch new file mode 100644 index 0000000000..85530998bc --- /dev/null +++ b/queue-6.1/net-dsa-microchip-ksz8563-add-number-of-port-irq.patch @@ -0,0 +1,35 @@ +From 918fe1d978150745c40d7ca88f33350021c6a2b9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 7 Nov 2022 14:59:21 +0530 +Subject: net: dsa: microchip: ksz8563: Add number of port irq + +From: Rakesh Sankaranarayanan + +[ Upstream commit 4630d1420f841b2b112928359c4dc148d9a7d1f8 ] + +KSZ8563 have three port interrupts: PTP, PHY and ACL. Add +port_nirq as 3 for KSZ8563 inside ksz_chip_data. + +Signed-off-by: Rakesh Sankaranarayanan +Signed-off-by: David S. 
Miller +Stable-dep-of: ba54bce747fa ("net: dsa: microchip: linearize skb for tail-tagging switches") +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/microchip/ksz_common.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c +index effc66d1555da..966990a34a073 100644 +--- a/drivers/net/dsa/microchip/ksz_common.c ++++ b/drivers/net/dsa/microchip/ksz_common.c +@@ -1034,6 +1034,7 @@ const struct ksz_chip_data ksz_switch_chips[] = { + .num_statics = 16, + .cpu_ports = 0x07, /* can be configured as cpu port */ + .port_cnt = 3, /* total port count */ ++ .port_nirqs = 3, + .ops = &ksz9477_dev_ops, + .mib_names = ksz9477_mib_names, + .mib_cnt = ARRAY_SIZE(ksz9477_mib_names), +-- +2.39.5 + diff --git a/queue-6.1/net-dsa-microchip-linearize-skb-for-tail-tagging-swi.patch b/queue-6.1/net-dsa-microchip-linearize-skb-for-tail-tagging-swi.patch new file mode 100644 index 0000000000..e45d4e427f --- /dev/null +++ b/queue-6.1/net-dsa-microchip-linearize-skb-for-tail-tagging-swi.patch @@ -0,0 +1,98 @@ +From 23b68ba2cf246f14b812d79a50f6ba4f5ee1bbc9 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 15 May 2025 09:29:19 +0200 +Subject: net: dsa: microchip: linearize skb for tail-tagging switches + +From: Jakob Unterwurzacher + +[ Upstream commit ba54bce747fa9e07896c1abd9b48545f7b4b31d2 ] + +The pointer arithmetic for accessing the tail tag only works +for linear skbs. + +For nonlinear skbs, it reads uninitialized memory inside the +skb headroom, essentially randomizing the tag. I have observed +it gets set to 6 most of the time. + +Example where ksz9477_rcv thinks that the packet from port 1 comes from port 6 +(which does not exist for the ksz9896 that's in use), dropping the packet. 
+Debug prints added by me (not included in this patch): + + [ 256.645337] ksz9477_rcv:323 tag0=6 + [ 256.645349] skb len=47 headroom=78 headlen=0 tailroom=0 + mac=(64,14) mac_len=14 net=(78,0) trans=78 + shinfo(txflags=0 nr_frags=1 gso(size=0 type=0 segs=0)) + csum(0x0 start=0 offset=0 ip_summed=0 complete_sw=0 valid=0 level=0) + hash(0x0 sw=0 l4=0) proto=0x00f8 pkttype=1 iif=3 + priority=0x0 mark=0x0 alloc_cpu=0 vlan_all=0x0 + encapsulation=0 inner(proto=0x0000, mac=0, net=0, trans=0) + [ 256.645377] dev name=end1 feat=0x0002e10200114bb3 + [ 256.645386] skb headroom: 00000000: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + [ 256.645395] skb headroom: 00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + [ 256.645403] skb headroom: 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + [ 256.645411] skb headroom: 00000030: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 + [ 256.645420] skb headroom: 00000040: ff ff ff ff ff ff 00 1c 19 f2 e2 db 08 06 + [ 256.645428] skb frag: 00000000: 00 01 08 00 06 04 00 01 00 1c 19 f2 e2 db 0a 02 + [ 256.645436] skb frag: 00000010: 00 83 00 00 00 00 00 00 0a 02 a0 2f 00 00 00 00 + [ 256.645444] skb frag: 00000020: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 01 + [ 256.645452] ksz_common_rcv:92 dsa_conduit_find_user returned NULL + +Call skb_linearize before trying to access the tag. + +This patch fixes ksz9477_rcv which is used by the ksz9896 I have at +hand, and also applies the same fix to ksz8795_rcv which seems to have +the same problem. 
+ +Signed-off-by: Jakob Unterwurzacher +CC: stable@vger.kernel.org +Fixes: 016e43a26bab ("net: dsa: ksz: Add KSZ8795 tag code") +Fixes: 8b8010fb7876 ("dsa: add support for Microchip KSZ tail tagging") +Reviewed-by: Vladimir Oltean +Link: https://patch.msgid.link/20250515072920.2313014-1-jakob.unterwurzacher@cherry.de +Signed-off-by: Jakub Kicinski +Signed-off-by: Sasha Levin +--- + net/dsa/tag_ksz.c | 19 +++++++++++++++---- + 1 file changed, 15 insertions(+), 4 deletions(-) + +diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c +index cc3a920eb7f4e..8fbcdc9a0f3bd 100644 +--- a/net/dsa/tag_ksz.c ++++ b/net/dsa/tag_ksz.c +@@ -69,7 +69,12 @@ static struct sk_buff *ksz8795_xmit(struct sk_buff *skb, struct net_device *dev) + + static struct sk_buff *ksz8795_rcv(struct sk_buff *skb, struct net_device *dev) + { +- u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; ++ u8 *tag; ++ ++ if (skb_linearize(skb)) ++ return NULL; ++ ++ tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; + + return ksz_common_rcv(skb, dev, tag[0] & 7, KSZ_EGRESS_TAG_LEN); + } +@@ -141,10 +146,16 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb, + + static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) + { +- /* Tag decoding */ +- u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; +- unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; + unsigned int len = KSZ_EGRESS_TAG_LEN; ++ unsigned int port; ++ u8 *tag; ++ ++ if (skb_linearize(skb)) ++ return NULL; ++ ++ /* Tag decoding */ ++ tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; ++ port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; + + /* Extra 4-bytes PTP timestamp */ + if (tag[0] & KSZ9477_PTP_TAG_INDICATION) +-- +2.39.5 + diff --git a/queue-6.1/net-dsa-microchip-update-tag_ksz-masks-for-ksz9477-f.patch b/queue-6.1/net-dsa-microchip-update-tag_ksz-masks-for-ksz9477-f.patch new file mode 100644 index 0000000000..619b86a83a --- /dev/null +++ b/queue-6.1/net-dsa-microchip-update-tag_ksz-masks-for-ksz9477-f.patch 
@@ -0,0 +1,49 @@ +From 9722d1162cdb1e7f2fdb270bcb3ec41f2d64e10e Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Mon, 9 Sep 2024 15:42:59 +0200 +Subject: net: dsa: microchip: update tag_ksz masks for KSZ9477 family + +From: Pieter Van Trappen + +[ Upstream commit 3f464b193d40e49299dcd087b10cc3b77cbbea68 ] + +Remove magic number 7 by introducing a GENMASK macro instead. +Remove magic number 0x80 by using the BIT macro instead. + +Signed-off-by: Pieter Van Trappen +Reviewed-by: Florian Fainelli +Link: https://patch.msgid.link/20240909134301.75448-1-vtpieter@gmail.com +Signed-off-by: Jakub Kicinski +Stable-dep-of: ba54bce747fa ("net: dsa: microchip: linearize skb for tail-tagging switches") +Signed-off-by: Sasha Levin +--- + net/dsa/tag_ksz.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c +index fa61b2bc79391..cc3a920eb7f4e 100644 +--- a/net/dsa/tag_ksz.c ++++ b/net/dsa/tag_ksz.c +@@ -103,8 +103,9 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795); + + #define KSZ9477_INGRESS_TAG_LEN 2 + #define KSZ9477_PTP_TAG_LEN 4 +-#define KSZ9477_PTP_TAG_INDICATION 0x80 ++#define KSZ9477_PTP_TAG_INDICATION BIT(7) + ++#define KSZ9477_TAIL_TAG_EG_PORT_M GENMASK(2, 0) + #define KSZ9477_TAIL_TAG_PRIO GENMASK(8, 7) + #define KSZ9477_TAIL_TAG_OVERRIDE BIT(9) + #define KSZ9477_TAIL_TAG_LOOKUP BIT(10) +@@ -142,7 +143,7 @@ static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev) + { + /* Tag decoding */ + u8 *tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; +- unsigned int port = tag[0] & 7; ++ unsigned int port = tag[0] & KSZ9477_TAIL_TAG_EG_PORT_M; + unsigned int len = KSZ_EGRESS_TAG_LEN; + + /* Extra 4-bytes PTP timestamp */ +-- +2.39.5 + diff --git a/queue-6.1/pmdomain-core-fix-error-checking-in-genpd_dev_pm_att.patch b/queue-6.1/pmdomain-core-fix-error-checking-in-genpd_dev_pm_att.patch new file mode 100644 index 0000000000..23e92b2438 --- /dev/null +++ 
b/queue-6.1/pmdomain-core-fix-error-checking-in-genpd_dev_pm_att.patch @@ -0,0 +1,43 @@ +From 3a6a44c0563e5fa6baefad2cfbf783f4d0498ca2 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Thu, 8 May 2025 09:29:23 +0300 +Subject: pmdomain: core: Fix error checking in genpd_dev_pm_attach_by_id() + +From: Dan Carpenter + +[ Upstream commit 0f5757667ec0aaf2456c3b76fcf0c6c3ea3591fe ] + +The error checking for of_count_phandle_with_args() does not handle +negative error codes correctly. The problem is that "index" is a u32 so +in the condition "if (index >= num_domains)" negative error codes stored +in "num_domains" are type promoted to very high positive values and +"index" is always going to be valid. + +Test for negative error codes first and then test if "index" is valid. + +Fixes: 3ccf3f0cd197 ("PM / Domains: Enable genpd_dev_pm_attach_by_id|name() for single PM domain") +Signed-off-by: Dan Carpenter +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/aBxPQ8AI8N5v-7rL@stanley.mountain +Signed-off-by: Ulf Hansson +Signed-off-by: Sasha Levin +--- + drivers/base/power/domain.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c +index 3c44b0313a10e..a19a1f70adb2a 100644 +--- a/drivers/base/power/domain.c ++++ b/drivers/base/power/domain.c +@@ -2840,7 +2840,7 @@ struct device *genpd_dev_pm_attach_by_id(struct device *dev, + /* Verify that the index is within a valid range. */ + num_domains = of_count_phandle_with_args(dev->of_node, "power-domains", + "#power-domain-cells"); +- if (index >= num_domains) ++ if (num_domains < 0 || index >= num_domains) + return NULL; + + /* Allocate and register device on the genpd bus. 
*/ +-- +2.39.5 + diff --git a/queue-6.1/serial-sh-sci-check-if-tx-data-was-written-to-device.patch b/queue-6.1/serial-sh-sci-check-if-tx-data-was-written-to-device.patch new file mode 100644 index 0000000000..853a002663 --- /dev/null +++ b/queue-6.1/serial-sh-sci-check-if-tx-data-was-written-to-device.patch @@ -0,0 +1,155 @@ +From 677eef5a7f49ed824b1c3c9f51161c4da6ba0808 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 08:05:14 +0300 +Subject: serial: sh-sci: Check if TX data was written to device in .tx_empty() + +From: Claudiu Beznea + +commit 7cc0e0a43a91052477c2921f924a37d9c3891f0c upstream. + +On the Renesas RZ/G3S, when doing suspend to RAM, the uart_suspend_port() +is called. The uart_suspend_port() calls 3 times the +struct uart_port::ops::tx_empty() before shutting down the port. + +According to the documentation, the struct uart_port::ops::tx_empty() +API tests whether the transmitter FIFO and shifter for the port is +empty. + +The Renesas RZ/G3S SCIFA IP reports the number of data units stored in the +transmit FIFO through the FDR (FIFO Data Count Register). The data units +in the FIFOs are written in the shift register and transmitted from there. +The TEND bit in the Serial Status Register reports if the data was +transmitted from the shift register. + +In the previous code, in the tx_empty() API implemented by the sh-sci +driver, it is considered that the TX is empty if the hardware reports the +TEND bit set and the number of data units in the FIFO is zero. + +According to the HW manual, the TEND bit has the following meaning: + +0: Transmission is in the waiting state or in progress. +1: Transmission is completed. + +It has been noticed that when opening the serial device w/o using it and +then switch to a power saving mode, the tx_empty() call in the +uart_suspend_port() function fails, leading to the "Unable to drain +transmitter" message being printed on the console. 
This is because the +TEND=0 if nothing has been transmitted and the FIFOs are empty. As the +TEND=0 has double meaning (waiting state, in progress) we can't +determine the scenario described above. + +Add a software workaround for this. This sets a variable if any data has +been sent on the serial console (when using PIO) or if the DMA callback has +been called (meaning something has been transmitted). In the tx_empty() +API the status of the DMA transaction is also checked and if it is +completed or in progress the code falls back in checking the hardware +registers instead of relying on the software variable. + +Fixes: 73a19e4c0301 ("serial: sh-sci: Add DMA support.") +Cc: stable@vger.kernel.org +Signed-off-by: Claudiu Beznea +Link: https://lore.kernel.org/r/20241125115856.513642-1-claudiu.beznea.uj@bp.renesas.com +Signed-off-by: Greg Kroah-Hartman +[claudiu.beznea: fixed conflict by: + - keeping serial_port_out() instead of sci_port_out() in + sci_transmit_chars() + - keeping !uart_circ_empty(xmit) condition in sci_dma_tx_complete(), + after s->tx_occurred = true; assignment] +Signed-off-by: Claudiu Beznea +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/sh-sci.c | 29 +++++++++++++++++++++++++++++ + 1 file changed, 29 insertions(+) + +diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c +index ed468b676f0b9..f8f301db84339 100644 +--- a/drivers/tty/serial/sh-sci.c ++++ b/drivers/tty/serial/sh-sci.c +@@ -175,6 +175,7 @@ struct sci_port { + + bool has_rtscts; + bool autorts; ++ bool tx_occurred; + }; + + #define SCI_NPORTS CONFIG_SERIAL_SH_SCI_NR_UARTS +@@ -825,6 +826,7 @@ static void sci_transmit_chars(struct uart_port *port) + { + struct circ_buf *xmit = &port->state->xmit; + unsigned int stopped = uart_tx_stopped(port); ++ struct sci_port *s = to_sci_port(port); + unsigned short status; + unsigned short ctrl; + int count; +@@ -856,6 +858,7 @@ static void sci_transmit_chars(struct uart_port *port) + } + + serial_port_out(port, SCxTDR, c); 
++ s->tx_occurred = true; + + port->icount.tx++; + } while (--count > 0); +@@ -1208,6 +1211,8 @@ static void sci_dma_tx_complete(void *arg) + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(port); + ++ s->tx_occurred = true; ++ + if (!uart_circ_empty(xmit)) { + s->cookie_tx = 0; + schedule_work(&s->work_tx); +@@ -1688,6 +1693,19 @@ static void sci_flush_buffer(struct uart_port *port) + s->cookie_tx = -EINVAL; + } + } ++ ++static void sci_dma_check_tx_occurred(struct sci_port *s) ++{ ++ struct dma_tx_state state; ++ enum dma_status status; ++ ++ if (!s->chan_tx) ++ return; ++ ++ status = dmaengine_tx_status(s->chan_tx, s->cookie_tx, &state); ++ if (status == DMA_COMPLETE || status == DMA_IN_PROGRESS) ++ s->tx_occurred = true; ++} + #else /* !CONFIG_SERIAL_SH_SCI_DMA */ + static inline void sci_request_dma(struct uart_port *port) + { +@@ -1697,6 +1715,10 @@ static inline void sci_free_dma(struct uart_port *port) + { + } + ++static void sci_dma_check_tx_occurred(struct sci_port *s) ++{ ++} ++ + #define sci_flush_buffer NULL + #endif /* !CONFIG_SERIAL_SH_SCI_DMA */ + +@@ -2009,6 +2031,12 @@ static unsigned int sci_tx_empty(struct uart_port *port) + { + unsigned short status = serial_port_in(port, SCxSR); + unsigned short in_tx_fifo = sci_txfill(port); ++ struct sci_port *s = to_sci_port(port); ++ ++ sci_dma_check_tx_occurred(s); ++ ++ if (!s->tx_occurred) ++ return TIOCSER_TEMT; + + return (status & SCxSR_TEND(port)) && !in_tx_fifo ? 
TIOCSER_TEMT : 0; + } +@@ -2179,6 +2207,7 @@ static int sci_startup(struct uart_port *port) + + dev_dbg(port->dev, "%s(%d)\n", __func__, port->line); + ++ s->tx_occurred = false; + sci_request_dma(port); + + ret = sci_request_irq(s); +-- +2.39.5 + diff --git a/queue-6.1/serial-sh-sci-clean-sci_ports-0-after-at-earlycon-ex.patch b/queue-6.1/serial-sh-sci-clean-sci_ports-0-after-at-earlycon-ex.patch new file mode 100644 index 0000000000..f9f396203b --- /dev/null +++ b/queue-6.1/serial-sh-sci-clean-sci_ports-0-after-at-earlycon-ex.patch @@ -0,0 +1,125 @@ +From ae28415c8d92cb2f7998e07065e3ec9ca93afcb0 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 08:05:16 +0300 +Subject: serial: sh-sci: Clean sci_ports[0] after at earlycon exit + +From: Claudiu Beznea + +commit 5f1017069933489add0c08659673443c9905659e upstream. + +The early_console_setup() function initializes sci_ports[0].port with an +object of type struct uart_port obtained from the struct earlycon_device +passed as an argument to early_console_setup(). + +Later, during serial port probing, the serial port used as earlycon +(e.g., port A) might be remapped to a different position in the sci_ports[] +array, and a different serial port (e.g., port B) might be assigned to slot +0. For example: + +sci_ports[0] = port B +sci_ports[X] = port A + +In this scenario, the new port mapped at index zero (port B) retains the +data associated with the earlycon configuration. Consequently, after the +Linux boot process, any access to the serial port now mapped to +sci_ports[0] (port B) will block the original earlycon port (port A). + +To address this, introduce an early_console_exit() function to clean up +sci_ports[0] when earlycon is exited. + +To prevent the cleanup of sci_ports[0] while the serial device is still +being used by earlycon, introduce the struct sci_port::probing flag and +account for it in early_console_exit(). 
+ +Fixes: 0b0cced19ab1 ("serial: sh-sci: Add CONFIG_SERIAL_EARLYCON support") +Cc: stable@vger.kernel.org +Signed-off-by: Claudiu Beznea +Link: https://lore.kernel.org/r/20250116182249.3828577-5-claudiu.beznea.uj@bp.renesas.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Claudiu Beznea +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/sh-sci.c | 32 ++++++++++++++++++++++++++++++-- + 1 file changed, 30 insertions(+), 2 deletions(-) + +diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c +index 48bc66e2c0444..cd724fc284700 100644 +--- a/drivers/tty/serial/sh-sci.c ++++ b/drivers/tty/serial/sh-sci.c +@@ -184,6 +184,7 @@ static struct sci_port sci_ports[SCI_NPORTS]; + static unsigned long sci_ports_in_use; + static struct uart_driver sci_uart_driver; + static bool sci_uart_earlycon; ++static bool sci_uart_earlycon_dev_probing; + + static inline struct sci_port * + to_sci_port(struct uart_port *uart) +@@ -3301,7 +3302,8 @@ static struct plat_sci_port *sci_parse_dt(struct platform_device *pdev, + static int sci_probe_single(struct platform_device *dev, + unsigned int index, + struct plat_sci_port *p, +- struct sci_port *sciport) ++ struct sci_port *sciport, ++ struct resource *sci_res) + { + int ret; + +@@ -3348,6 +3350,14 @@ static int sci_probe_single(struct platform_device *dev, + sciport->port.flags |= UPF_HARD_FLOW; + } + ++ if (sci_uart_earlycon && sci_ports[0].port.mapbase == sci_res->start) { ++ /* ++ * Skip cleanup the sci_port[0] in early_console_exit(), this ++ * port is the same as the earlycon one. 
++ */ ++ sci_uart_earlycon_dev_probing = true; ++ } ++ + return uart_add_one_port(&sci_uart_driver, &sciport->port); + } + +@@ -3406,7 +3416,7 @@ static int sci_probe(struct platform_device *dev) + + platform_set_drvdata(dev, sp); + +- ret = sci_probe_single(dev, dev_id, p, sp); ++ ret = sci_probe_single(dev, dev_id, p, sp, res); + if (ret) + return ret; + +@@ -3563,6 +3573,22 @@ sh_early_platform_init_buffer("earlyprintk", &sci_driver, + #ifdef CONFIG_SERIAL_SH_SCI_EARLYCON + static struct plat_sci_port port_cfg; + ++static int early_console_exit(struct console *co) ++{ ++ struct sci_port *sci_port = &sci_ports[0]; ++ ++ /* ++ * Clean the slot used by earlycon. A new SCI device might ++ * map to this slot. ++ */ ++ if (!sci_uart_earlycon_dev_probing) { ++ memset(sci_port, 0, sizeof(*sci_port)); ++ sci_uart_earlycon = false; ++ } ++ ++ return 0; ++} ++ + static int __init early_console_setup(struct earlycon_device *device, + int type) + { +@@ -3582,6 +3608,8 @@ static int __init early_console_setup(struct earlycon_device *device, + SCSCR_RE | SCSCR_TE | port_cfg.scscr); + + device->con->write = serial_console_write; ++ device->con->exit = early_console_exit; ++ + return 0; + } + static int __init sci_early_console_setup(struct earlycon_device *device, +-- +2.39.5 + diff --git a/queue-6.1/serial-sh-sci-increment-the-runtime-usage-counter-fo.patch b/queue-6.1/serial-sh-sci-increment-the-runtime-usage-counter-fo.patch new file mode 100644 index 0000000000..99639c3d7a --- /dev/null +++ b/queue-6.1/serial-sh-sci-increment-the-runtime-usage-counter-fo.patch @@ -0,0 +1,92 @@ +From dbbc48bd7d79961bab151022cd43e54b890140c3 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 08:05:17 +0300 +Subject: serial: sh-sci: Increment the runtime usage counter for the earlycon + device + +From: Claudiu Beznea + +commit 651dee03696e1dfde6d9a7e8664bbdcd9a10ea7f upstream. 
+ +In the sh-sci driver, serial ports are mapped to the sci_ports[] array, +with earlycon mapped at index zero. + +The uart_add_one_port() function eventually calls __device_attach(), +which, in turn, calls pm_request_idle(). The identified code path is as +follows: + +uart_add_one_port() -> + serial_ctrl_register_port() -> + serial_core_register_port() -> + serial_core_port_device_add() -> + serial_base_port_add() -> + device_add() -> + bus_probe_device() -> + device_initial_probe() -> + __device_attach() -> + // ... + if (dev->p->dead) { + // ... + } else if (dev->driver) { + // ... + } else { + // ... + pm_request_idle(dev); + // ... + } + +The earlycon device clocks are enabled by the bootloader. However, the +pm_request_idle() call in __device_attach() disables the SCI port clocks +while earlycon is still active. + +The earlycon write function, serial_console_write(), calls +sci_poll_put_char() via serial_console_putchar(). If the SCI port clocks +are disabled, writing to earlycon may sometimes cause the SR.TDFE bit to +remain unset indefinitely, causing the while loop in sci_poll_put_char() +to never exit. On single-core SoCs, this can result in the system being +blocked during boot when this issue occurs. + +To resolve this, increment the runtime PM usage counter for the earlycon +SCI device before registering the UART port. 
+ +Fixes: 0b0cced19ab1 ("serial: sh-sci: Add CONFIG_SERIAL_EARLYCON support") +Cc: stable@vger.kernel.org +Signed-off-by: Claudiu Beznea +Link: https://lore.kernel.org/r/20250116182249.3828577-6-claudiu.beznea.uj@bp.renesas.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Claudiu Beznea +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/sh-sci.c | 16 ++++++++++++++++ + 1 file changed, 16 insertions(+) + +diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c +index cd724fc284700..f5c0ec4ea08fc 100644 +--- a/drivers/tty/serial/sh-sci.c ++++ b/drivers/tty/serial/sh-sci.c +@@ -3351,6 +3351,22 @@ static int sci_probe_single(struct platform_device *dev, + } + + if (sci_uart_earlycon && sci_ports[0].port.mapbase == sci_res->start) { ++ /* ++ * In case: ++ * - this is the earlycon port (mapped on index 0 in sci_ports[]) and ++ * - it now maps to an alias other than zero and ++ * - the earlycon is still alive (e.g., "earlycon keep_bootcon" is ++ * available in bootargs) ++ * ++ * we need to avoid disabling clocks and PM domains through the runtime ++ * PM APIs called in __device_attach(). For this, increment the runtime ++ * PM reference counter (the clocks and PM domains were already enabled ++ * by the bootloader). Otherwise the earlycon may access the HW when it ++ * has no clocks enabled leading to failures (infinite loop in ++ * sci_poll_put_char()). ++ */ ++ pm_runtime_get_noresume(&dev->dev); ++ + /* + * Skip cleanup the sci_port[0] in early_console_exit(), this + * port is the same as the earlycon one. 
+-- +2.39.5 + diff --git a/queue-6.1/serial-sh-sci-move-runtime-pm-enable-to-sci_probe_si.patch b/queue-6.1/serial-sh-sci-move-runtime-pm-enable-to-sci_probe_si.patch new file mode 100644 index 0000000000..c6764e817f --- /dev/null +++ b/queue-6.1/serial-sh-sci-move-runtime-pm-enable-to-sci_probe_si.patch @@ -0,0 +1,93 @@ +From 2d7ad8294444aa4b8759c4856ea48ee886b42ad4 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 08:05:15 +0300 +Subject: serial: sh-sci: Move runtime PM enable to sci_probe_single() + +From: Claudiu Beznea + +commit 239f11209e5f282e16f5241b99256e25dd0614b6 upstream. + +Relocate the runtime PM enable operation to sci_probe_single(). This change +prepares the codebase for upcoming fixes. + +While at it, replace the existing logic with a direct call to +devm_pm_runtime_enable() and remove sci_cleanup_single(). The +devm_pm_runtime_enable() function automatically handles disabling runtime +PM during driver removal. + +Reviewed-by: Geert Uytterhoeven +Signed-off-by: Claudiu Beznea +Link: https://lore.kernel.org/r/20250116182249.3828577-3-claudiu.beznea.uj@bp.renesas.com +Signed-off-by: Greg Kroah-Hartman +Signed-off-by: Claudiu Beznea +Signed-off-by: Sasha Levin +--- + drivers/tty/serial/sh-sci.c | 24 ++++++------------------ + 1 file changed, 6 insertions(+), 18 deletions(-) + +diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c +index f8f301db84339..48bc66e2c0444 100644 +--- a/drivers/tty/serial/sh-sci.c ++++ b/drivers/tty/serial/sh-sci.c +@@ -2982,10 +2982,6 @@ static int sci_init_single(struct platform_device *dev, + ret = sci_init_clocks(sci_port, &dev->dev); + if (ret < 0) + return ret; +- +- port->dev = &dev->dev; +- +- pm_runtime_enable(&dev->dev); + } + + port->type = p->type; +@@ -3015,11 +3011,6 @@ static int sci_init_single(struct platform_device *dev, + return 0; + } + +-static void sci_cleanup_single(struct sci_port *port) +-{ +- pm_runtime_disable(port->port.dev); +-} +- + #if 
defined(CONFIG_SERIAL_SH_SCI_CONSOLE) || \ + defined(CONFIG_SERIAL_SH_SCI_EARLYCON) + static void serial_console_putchar(struct uart_port *port, unsigned char ch) +@@ -3177,8 +3168,6 @@ static int sci_remove(struct platform_device *dev) + sci_ports_in_use &= ~BIT(port->port.line); + uart_remove_one_port(&sci_uart_driver, &port->port); + +- sci_cleanup_single(port); +- + if (port->port.fifosize > 1) + device_remove_file(&dev->dev, &dev_attr_rx_fifo_trigger); + if (type == PORT_SCIFA || type == PORT_SCIFB || type == PORT_HSCIF) +@@ -3341,6 +3330,11 @@ static int sci_probe_single(struct platform_device *dev, + if (ret) + return ret; + ++ sciport->port.dev = &dev->dev; ++ ret = devm_pm_runtime_enable(&dev->dev); ++ if (ret) ++ return ret; ++ + sciport->gpios = mctrl_gpio_init(&sciport->port, 0); + if (IS_ERR(sciport->gpios)) + return PTR_ERR(sciport->gpios); +@@ -3354,13 +3348,7 @@ static int sci_probe_single(struct platform_device *dev, + sciport->port.flags |= UPF_HARD_FLOW; + } + +- ret = uart_add_one_port(&sci_uart_driver, &sciport->port); +- if (ret) { +- sci_cleanup_single(sciport); +- return ret; +- } +- +- return 0; ++ return uart_add_one_port(&sci_uart_driver, &sciport->port); + } + + static int sci_probe(struct platform_device *dev) +-- +2.39.5 + diff --git a/queue-6.1/series b/queue-6.1/series index 89fbee2166..97d176a02c 100644 --- a/queue-6.1/series +++ b/queue-6.1/series @@ -192,3 +192,41 @@ asoc-codecs-hda-fix-rpm-usage-count-underflow.patch asoc-intel-avs-fix-deadlock-when-the-failing-ipc-is-.patch fix-propagation-graph-breakage-by-move_mount_set_gro.patch do_change_type-refuse-to-operate-on-unmounted-not-ou.patch +xfs-fix-interval-filtering-in-multi-step-fsmap-queri.patch +xfs-fix-integer-overflows-in-the-fsmap-rtbitmap-and-.patch +xfs-fix-getfsmap-reporting-past-the-last-rt-extent.patch +xfs-clean-up-the-rtbitmap-fsmap-backend.patch +xfs-fix-logdev-fsmap-query-result-filtering.patch +xfs-validate-fsmap-offsets-specified-in-the-query-ke.patch 
+xfs-fix-xfs_btree_query_range-callers-to-initialize-.patch +xfs-fix-an-agbno-overflow-in-__xfs_getfsmap_datadev.patch +xfs-fix-the-contact-address-for-the-sysfs-abi-docume.patch +xfs-verify-buffer-inode-and-dquot-items-every-tx-com.patch +xfs-use-consistent-uid-gid-when-grabbing-dquots-for-.patch +xfs-declare-xfs_file.c-symbols-in-xfs_file.h.patch +xfs-create-a-new-helper-to-return-a-file-s-allocatio.patch +xfs-fix-xfs_flush_unmap_range-range-for-rt.patch +xfs-fix-xfs_prepare_shift-range-for-rt.patch +xfs-don-t-walk-off-the-end-of-a-directory-data-block.patch +xfs-remove-unused-parameter-in-macro-xfs_dquot_logre.patch +xfs-attr-forks-require-attr-not-attr2.patch +xfs-conditionally-allow-fs_xflag_realtime-changes-if.patch +xfs-fix-the-owner-setting-issue-for-rmap-query-in-xf.patch +xfs-use-xfs_buf_daddr_null-for-daddrs-in-getfsmap-co.patch +xfs-take-m_growlock-when-running-growfsrt.patch +xfs-reset-rootdir-extent-size-hint-after-growfsrt.patch +net-dsa-microchip-add-ksz9563-in-ksz_switch_ops-and-.patch +net-dsa-microchip-ksz8563-add-number-of-port-irq.patch +net-dsa-microchip-enable-port-queues-for-tc-mqprio.patch +net-dsa-microchip-update-tag_ksz-masks-for-ksz9477-f.patch +net-dsa-microchip-linearize-skb-for-tail-tagging-swi.patch +pmdomain-core-fix-error-checking-in-genpd_dev_pm_att.patch +arm64-dts-marvell-udpu-define-pinctrl-state-for-alar.patch +input-synaptics-rmi-fix-crash-with-unsupported-versi.patch +arm64-dts-ti-k3-am65-main-drop-deprecated-ti-otap-de.patch +arm64-dts-ti-k3-am65-main-fix-sdhci-node-properties.patch +arm64-dts-ti-k3-am65-main-add-missing-taps-to-sdhci0.patch +serial-sh-sci-check-if-tx-data-was-written-to-device.patch +serial-sh-sci-move-runtime-pm-enable-to-sci_probe_si.patch +serial-sh-sci-clean-sci_ports-0-after-at-earlycon-ex.patch +serial-sh-sci-increment-the-runtime-usage-counter-fo.patch diff --git a/queue-6.1/xfs-attr-forks-require-attr-not-attr2.patch b/queue-6.1/xfs-attr-forks-require-attr-not-attr2.patch new file mode 100644 
index 0000000000..d461e46273 --- /dev/null +++ b/queue-6.1/xfs-attr-forks-require-attr-not-attr2.patch @@ -0,0 +1,50 @@ +From 606215744dfc71fdae8b959376375bb7adda5b0f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:22 -0700 +Subject: xfs: attr forks require attr, not attr2 + +From: Darrick J. Wong + +[ Upstream commit 73c34b0b85d46bf9c2c0b367aeaffa1e2481b136 ] + +It turns out that I misunderstood the difference between the attr and +attr2 feature bits. "attr" means that at some point an attr fork was +created somewhere in the filesystem. "attr2" means that inodes have +variable-sized forks, but says nothing about whether or not there +actually /are/ attr forks in the system. + +If we have an attr fork, we only need to check that attr is set. + +Fixes: 99d9d8d05da26 ("xfs: scrub inode block mappings") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/scrub/bmap.c | 8 +++++++- + 1 file changed, 7 insertions(+), 1 deletion(-) + +diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c +index f0b9cb6506fdd..45b135929144e 100644 +--- a/fs/xfs/scrub/bmap.c ++++ b/fs/xfs/scrub/bmap.c +@@ -647,7 +647,13 @@ xchk_bmap( + } + break; + case XFS_ATTR_FORK: +- if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) ++ /* ++ * "attr" means that an attr fork was created at some point in ++ * the life of this filesystem. "attr2" means that inodes have ++ * variable-sized data/attr fork areas. Hence we only check ++ * attr here. 
++ */ ++ if (!xfs_has_attr(mp)) + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + break; + default: +-- +2.39.5 + diff --git a/queue-6.1/xfs-clean-up-the-rtbitmap-fsmap-backend.patch b/queue-6.1/xfs-clean-up-the-rtbitmap-fsmap-backend.patch new file mode 100644 index 0000000000..5e569c0c3b --- /dev/null +++ b/queue-6.1/xfs-clean-up-the-rtbitmap-fsmap-backend.patch @@ -0,0 +1,173 @@ +From 27938437de62e86df034eb48953a306bf2a86e99 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:08 -0700 +Subject: xfs: clean up the rtbitmap fsmap backend + +From: Darrick J. Wong + +[ Upstream commit f045dd00328d78f25d64913285f4547f772d13e2 ] + +The rtbitmap fsmap backend doesn't query the rmapbt, so it's wasteful to +spend time initializing the rmap_irec objects. Worse yet, the logic to +query the rtbitmap is spread across three separate functions, which is +unnecessarily difficult to follow. + +Compute the start rtextent that we want from keys[0] directly and +combine the functions to avoid passing parameters around everywhere, and +consolidate all the logic into a single function. At one point many +years ago I intended to use __xfs_getfsmap_rtdev as the launching point +for realtime rmapbt queries, but this hasn't been the case for a long +time. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 62 +++++++--------------------------------------- + fs/xfs/xfs_trace.h | 25 +++++++++++++++++++ + 2 files changed, 34 insertions(+), 53 deletions(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index 7b72992c14d94..202f162515bd5 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -512,22 +512,21 @@ xfs_getfsmap_rtdev_rtbitmap_helper( + return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr); + } + +-/* Execute a getfsmap query against the realtime device. 
*/ ++/* Execute a getfsmap query against the realtime device rtbitmap. */ + STATIC int +-__xfs_getfsmap_rtdev( ++xfs_getfsmap_rtdev_rtbitmap( + struct xfs_trans *tp, + const struct xfs_fsmap *keys, +- int (*query_fn)(struct xfs_trans *, +- struct xfs_getfsmap_info *, +- xfs_rtblock_t start_rtb, +- xfs_rtblock_t end_rtb), + struct xfs_getfsmap_info *info) + { ++ ++ struct xfs_rtalloc_rec alow = { 0 }; ++ struct xfs_rtalloc_rec ahigh = { 0 }; + struct xfs_mount *mp = tp->t_mountp; + xfs_rtblock_t start_rtb; + xfs_rtblock_t end_rtb; + uint64_t eofs; +- int error = 0; ++ int error; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize); + if (keys[0].fmr_physical >= eofs) +@@ -536,14 +535,7 @@ __xfs_getfsmap_rtdev( + keys[0].fmr_physical + keys[0].fmr_length); + end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); + +- /* Set up search keys */ +- info->low.rm_startblock = start_rtb; +- error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); +- if (error) +- return error; +- info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); +- info->low.rm_blockcount = 0; +- xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); ++ info->missing_owner = XFS_FMR_OWN_UNKNOWN; + + /* Adjust the low key if we are continuing from where we left off. */ + if (keys[0].fmr_length > 0) { +@@ -552,32 +544,8 @@ __xfs_getfsmap_rtdev( + return 0; + } + +- info->high.rm_startblock = end_rtb; +- error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); +- if (error) +- return error; +- info->high.rm_offset = XFS_BB_TO_FSBT(mp, keys[1].fmr_offset); +- info->high.rm_blockcount = 0; +- xfs_getfsmap_set_irec_flags(&info->high, &keys[1]); +- +- trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low); +- trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high); +- +- return query_fn(tp, info, start_rtb, end_rtb); +-} +- +-/* Actually query the realtime bitmap. 
*/ +-STATIC int +-xfs_getfsmap_rtdev_rtbitmap_query( +- struct xfs_trans *tp, +- struct xfs_getfsmap_info *info, +- xfs_rtblock_t start_rtb, +- xfs_rtblock_t end_rtb) +-{ +- struct xfs_rtalloc_rec alow = { 0 }; +- struct xfs_rtalloc_rec ahigh = { 0 }; +- struct xfs_mount *mp = tp->t_mountp; +- int error; ++ trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb); ++ trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb); + + xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED); + +@@ -609,18 +577,6 @@ xfs_getfsmap_rtdev_rtbitmap_query( + xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED); + return error; + } +- +-/* Execute a getfsmap query against the realtime device rtbitmap. */ +-STATIC int +-xfs_getfsmap_rtdev_rtbitmap( +- struct xfs_trans *tp, +- const struct xfs_fsmap *keys, +- struct xfs_getfsmap_info *info) +-{ +- info->missing_owner = XFS_FMR_OWN_UNKNOWN; +- return __xfs_getfsmap_rtdev(tp, keys, xfs_getfsmap_rtdev_rtbitmap_query, +- info); +-} + #endif /* CONFIG_XFS_RT */ + + /* Execute a getfsmap query against the regular data device. 
*/ +diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h +index 20e2ec8b73aa8..a9e3081b6625d 100644 +--- a/fs/xfs/xfs_trace.h ++++ b/fs/xfs/xfs_trace.h +@@ -3491,6 +3491,31 @@ DEFINE_FSMAP_EVENT(xfs_fsmap_low_key); + DEFINE_FSMAP_EVENT(xfs_fsmap_high_key); + DEFINE_FSMAP_EVENT(xfs_fsmap_mapping); + ++DECLARE_EVENT_CLASS(xfs_fsmap_linear_class, ++ TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), ++ TP_ARGS(mp, keydev, bno), ++ TP_STRUCT__entry( ++ __field(dev_t, dev) ++ __field(dev_t, keydev) ++ __field(xfs_fsblock_t, bno) ++ ), ++ TP_fast_assign( ++ __entry->dev = mp->m_super->s_dev; ++ __entry->keydev = new_decode_dev(keydev); ++ __entry->bno = bno; ++ ), ++ TP_printk("dev %d:%d keydev %d:%d bno 0x%llx", ++ MAJOR(__entry->dev), MINOR(__entry->dev), ++ MAJOR(__entry->keydev), MINOR(__entry->keydev), ++ __entry->bno) ++) ++#define DEFINE_FSMAP_LINEAR_EVENT(name) \ ++DEFINE_EVENT(xfs_fsmap_linear_class, name, \ ++ TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \ ++ TP_ARGS(mp, keydev, bno)) ++DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear); ++DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear); ++ + DECLARE_EVENT_CLASS(xfs_getfsmap_class, + TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap), + TP_ARGS(mp, fsmap), +-- +2.39.5 + diff --git a/queue-6.1/xfs-conditionally-allow-fs_xflag_realtime-changes-if.patch b/queue-6.1/xfs-conditionally-allow-fs_xflag_realtime-changes-if.patch new file mode 100644 index 0000000000..61b059dce5 --- /dev/null +++ b/queue-6.1/xfs-conditionally-allow-fs_xflag_realtime-changes-if.patch @@ -0,0 +1,51 @@ +From 0445714bc06b4135db32a5e150e0d4dc554d6955 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:23 -0700 +Subject: xfs: conditionally allow FS_XFLAG_REALTIME changes if S_DAX is set + +From: Darrick J. 
Wong + +[ Upstream commit 8d16762047c627073955b7ed171a36addaf7b1ff ] + +If a file has the S_DAX flag (aka fsdax access mode) set, we cannot +allow users to change the realtime flag unless the datadev and rtdev +both support fsdax access modes. Even if there are no extents allocated +to the file, the setattr thread could be racing with another thread +that has already started down the write code paths. + +Fixes: ba23cba9b3bdc ("fs: allow per-device dax status checking for filesystems") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_ioctl.c | 11 +++++++++++ + 1 file changed, 11 insertions(+) + +diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c +index 1afb1b1b831ea..ef3dc07785669 100644 +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -1128,6 +1128,17 @@ xfs_ioctl_setattr_xflags( + /* Can't change realtime flag if any extents are allocated. */ + if (ip->i_df.if_nextents || ip->i_delayed_blks) + return -EINVAL; ++ ++ /* ++ * If S_DAX is enabled on this file, we can only switch the ++ * device if both support fsdax. We can't update S_DAX because ++ * there might be other threads walking down the access paths. 
++ */ ++ if (IS_DAX(VFS_I(ip)) && ++ (mp->m_ddev_targp->bt_daxdev == NULL || ++ (mp->m_rtdev_targp && ++ mp->m_rtdev_targp->bt_daxdev == NULL))) ++ return -EINVAL; + } + + if (rtflag) { +-- +2.39.5 + diff --git a/queue-6.1/xfs-create-a-new-helper-to-return-a-file-s-allocatio.patch b/queue-6.1/xfs-create-a-new-helper-to-return-a-file-s-allocatio.patch new file mode 100644 index 0000000000..12d3a69f47 --- /dev/null +++ b/queue-6.1/xfs-create-a-new-helper-to-return-a-file-s-allocatio.patch @@ -0,0 +1,126 @@ +From de7ad32b58dae6c3ca9b5426973a5f800dd16776 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:17 -0700 +Subject: xfs: create a new helper to return a file's allocation unit + +From: Darrick J. Wong + +[ Upstream commit ee20808d848c87a51e176706d81b95a21747d6cf ] + +Create a new helper function to calculate the fundamental allocation +unit (i.e. the smallest unit of space we can allocate) of a file. +Things are going to get hairy with range-exchange on the realtime +device, so prepare for this now. + +Remove the static attribute from xfs_is_falloc_aligned since the next +patch will need it. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_file.c | 32 ++++++++++++-------------------- + fs/xfs/xfs_file.h | 3 +++ + fs/xfs/xfs_inode.c | 13 +++++++++++++ + fs/xfs/xfs_inode.h | 2 ++ + 4 files changed, 30 insertions(+), 20 deletions(-) + +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index 6f7522977f7f7..3c910e36da69b 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -39,33 +39,25 @@ static const struct vm_operations_struct xfs_file_vm_ops; + * Decide if the given file range is aligned to the size of the fundamental + * allocation unit for the file. 
+ */ +-static bool ++bool + xfs_is_falloc_aligned( + struct xfs_inode *ip, + loff_t pos, + long long int len) + { +- struct xfs_mount *mp = ip->i_mount; +- uint64_t mask; +- +- if (XFS_IS_REALTIME_INODE(ip)) { +- if (!is_power_of_2(mp->m_sb.sb_rextsize)) { +- u64 rextbytes; +- u32 mod; +- +- rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize); +- div_u64_rem(pos, rextbytes, &mod); +- if (mod) +- return false; +- div_u64_rem(len, rextbytes, &mod); +- return mod == 0; +- } +- mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1; +- } else { +- mask = mp->m_sb.sb_blocksize - 1; ++ unsigned int alloc_unit = xfs_inode_alloc_unitsize(ip); ++ ++ if (!is_power_of_2(alloc_unit)) { ++ u32 mod; ++ ++ div_u64_rem(pos, alloc_unit, &mod); ++ if (mod) ++ return false; ++ div_u64_rem(len, alloc_unit, &mod); ++ return mod == 0; + } + +- return !((pos | len) & mask); ++ return !((pos | len) & (alloc_unit - 1)); + } + + /* +diff --git a/fs/xfs/xfs_file.h b/fs/xfs/xfs_file.h +index 7d39e3eca56dc..2ad91f755caf3 100644 +--- a/fs/xfs/xfs_file.h ++++ b/fs/xfs/xfs_file.h +@@ -9,4 +9,7 @@ + extern const struct file_operations xfs_file_operations; + extern const struct file_operations xfs_dir_file_operations; + ++bool xfs_is_falloc_aligned(struct xfs_inode *ip, loff_t pos, ++ long long int len); ++ + #endif /* __XFS_FILE_H__ */ +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index 88d0a088fa862..3ccbc31767b3c 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -3775,3 +3775,16 @@ xfs_inode_reload_unlinked( + + return error; + } ++ ++/* Returns the size of fundamental allocation unit for a file, in bytes. 
*/ ++unsigned int ++xfs_inode_alloc_unitsize( ++ struct xfs_inode *ip) ++{ ++ unsigned int blocks = 1; ++ ++ if (XFS_IS_REALTIME_INODE(ip)) ++ blocks = ip->i_mount->m_sb.sb_rextsize; ++ ++ return XFS_FSB_TO_B(ip->i_mount, blocks); ++} +diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h +index c177c92f3aa57..c4f426eadf8e2 100644 +--- a/fs/xfs/xfs_inode.h ++++ b/fs/xfs/xfs_inode.h +@@ -622,4 +622,6 @@ xfs_inode_unlinked_incomplete( + int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip); + int xfs_inode_reload_unlinked(struct xfs_inode *ip); + ++unsigned int xfs_inode_alloc_unitsize(struct xfs_inode *ip); ++ + #endif /* __XFS_INODE_H__ */ +-- +2.39.5 + diff --git a/queue-6.1/xfs-declare-xfs_file.c-symbols-in-xfs_file.h.patch b/queue-6.1/xfs-declare-xfs_file.c-symbols-in-xfs_file.h.patch new file mode 100644 index 0000000000..8f537d35e4 --- /dev/null +++ b/queue-6.1/xfs-declare-xfs_file.c-symbols-in-xfs_file.h.patch @@ -0,0 +1,98 @@ +From 8677d8527555d7496b94b589a6c059c076f32795 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:16 -0700 +Subject: xfs: declare xfs_file.c symbols in xfs_file.h + +From: Darrick J. Wong + +[ Upstream commit 00acb28d96746f78389f23a7b5309a917b45c12f ] + +Move the two public symbols in xfs_file.c to xfs_file.h. We're about to +add more public symbols in that source file, so let's finally create the +header file. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_file.c | 1 + + fs/xfs/xfs_file.h | 12 ++++++++++++ + fs/xfs/xfs_ioctl.c | 1 + + fs/xfs/xfs_iops.c | 1 + + fs/xfs/xfs_iops.h | 3 --- + 5 files changed, 15 insertions(+), 3 deletions(-) + create mode 100644 fs/xfs/xfs_file.h + +diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c +index 821cb86a83bd5..6f7522977f7f7 100644 +--- a/fs/xfs/xfs_file.c ++++ b/fs/xfs/xfs_file.c +@@ -24,6 +24,7 @@ + #include "xfs_pnfs.h" + #include "xfs_iomap.h" + #include "xfs_reflink.h" ++#include "xfs_file.h" + + #include + #include +diff --git a/fs/xfs/xfs_file.h b/fs/xfs/xfs_file.h +new file mode 100644 +index 0000000000000..7d39e3eca56dc +--- /dev/null ++++ b/fs/xfs/xfs_file.h +@@ -0,0 +1,12 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Copyright (c) 2000-2005 Silicon Graphics, Inc. ++ * All Rights Reserved. ++ */ ++#ifndef __XFS_FILE_H__ ++#define __XFS_FILE_H__ ++ ++extern const struct file_operations xfs_file_operations; ++extern const struct file_operations xfs_dir_file_operations; ++ ++#endif /* __XFS_FILE_H__ */ +diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c +index c7cb496dc3458..1afb1b1b831ea 100644 +--- a/fs/xfs/xfs_ioctl.c ++++ b/fs/xfs/xfs_ioctl.c +@@ -38,6 +38,7 @@ + #include "xfs_reflink.h" + #include "xfs_ioctl.h" + #include "xfs_xattr.h" ++#include "xfs_file.h" + + #include + #include +diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c +index 6fbdc0a19e54c..9ca1b8bf1f053 100644 +--- a/fs/xfs/xfs_iops.c ++++ b/fs/xfs/xfs_iops.c +@@ -25,6 +25,7 @@ + #include "xfs_error.h" + #include "xfs_ioctl.h" + #include "xfs_xattr.h" ++#include "xfs_file.h" + + #include + #include +diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h +index e570dcb5df8d5..73ff92355eaa7 100644 +--- a/fs/xfs/xfs_iops.h ++++ b/fs/xfs/xfs_iops.h +@@ -8,9 +8,6 @@ + + struct xfs_inode; + +-extern const struct file_operations xfs_file_operations; +-extern const struct file_operations xfs_dir_file_operations; +- + extern ssize_t xfs_vn_listxattr(struct 
dentry *, char *data, size_t size); + + int xfs_vn_setattr_size(struct user_namespace *mnt_userns, +-- +2.39.5 + diff --git a/queue-6.1/xfs-don-t-walk-off-the-end-of-a-directory-data-block.patch b/queue-6.1/xfs-don-t-walk-off-the-end-of-a-directory-data-block.patch new file mode 100644 index 0000000000..e03991ab68 --- /dev/null +++ b/queue-6.1/xfs-don-t-walk-off-the-end-of-a-directory-data-block.patch @@ -0,0 +1,131 @@ +From 5dc2181975668635dd5aa8cd0d5745c8813ab103 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:20 -0700 +Subject: xfs: don't walk off the end of a directory data block + +From: lei lu + +[ Upstream commit 0c7fcdb6d06cdf8b19b57c17605215b06afa864a ] + +This adds sanity checks for xfs_dir2_data_unused and xfs_dir2_data_entry +to make sure don't stray beyond valid memory region. Before patching, the +loop simply checks that the start offset of the dup and dep is within the +range. So in a crafted image, if last entry is xfs_dir2_data_unused, we +can change dup->length to dup->length-1 and leave 1 byte of space. In the +next traversal, this space will be considered as dup or dep. We may +encounter an out of bound read when accessing the fixed members. + +In the patch, we make sure that the remaining bytes large enough to hold +an unused entry before accessing xfs_dir2_data_unused and +xfs_dir2_data_unused is XFS_DIR2_DATA_ALIGN byte aligned. We also make +sure that the remaining bytes large enough to hold a dirent with a +single-byte name before accessing xfs_dir2_data_entry. + +Signed-off-by: lei lu +Reviewed-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_dir2_data.c | 31 ++++++++++++++++++++++++++----- + fs/xfs/libxfs/xfs_dir2_priv.h | 7 +++++++ + 2 files changed, 33 insertions(+), 5 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c +index dbcf58979a598..e1d5da6d8d4a6 100644 +--- a/fs/xfs/libxfs/xfs_dir2_data.c ++++ b/fs/xfs/libxfs/xfs_dir2_data.c +@@ -177,6 +177,14 @@ __xfs_dir3_data_check( + while (offset < end) { + struct xfs_dir2_data_unused *dup = bp->b_addr + offset; + struct xfs_dir2_data_entry *dep = bp->b_addr + offset; ++ unsigned int reclen; ++ ++ /* ++ * Are the remaining bytes large enough to hold an ++ * unused entry? ++ */ ++ if (offset > end - xfs_dir2_data_unusedsize(1)) ++ return __this_address; + + /* + * If it's unused, look for the space in the bestfree table. +@@ -186,9 +194,13 @@ __xfs_dir3_data_check( + if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { + xfs_failaddr_t fa; + ++ reclen = xfs_dir2_data_unusedsize( ++ be16_to_cpu(dup->length)); + if (lastfree != 0) + return __this_address; +- if (offset + be16_to_cpu(dup->length) > end) ++ if (be16_to_cpu(dup->length) != reclen) ++ return __this_address; ++ if (offset + reclen > end) + return __this_address; + if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) != + offset) +@@ -206,10 +218,18 @@ __xfs_dir3_data_check( + be16_to_cpu(bf[2].length)) + return __this_address; + } +- offset += be16_to_cpu(dup->length); ++ offset += reclen; + lastfree = 1; + continue; + } ++ ++ /* ++ * This is not an unused entry. Are the remaining bytes ++ * large enough for a dirent with a single-byte name? ++ */ ++ if (offset > end - xfs_dir2_data_entsize(mp, 1)) ++ return __this_address; ++ + /* + * It's a real entry. Validate the fields. 
+ * If this is a block directory then make sure it's +@@ -218,9 +238,10 @@ __xfs_dir3_data_check( + */ + if (dep->namelen == 0) + return __this_address; +- if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber))) ++ reclen = xfs_dir2_data_entsize(mp, dep->namelen); ++ if (offset + reclen > end) + return __this_address; +- if (offset + xfs_dir2_data_entsize(mp, dep->namelen) > end) ++ if (!xfs_verify_dir_ino(mp, be64_to_cpu(dep->inumber))) + return __this_address; + if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset) + return __this_address; +@@ -244,7 +265,7 @@ __xfs_dir3_data_check( + if (i >= be32_to_cpu(btp->count)) + return __this_address; + } +- offset += xfs_dir2_data_entsize(mp, dep->namelen); ++ offset += reclen; + } + /* + * Need to have seen all the entries and all the bestfree slots. +diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h +index 7404a9ff1a929..9046d08554e9e 100644 +--- a/fs/xfs/libxfs/xfs_dir2_priv.h ++++ b/fs/xfs/libxfs/xfs_dir2_priv.h +@@ -187,6 +187,13 @@ void xfs_dir2_sf_put_ftype(struct xfs_mount *mp, + extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp, + struct dir_context *ctx, size_t bufsize); + ++static inline unsigned int ++xfs_dir2_data_unusedsize( ++ unsigned int len) ++{ ++ return round_up(len, XFS_DIR2_DATA_ALIGN); ++} ++ + static inline unsigned int + xfs_dir2_data_entsize( + struct xfs_mount *mp, +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-an-agbno-overflow-in-__xfs_getfsmap_datadev.patch b/queue-6.1/xfs-fix-an-agbno-overflow-in-__xfs_getfsmap_datadev.patch new file mode 100644 index 0000000000..ac661b218f --- /dev/null +++ b/queue-6.1/xfs-fix-an-agbno-overflow-in-__xfs_getfsmap_datadev.patch @@ -0,0 +1,122 @@ +From 3e6ecdc1c5f3a9ba0ffb5d948798a3b1e22d38a7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:12 -0700 +Subject: xfs: fix an agbno overflow in __xfs_getfsmap_datadev + +From: Darrick J. 
Wong + +[ Upstream commit cfa2df68b7ceb49ac9eb2d295ab0c5974dbf17e7 ] + +Dave Chinner reported that xfs/273 fails if the AG size happens to be an +exact power of two. I traced this to an agbno integer overflow when the +current GETFSMAP call is a continuation of a previous GETFSMAP call, and +the last record returned was non-shareable space at the end of an AG. + +__xfs_getfsmap_datadev sets up a data device query by converting the +incoming fmr_physical into an xfs_fsblock_t and cracking it into an agno +and agbno pair. In the (failing) case of where fmr_blockcount of the +low key is nonzero and the record was for a non-shareable extent, it +will add fmr_blockcount to start_fsb and info->low.rm_startblock. + +If the low key was actually the last record for that AG, then this +addition causes info->low.rm_startblock to point beyond EOAG. When the +rmapbt range query starts, it'll return an empty set, and fsmap moves on +to the next AG. + +Or so I thought. Remember how we added to start_fsb? + +If agsize < 1<<agblklog, start_fsb points to the same AG as the original +fmr_physical from the low key. We run the rmapbt query, which returns +nothing, so getfsmap zeroes info->low and moves on to the next AG. + +If agsize == 1<<agblklog, start_fsb now points to the next AG. We run +the rmapbt query on the next AG with the excessively large +rm_startblock. If this next AG is actually the last AG, we'll set +info->high to EOFS (which is now has a lower rm_startblock than +info->low), and the ranged btree query code will return -EINVAL. If +it's not the last AG, we ignore all records for the intermediate AGs. + +Oops. + +Fix this by decoding start_fsb into agno and agbno only after making +adjustments to start_fsb. This means that info->low.rm_startblock will +always be set to a valid agbno, and we always start the rmapbt iteration +in the correct AG. + +While we're at it, fix the predicate for determining if an fsmap record +represents non-shareable space to include file data on pre-reflink +filesystems. + +Reported-by: Dave Chinner +Fixes: 63ef7a35912dd ("xfs: fix interval filtering in multi-step fsmap queries") +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J.
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 25 ++++++++++++++++++------- + 1 file changed, 18 insertions(+), 7 deletions(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index d10f2c719220d..956a5670e56ce 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -565,6 +565,19 @@ xfs_getfsmap_rtdev_rtbitmap( + } + #endif /* CONFIG_XFS_RT */ + ++static inline bool ++rmap_not_shareable(struct xfs_mount *mp, const struct xfs_rmap_irec *r) ++{ ++ if (!xfs_has_reflink(mp)) ++ return true; ++ if (XFS_RMAP_NON_INODE_OWNER(r->rm_owner)) ++ return true; ++ if (r->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK | ++ XFS_RMAP_UNWRITTEN)) ++ return true; ++ return false; ++} ++ + /* Execute a getfsmap query against the regular data device. */ + STATIC int + __xfs_getfsmap_datadev( +@@ -598,7 +611,6 @@ __xfs_getfsmap_datadev( + * low to the fsmap low key and max out the high key to the end + * of the AG. + */ +- info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); + info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); + if (error) +@@ -608,12 +620,9 @@ __xfs_getfsmap_datadev( + + /* Adjust the low key if we are continuing from where we left off. 
*/ + if (info->low.rm_blockcount == 0) { +- /* empty */ +- } else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) || +- (info->low.rm_flags & (XFS_RMAP_ATTR_FORK | +- XFS_RMAP_BMBT_BLOCK | +- XFS_RMAP_UNWRITTEN))) { +- info->low.rm_startblock += info->low.rm_blockcount; ++ /* No previous record from which to continue */ ++ } else if (rmap_not_shareable(mp, &info->low)) { ++ /* Last record seen was an unshareable extent */ + info->low.rm_owner = 0; + info->low.rm_offset = 0; + +@@ -621,8 +630,10 @@ __xfs_getfsmap_datadev( + if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs) + return 0; + } else { ++ /* Last record seen was a shareable file data extent */ + info->low.rm_offset += info->low.rm_blockcount; + } ++ info->low.rm_startblock = XFS_FSB_TO_AGBNO(mp, start_fsb); + + info->high.rm_startblock = -1U; + info->high.rm_owner = ULLONG_MAX; +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-getfsmap-reporting-past-the-last-rt-extent.patch b/queue-6.1/xfs-fix-getfsmap-reporting-past-the-last-rt-extent.patch new file mode 100644 index 0000000000..d2c09e0e0f --- /dev/null +++ b/queue-6.1/xfs-fix-getfsmap-reporting-past-the-last-rt-extent.patch @@ -0,0 +1,42 @@ +From c5618358b5c9ecdea894d0b77c43978f2cbcfc88 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:07 -0700 +Subject: xfs: fix getfsmap reporting past the last rt extent + +From: Darrick J. Wong + +[ Upstream commit d898137d789cac9ebe5eed9957e4cf25122ca524 ] + +The realtime section ends at the last rt extent. If the user configures +the rt geometry with an extent size that is not an integer factor of the +number of rt blocks, it's possible for there to be rt blocks past the +end of the last rt extent. These tail blocks cannot ever be allocated +and will cause corruption reports if the last extent coincides with the +end of an rt bitmap block, so do not report consider them for the +GETFSMAP output. + +Signed-off-by: Darrick J. 
Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index 5039d330ef98b..7b72992c14d94 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -529,7 +529,7 @@ __xfs_getfsmap_rtdev( + uint64_t eofs; + int error = 0; + +- eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); ++ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rextents * mp->m_sb.sb_rextsize); + if (keys[0].fmr_physical >= eofs) + return 0; + start_rtb = XFS_BB_TO_FSBT(mp, +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-integer-overflows-in-the-fsmap-rtbitmap-and-.patch b/queue-6.1/xfs-fix-integer-overflows-in-the-fsmap-rtbitmap-and-.patch new file mode 100644 index 0000000000..c7a56bc5ad --- /dev/null +++ b/queue-6.1/xfs-fix-integer-overflows-in-the-fsmap-rtbitmap-and-.patch @@ -0,0 +1,297 @@ +From 271c39a86f9d3f8eb09361272a6b9787404ae814 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:06 -0700 +Subject: xfs: fix integer overflows in the fsmap rtbitmap and logdev backends + +From: Darrick J. Wong + +[ Upstream commit 7975aba19cba4eba7ff60410f9294c90edc96dcf ] + +It's not correct to use the rmap irec structure to hold query key +information to query the rtbitmap because the realtime volume can be +longer than 2^32 fsblocks in length. Because the rt volume doesn't have +allocation groups, introduce a daddr-based record filtering algorithm +and compute the rtextent values using 64-bit variables. The same +problem exists in the external log device fsmap implementation, so use +the same solution to fix it too. + +After this patch, all the code that touches info->low and info->high +under xfs_getfsmap_logdev and __xfs_getfsmap_rtdev are unnecessary. +Cleaning this up will be done in subsequent patches. 
+ +Fixes: 4c934c7dd60c ("xfs: report realtime space information via the rtbitmap") +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 90 ++++++++++++++++++++++++++++++++-------------- + 1 file changed, 64 insertions(+), 26 deletions(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index 2011f1bf7ce0f..5039d330ef98b 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -160,6 +160,8 @@ struct xfs_getfsmap_info { + struct xfs_buf *agf_bp; /* AGF, for refcount queries */ + struct xfs_perag *pag; /* AG info, if applicable */ + xfs_daddr_t next_daddr; /* next daddr we expect */ ++ /* daddr of low fsmap key when we're using the rtbitmap */ ++ xfs_daddr_t low_daddr; + u64 missing_owner; /* owner of holes */ + u32 dev; /* device id */ + /* +@@ -250,6 +252,8 @@ xfs_getfsmap_rec_before_start( + const struct xfs_rmap_irec *rec, + xfs_daddr_t rec_daddr) + { ++ if (info->low_daddr != -1ULL) ++ return rec_daddr < info->low_daddr; + if (info->low.rm_blockcount) + return xfs_rmap_compare(rec, &info->low) < 0; + return false; +@@ -257,14 +261,16 @@ xfs_getfsmap_rec_before_start( + + /* + * Format a reverse mapping for getfsmap, having translated rm_startblock +- * into the appropriate daddr units. ++ * into the appropriate daddr units. Pass in a nonzero @len_daddr if the ++ * length could be larger than rm_blockcount in struct xfs_rmap_irec. 
+ */ + STATIC int + xfs_getfsmap_helper( + struct xfs_trans *tp, + struct xfs_getfsmap_info *info, + const struct xfs_rmap_irec *rec, +- xfs_daddr_t rec_daddr) ++ xfs_daddr_t rec_daddr, ++ xfs_daddr_t len_daddr) + { + struct xfs_fsmap fmr; + struct xfs_mount *mp = tp->t_mountp; +@@ -274,12 +280,15 @@ xfs_getfsmap_helper( + if (fatal_signal_pending(current)) + return -EINTR; + ++ if (len_daddr == 0) ++ len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount); ++ + /* + * Filter out records that start before our startpoint, if the + * caller requested that. + */ + if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) { +- rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); ++ rec_daddr += len_daddr; + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; + return 0; +@@ -298,7 +307,7 @@ xfs_getfsmap_helper( + + info->head->fmh_entries++; + +- rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); ++ rec_daddr += len_daddr; + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; + return 0; +@@ -338,7 +347,7 @@ xfs_getfsmap_helper( + if (error) + return error; + fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset); +- fmr.fmr_length = XFS_FSB_TO_BB(mp, rec->rm_blockcount); ++ fmr.fmr_length = len_daddr; + if (rec->rm_flags & XFS_RMAP_UNWRITTEN) + fmr.fmr_flags |= FMR_OF_PREALLOC; + if (rec->rm_flags & XFS_RMAP_ATTR_FORK) +@@ -355,7 +364,7 @@ xfs_getfsmap_helper( + + xfs_getfsmap_format(mp, &fmr, info); + out: +- rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); ++ rec_daddr += len_daddr; + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; + return 0; +@@ -376,7 +385,7 @@ xfs_getfsmap_datadev_helper( + fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock); + rec_daddr = XFS_FSB_TO_DADDR(mp, fsb); + +- return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr); ++ return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0); + } + + /* Transform a bnobt irec into a fsmap */ +@@ -400,7 +409,7 @@ 
xfs_getfsmap_datadev_bnobt_helper( + irec.rm_offset = 0; + irec.rm_flags = 0; + +- return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr); ++ return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0); + } + + /* Set rmap flags based on the getfsmap flags */ +@@ -427,9 +436,13 @@ xfs_getfsmap_logdev( + { + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rmap_irec rmap; ++ xfs_daddr_t rec_daddr, len_daddr; ++ xfs_fsblock_t start_fsb; + int error; + + /* Set up search keys */ ++ start_fsb = XFS_BB_TO_FSBT(mp, ++ keys[0].fmr_physical + keys[0].fmr_length); + info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); + info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); + error = xfs_fsmap_owner_to_rmap(&info->low, keys); +@@ -438,6 +451,10 @@ xfs_getfsmap_logdev( + info->low.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + ++ /* Adjust the low key if we are continuing from where we left off. */ ++ if (keys[0].fmr_length > 0) ++ info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); ++ + error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1); + if (error) + return error; +@@ -451,7 +468,7 @@ xfs_getfsmap_logdev( + trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low); + trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high); + +- if (keys[0].fmr_physical > 0) ++ if (start_fsb > 0) + return 0; + + /* Fabricate an rmap entry for the external log device. 
*/ +@@ -461,7 +478,9 @@ xfs_getfsmap_logdev( + rmap.rm_offset = 0; + rmap.rm_flags = 0; + +- return xfs_getfsmap_helper(tp, info, &rmap, 0); ++ rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock); ++ len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount); ++ return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr); + } + + #ifdef CONFIG_XFS_RT +@@ -475,16 +494,22 @@ xfs_getfsmap_rtdev_rtbitmap_helper( + { + struct xfs_getfsmap_info *info = priv; + struct xfs_rmap_irec irec; +- xfs_daddr_t rec_daddr; ++ xfs_rtblock_t rtbno; ++ xfs_daddr_t rec_daddr, len_daddr; ++ ++ rtbno = rec->ar_startext * mp->m_sb.sb_rextsize; ++ rec_daddr = XFS_FSB_TO_BB(mp, rtbno); ++ irec.rm_startblock = rtbno; ++ ++ rtbno = rec->ar_extcount * mp->m_sb.sb_rextsize; ++ len_daddr = XFS_FSB_TO_BB(mp, rtbno); ++ irec.rm_blockcount = rtbno; + +- irec.rm_startblock = rec->ar_startext * mp->m_sb.sb_rextsize; +- rec_daddr = XFS_FSB_TO_BB(mp, irec.rm_startblock); +- irec.rm_blockcount = rec->ar_extcount * mp->m_sb.sb_rextsize; + irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */ + irec.rm_offset = 0; + irec.rm_flags = 0; + +- return xfs_getfsmap_helper(tp, info, &irec, rec_daddr); ++ return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr); + } + + /* Execute a getfsmap query against the realtime device. 
*/ +@@ -493,23 +518,26 @@ __xfs_getfsmap_rtdev( + struct xfs_trans *tp, + const struct xfs_fsmap *keys, + int (*query_fn)(struct xfs_trans *, +- struct xfs_getfsmap_info *), ++ struct xfs_getfsmap_info *, ++ xfs_rtblock_t start_rtb, ++ xfs_rtblock_t end_rtb), + struct xfs_getfsmap_info *info) + { + struct xfs_mount *mp = tp->t_mountp; +- xfs_fsblock_t start_fsb; +- xfs_fsblock_t end_fsb; ++ xfs_rtblock_t start_rtb; ++ xfs_rtblock_t end_rtb; + uint64_t eofs; + int error = 0; + + eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks); + if (keys[0].fmr_physical >= eofs) + return 0; +- start_fsb = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); +- end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); ++ start_rtb = XFS_BB_TO_FSBT(mp, ++ keys[0].fmr_physical + keys[0].fmr_length); ++ end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); + + /* Set up search keys */ +- info->low.rm_startblock = start_fsb; ++ info->low.rm_startblock = start_rtb; + error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); + if (error) + return error; +@@ -517,7 +545,14 @@ __xfs_getfsmap_rtdev( + info->low.rm_blockcount = 0; + xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + +- info->high.rm_startblock = end_fsb; ++ /* Adjust the low key if we are continuing from where we left off. */ ++ if (keys[0].fmr_length > 0) { ++ info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb); ++ if (info->low_daddr >= eofs) ++ return 0; ++ } ++ ++ info->high.rm_startblock = end_rtb; + error = xfs_fsmap_owner_to_rmap(&info->high, &keys[1]); + if (error) + return error; +@@ -528,14 +563,16 @@ __xfs_getfsmap_rtdev( + trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low); + trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high); + +- return query_fn(tp, info); ++ return query_fn(tp, info, start_rtb, end_rtb); + } + + /* Actually query the realtime bitmap. 
*/ + STATIC int + xfs_getfsmap_rtdev_rtbitmap_query( + struct xfs_trans *tp, +- struct xfs_getfsmap_info *info) ++ struct xfs_getfsmap_info *info, ++ xfs_rtblock_t start_rtb, ++ xfs_rtblock_t end_rtb) + { + struct xfs_rtalloc_rec alow = { 0 }; + struct xfs_rtalloc_rec ahigh = { 0 }; +@@ -548,8 +585,8 @@ xfs_getfsmap_rtdev_rtbitmap_query( + * Set up query parameters to return free rtextents covering the range + * we want. + */ +- alow.ar_startext = info->low.rm_startblock; +- ahigh.ar_startext = info->high.rm_startblock; ++ alow.ar_startext = start_rtb; ++ ahigh.ar_startext = end_rtb; + do_div(alow.ar_startext, mp->m_sb.sb_rextsize); + if (do_div(ahigh.ar_startext, mp->m_sb.sb_rextsize)) + ahigh.ar_startext++; +@@ -988,6 +1025,7 @@ xfs_getfsmap( + info.dev = handlers[i].dev; + info.last = false; + info.pag = NULL; ++ info.low_daddr = -1ULL; + info.low.rm_blockcount = 0; + error = handlers[i].fn(tp, dkeys, &info); + if (error) +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-interval-filtering-in-multi-step-fsmap-queri.patch b/queue-6.1/xfs-fix-interval-filtering-in-multi-step-fsmap-queri.patch new file mode 100644 index 0000000000..6307c543ee --- /dev/null +++ b/queue-6.1/xfs-fix-interval-filtering-in-multi-step-fsmap-queri.patch @@ -0,0 +1,189 @@ +From 20ca80eedc27eb2274362eb8d166f06974632965 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:05 -0700 +Subject: xfs: fix interval filtering in multi-step fsmap queries + +From: Darrick J. Wong + +[ Upstream commit 63ef7a35912dd743cabd65d5bb95891625c0dd46 ] + +I noticed a bug in ranged GETFSMAP queries: + +# xfs_io -c 'fsmap -vvvv' /opt + EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL + 0: 8:80 [0..7]: static fs metadata 0 (0..7) 8 + + 9: 8:80 [192..223]: 137 0..31 0 (192..223) 32 +# xfs_io -c 'fsmap -vvvv -d 208 208' /opt +# + +That's not right -- we asked what block maps block 208, and we should've +received a mapping for inode 137 offset 16. Instead, we get nothing. 
+ +The root cause of this problem is a mis-interaction between the fsmap +code and how btree ranged queries work. xfs_btree_query_range returns +any btree record that overlaps with the query interval, even if the +record starts before or ends after the interval. Similarly, GETFSMAP is +supposed to return a recordset containing all records that overlap the +range queried. + +However, it's possible that the recordset is larger than the buffer that +the caller provided to convey mappings to userspace. In /that/ case, +userspace is supposed to copy the last record returned to fmh_keys[0] +and call GETFSMAP again. In this case, we do not want to return +mappings that we have already supplied to the caller. The call to +xfs_btree_query_range is the same, but now we ignore any records that +start before fmh_keys[0]. + +Unfortunately, we didn't implement the filtering predicate correctly. +The predicate should only be called when we're calling back for more +records. Accomplish this by setting info->low.rm_blockcount to a +nonzero value and ensuring that it is cleared as necessary. As a +result, we no longer want to adjust dkeys[0] in the main setup function +because that's confusing. + +This patch doesn't touch the logdev/rtbitmap backends because they have +bigger problems that will be addressed by subsequent patches. + +Found via xfs/556 with parent pointers enabled. + +Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl") +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 67 +++++++++++++++++++++++++++++++++------------- + 1 file changed, 48 insertions(+), 19 deletions(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index a5b9754c62d1c..2011f1bf7ce0f 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -162,7 +162,14 @@ struct xfs_getfsmap_info { + xfs_daddr_t next_daddr; /* next daddr we expect */ + u64 missing_owner; /* owner of holes */ + u32 dev; /* device id */ +- struct xfs_rmap_irec low; /* low rmap key */ ++ /* ++ * Low rmap key for the query. If low.rm_blockcount is nonzero, this ++ * is the second (or later) call to retrieve the recordset in pieces. ++ * xfs_getfsmap_rec_before_start will compare all records retrieved ++ * by the rmapbt query to filter out any records that start before ++ * the last record. ++ */ ++ struct xfs_rmap_irec low; + struct xfs_rmap_irec high; /* high rmap key */ + bool last; /* last extent? */ + }; +@@ -237,6 +244,17 @@ xfs_getfsmap_format( + xfs_fsmap_from_internal(rec, xfm); + } + ++static inline bool ++xfs_getfsmap_rec_before_start( ++ struct xfs_getfsmap_info *info, ++ const struct xfs_rmap_irec *rec, ++ xfs_daddr_t rec_daddr) ++{ ++ if (info->low.rm_blockcount) ++ return xfs_rmap_compare(rec, &info->low) < 0; ++ return false; ++} ++ + /* + * Format a reverse mapping for getfsmap, having translated rm_startblock + * into the appropriate daddr units. +@@ -260,7 +278,7 @@ xfs_getfsmap_helper( + * Filter out records that start before our startpoint, if the + * caller requested that. 
+ */ +- if (xfs_rmap_compare(rec, &info->low) < 0) { ++ if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) { + rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount); + if (info->next_daddr < rec_daddr) + info->next_daddr = rec_daddr; +@@ -606,9 +624,27 @@ __xfs_getfsmap_datadev( + error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]); + if (error) + return error; +- info->low.rm_blockcount = 0; ++ info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length); + xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); + ++ /* Adjust the low key if we are continuing from where we left off. */ ++ if (info->low.rm_blockcount == 0) { ++ /* empty */ ++ } else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) || ++ (info->low.rm_flags & (XFS_RMAP_ATTR_FORK | ++ XFS_RMAP_BMBT_BLOCK | ++ XFS_RMAP_UNWRITTEN))) { ++ info->low.rm_startblock += info->low.rm_blockcount; ++ info->low.rm_owner = 0; ++ info->low.rm_offset = 0; ++ ++ start_fsb += info->low.rm_blockcount; ++ if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs) ++ return 0; ++ } else { ++ info->low.rm_offset += info->low.rm_blockcount; ++ } ++ + info->high.rm_startblock = -1U; + info->high.rm_owner = ULLONG_MAX; + info->high.rm_offset = ULLONG_MAX; +@@ -659,12 +695,8 @@ __xfs_getfsmap_datadev( + * Set the AG low key to the start of the AG prior to + * moving on to the next AG. + */ +- if (pag->pag_agno == start_ag) { +- info->low.rm_startblock = 0; +- info->low.rm_owner = 0; +- info->low.rm_offset = 0; +- info->low.rm_flags = 0; +- } ++ if (pag->pag_agno == start_ag) ++ memset(&info->low, 0, sizeof(info->low)); + + /* + * If this is the last AG, report any gap at the end of it +@@ -901,21 +933,17 @@ xfs_getfsmap( + * blocks could be mapped to several other files/offsets. + * According to rmapbt record ordering, the minimal next + * possible record for the block range is the next starting +- * offset in the same inode. Therefore, bump the file offset to +- * continue the search appropriately. 
For all other low key +- * mapping types (attr blocks, metadata), bump the physical +- * offset as there can be no other mapping for the same physical +- * block range. ++ * offset in the same inode. Therefore, each fsmap backend bumps ++ * the file offset to continue the search appropriately. For ++ * all other low key mapping types (attr blocks, metadata), each ++ * fsmap backend bumps the physical offset as there can be no ++ * other mapping for the same physical block range. + */ + dkeys[0] = head->fmh_keys[0]; + if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { +- dkeys[0].fmr_physical += dkeys[0].fmr_length; +- dkeys[0].fmr_owner = 0; + if (dkeys[0].fmr_offset) + return -EINVAL; +- } else +- dkeys[0].fmr_offset += dkeys[0].fmr_length; +- dkeys[0].fmr_length = 0; ++ } + memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap)); + + if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) +@@ -960,6 +988,7 @@ xfs_getfsmap( + info.dev = handlers[i].dev; + info.last = false; + info.pag = NULL; ++ info.low.rm_blockcount = 0; + error = handlers[i].fn(tp, dkeys, &info); + if (error) + break; +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-logdev-fsmap-query-result-filtering.patch b/queue-6.1/xfs-fix-logdev-fsmap-query-result-filtering.patch new file mode 100644 index 0000000000..524879783a --- /dev/null +++ b/queue-6.1/xfs-fix-logdev-fsmap-query-result-filtering.patch @@ -0,0 +1,80 @@ +From 6699bcd6de5745bb295a74726e51d91b0dbc7269 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:09 -0700 +Subject: xfs: fix logdev fsmap query result filtering + +From: Darrick J. Wong + +[ Upstream commit a949a1c2a198e048630a8b0741a99b85a5d88136 ] + +The external log device fsmap backend doesn't have an rmapbt to query, +so it's wasteful to spend time initializing the rmap_irec objects. +Worse yet, the log could (someday) be longer than 2^32 fsblocks, so +using the rmap irec structure will result in integer overflows. 
+ +Fix this mess by computing the start address that we want from keys[0] +directly, and use the daddr-based record filtering algorithm that we +also use for rtbitmap queries. + +Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl") +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 30 ++++++++---------------------- + 1 file changed, 8 insertions(+), 22 deletions(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index 202f162515bd5..cdd806d80b7cf 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -437,36 +437,22 @@ xfs_getfsmap_logdev( + struct xfs_mount *mp = tp->t_mountp; + struct xfs_rmap_irec rmap; + xfs_daddr_t rec_daddr, len_daddr; +- xfs_fsblock_t start_fsb; +- int error; ++ xfs_fsblock_t start_fsb, end_fsb; ++ uint64_t eofs; + +- /* Set up search keys */ ++ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); ++ if (keys[0].fmr_physical >= eofs) ++ return 0; + start_fsb = XFS_BB_TO_FSBT(mp, + keys[0].fmr_physical + keys[0].fmr_length); +- info->low.rm_startblock = XFS_BB_TO_FSBT(mp, keys[0].fmr_physical); +- info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset); +- error = xfs_fsmap_owner_to_rmap(&info->low, keys); +- if (error) +- return error; +- info->low.rm_blockcount = 0; +- xfs_getfsmap_set_irec_flags(&info->low, &keys[0]); ++ end_fsb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical)); + + /* Adjust the low key if we are continuing from where we left off. 
*/ + if (keys[0].fmr_length > 0) + info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb); + +- error = xfs_fsmap_owner_to_rmap(&info->high, keys + 1); +- if (error) +- return error; +- info->high.rm_startblock = -1U; +- info->high.rm_owner = ULLONG_MAX; +- info->high.rm_offset = ULLONG_MAX; +- info->high.rm_blockcount = 0; +- info->high.rm_flags = XFS_RMAP_KEY_FLAGS | XFS_RMAP_REC_FLAGS; +- info->missing_owner = XFS_FMR_OWN_FREE; +- +- trace_xfs_fsmap_low_key(mp, info->dev, NULLAGNUMBER, &info->low); +- trace_xfs_fsmap_high_key(mp, info->dev, NULLAGNUMBER, &info->high); ++ trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb); ++ trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb); + + if (start_fsb > 0) + return 0; +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-the-contact-address-for-the-sysfs-abi-docume.patch b/queue-6.1/xfs-fix-the-contact-address-for-the-sysfs-abi-docume.patch new file mode 100644 index 0000000000..c5701e525c --- /dev/null +++ b/queue-6.1/xfs-fix-the-contact-address-for-the-sysfs-abi-docume.patch @@ -0,0 +1,64 @@ +From ea2e94fdccb44c818646c749fec8fce5fd5bf6ff Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:13 -0700 +Subject: xfs: fix the contact address for the sysfs ABI documentation + +From: Christoph Hellwig + +[ Upstream commit 9ff4490e2ab364ec433f15668ef3f5edfb53feca ] + +oss.sgi.com is long dead, refer to the current linux-xfs list instead. + +Signed-off-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Sasha Levin +--- + Documentation/ABI/testing/sysfs-fs-xfs | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/Documentation/ABI/testing/sysfs-fs-xfs b/Documentation/ABI/testing/sysfs-fs-xfs +index f704925f6fe93..82d8e2f79834b 100644 +--- a/Documentation/ABI/testing/sysfs-fs-xfs ++++ b/Documentation/ABI/testing/sysfs-fs-xfs +@@ -1,7 +1,7 @@ + What: /sys/fs/xfs//log/log_head_lsn + Date: July 2014 + KernelVersion: 3.17 +-Contact: xfs@oss.sgi.com ++Contact: linux-xfs@vger.kernel.org + Description: + The log sequence number (LSN) of the current head of the + log. The LSN is exported in "cycle:basic block" format. +@@ -10,7 +10,7 @@ Users: xfstests + What: /sys/fs/xfs//log/log_tail_lsn + Date: July 2014 + KernelVersion: 3.17 +-Contact: xfs@oss.sgi.com ++Contact: linux-xfs@vger.kernel.org + Description: + The log sequence number (LSN) of the current tail of the + log. The LSN is exported in "cycle:basic block" format. +@@ -18,7 +18,7 @@ Description: + What: /sys/fs/xfs//log/reserve_grant_head + Date: July 2014 + KernelVersion: 3.17 +-Contact: xfs@oss.sgi.com ++Contact: linux-xfs@vger.kernel.org + Description: + The current state of the log reserve grant head. It + represents the total log reservation of all currently +@@ -29,7 +29,7 @@ Users: xfstests + What: /sys/fs/xfs//log/write_grant_head + Date: July 2014 + KernelVersion: 3.17 +-Contact: xfs@oss.sgi.com ++Contact: linux-xfs@vger.kernel.org + Description: + The current state of the log write grant head. 
It + represents the total log reservation of all currently +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-the-owner-setting-issue-for-rmap-query-in-xf.patch b/queue-6.1/xfs-fix-the-owner-setting-issue-for-rmap-query-in-xf.patch new file mode 100644 index 0000000000..bf38ae1d56 --- /dev/null +++ b/queue-6.1/xfs-fix-the-owner-setting-issue-for-rmap-query-in-xf.patch @@ -0,0 +1,86 @@ +From 6accbdacc11fb419285ee87c2a35949bf87ac6f7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:24 -0700 +Subject: xfs: Fix the owner setting issue for rmap query in xfs fsmap + +From: Zizhi Wo + +[ Upstream commit 68415b349f3f16904f006275757f4fcb34b8ee43 ] + +I notice a rmap query bug in xfs_io fsmap: +[root@fedora ~]# xfs_io -c 'fsmap -vvvv' /mnt + EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL + 0: 253:16 [0..7]: static fs metadata 0 (0..7) 8 + 1: 253:16 [8..23]: per-AG metadata 0 (8..23) 16 + 2: 253:16 [24..39]: inode btree 0 (24..39) 16 + 3: 253:16 [40..47]: per-AG metadata 0 (40..47) 8 + 4: 253:16 [48..55]: refcount btree 0 (48..55) 8 + 5: 253:16 [56..103]: per-AG metadata 0 (56..103) 48 + 6: 253:16 [104..127]: free space 0 (104..127) 24 + ...... + +Bug: +[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 0 3' /mnt +[root@fedora ~]# +Normally, we should be able to get one record, but we got nothing. + +The root cause of this problem lies in the incorrect setting of rm_owner in +the rmap query. In the case of the initial query where the owner is not +set, __xfs_getfsmap_datadev() first sets info->high.rm_owner to ULLONG_MAX. +This is done to prevent any omissions when comparing rmap items. However, +if the current ag is detected to be the last one, the function sets info's +high_irec based on the provided key. If high->rm_owner is not specified, it +should continue to be set to ULLONG_MAX; otherwise, there will be issues +with interval omissions. For example, consider "start" and "end" within the +same block. 
If high->rm_owner == 0, it will be smaller than the found +record in rmapbt, resulting in a query with no records. The main call stack +is as follows: + +xfs_ioc_getfsmap + xfs_getfsmap + xfs_getfsmap_datadev_rmapbt + __xfs_getfsmap_datadev + info->high.rm_owner = ULLONG_MAX + if (pag->pag_agno == end_ag) + xfs_fsmap_owner_to_rmap + // set info->high.rm_owner = 0 because fmr_owner == -1ULL + dest->rm_owner = 0 + // get nothing + xfs_getfsmap_datadev_rmapbt_query + +The problem can be resolved by simply modifying the internal logic of the +xfs_fsmap_owner_to_rmap function. + +After applying this patch, the above problem has been solved: +[root@fedora ~]# xfs_io -c 'fsmap -vvvv -d 0 3' /mnt + EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL + 0: 253:16 [0..7]: static fs metadata 0 (0..7) 8 + +Fixes: e89c041338ed ("xfs: implement the GETFSMAP ioctl") +Signed-off-by: Zizhi Wo +Reviewed-by: Darrick J. Wong +Signed-off-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J.
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index 956a5670e56ce..1efd18437ca4c 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -71,7 +71,7 @@ xfs_fsmap_owner_to_rmap( + switch (src->fmr_owner) { + case 0: /* "lowest owner id possible" */ + case -1ULL: /* "highest owner id possible" */ +- dest->rm_owner = 0; ++ dest->rm_owner = src->fmr_owner; + break; + case XFS_FMR_OWN_FREE: + dest->rm_owner = XFS_RMAP_OWN_NULL; +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-xfs_btree_query_range-callers-to-initialize-.patch b/queue-6.1/xfs-fix-xfs_btree_query_range-callers-to-initialize-.patch new file mode 100644 index 0000000000..a54c29a895 --- /dev/null +++ b/queue-6.1/xfs-fix-xfs_btree_query_range-callers-to-initialize-.patch @@ -0,0 +1,103 @@ +From df442dd8e32a2fb412fbd722e84a385f1f3147eb Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:11 -0700 +Subject: xfs: fix xfs_btree_query_range callers to initialize btree rec fully + +From: Darrick J. Wong + +[ Upstream commit 75dc0345312221971903b2e28279b7e24b7dbb1b ] + +Use struct initializers to ensure that the xfs_btree_irecs passed into +the query_range function are completely initialized. No functional +changes, just closing some sloppy hygiene. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_alloc.c | 10 +++------- + fs/xfs/libxfs/xfs_refcount.c | 13 +++++++------ + fs/xfs/libxfs/xfs_rmap.c | 10 +++------- + 3 files changed, 13 insertions(+), 20 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c +index c08265f191368..cd5b197d70464 100644 +--- a/fs/xfs/libxfs/xfs_alloc.c ++++ b/fs/xfs/libxfs/xfs_alloc.c +@@ -3545,15 +3545,11 @@ xfs_alloc_query_range( + xfs_alloc_query_range_fn fn, + void *priv) + { +- union xfs_btree_irec low_brec; +- union xfs_btree_irec high_brec; +- struct xfs_alloc_query_range_info query; ++ union xfs_btree_irec low_brec = { .a = *low_rec }; ++ union xfs_btree_irec high_brec = { .a = *high_rec }; ++ struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn }; + + ASSERT(cur->bc_btnum == XFS_BTNUM_BNO); +- low_brec.a = *low_rec; +- high_brec.a = *high_rec; +- query.priv = priv; +- query.fn = fn; + return xfs_btree_query_range(cur, &low_brec, &high_brec, + xfs_alloc_query_range_helper, &query); + } +diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c +index 4ec7a81dd3eff..7e16e76fd2e18 100644 +--- a/fs/xfs/libxfs/xfs_refcount.c ++++ b/fs/xfs/libxfs/xfs_refcount.c +@@ -1903,8 +1903,13 @@ xfs_refcount_recover_cow_leftovers( + struct xfs_buf *agbp; + struct xfs_refcount_recovery *rr, *n; + struct list_head debris; +- union xfs_btree_irec low; +- union xfs_btree_irec high; ++ union xfs_btree_irec low = { ++ .rc.rc_domain = XFS_REFC_DOMAIN_COW, ++ }; ++ union xfs_btree_irec high = { ++ .rc.rc_domain = XFS_REFC_DOMAIN_COW, ++ .rc.rc_startblock = -1U, ++ }; + xfs_fsblock_t fsb; + int error; + +@@ -1935,10 +1940,6 @@ xfs_refcount_recover_cow_leftovers( + cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag); + + /* Find all the leftover CoW staging extents. 
*/ +- memset(&low, 0, sizeof(low)); +- memset(&high, 0, sizeof(high)); +- low.rc.rc_domain = high.rc.rc_domain = XFS_REFC_DOMAIN_COW; +- high.rc.rc_startblock = -1U; + error = xfs_btree_query_range(cur, &low, &high, + xfs_refcount_recover_extent, &debris); + xfs_btree_del_cursor(cur, error); +diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c +index b56aca1e7c66c..95d3599561cea 100644 +--- a/fs/xfs/libxfs/xfs_rmap.c ++++ b/fs/xfs/libxfs/xfs_rmap.c +@@ -2337,14 +2337,10 @@ xfs_rmap_query_range( + xfs_rmap_query_range_fn fn, + void *priv) + { +- union xfs_btree_irec low_brec; +- union xfs_btree_irec high_brec; +- struct xfs_rmap_query_range_info query; ++ union xfs_btree_irec low_brec = { .r = *low_rec }; ++ union xfs_btree_irec high_brec = { .r = *high_rec }; ++ struct xfs_rmap_query_range_info query = { .priv = priv, .fn = fn }; + +- low_brec.r = *low_rec; +- high_brec.r = *high_rec; +- query.priv = priv; +- query.fn = fn; + return xfs_btree_query_range(cur, &low_brec, &high_brec, + xfs_rmap_query_range_helper, &query); + } +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-xfs_flush_unmap_range-range-for-rt.patch b/queue-6.1/xfs-fix-xfs_flush_unmap_range-range-for-rt.patch new file mode 100644 index 0000000000..b1839a12df --- /dev/null +++ b/queue-6.1/xfs-fix-xfs_flush_unmap_range-range-for-rt.patch @@ -0,0 +1,55 @@ +From 48d94275b24efc1d768346d3b897681735b6e9ae Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:18 -0700 +Subject: xfs: Fix xfs_flush_unmap_range() range for RT + +From: John Garry + +[ Upstream commit d3b689d7c711a9f36d3e48db9eaa75784a892f4c ] + +Currently xfs_flush_unmap_range() does unmap for a full RT extent range, +which we also want to ensure is clean and idle. + +This code change is originally from Dave Chinner. + +Reviewed-by: Christoph Hellwig 4 +Reviewed-by: Darrick J. Wong +Signed-off-by: John Garry +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_bmap_util.c | 12 ++++++++---- + 1 file changed, 8 insertions(+), 4 deletions(-) + +diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c +index 62b92e92a685d..dabae6323c503 100644 +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -963,14 +963,18 @@ xfs_flush_unmap_range( + xfs_off_t offset, + xfs_off_t len) + { +- struct xfs_mount *mp = ip->i_mount; + struct inode *inode = VFS_I(ip); + xfs_off_t rounding, start, end; + int error; + +- rounding = max_t(xfs_off_t, mp->m_sb.sb_blocksize, PAGE_SIZE); +- start = round_down(offset, rounding); +- end = round_up(offset + len, rounding) - 1; ++ /* ++ * Make sure we extend the flush out to extent alignment ++ * boundaries so any extent range overlapping the start/end ++ * of the modification we are about to do is clean and idle. ++ */ ++ rounding = max_t(xfs_off_t, xfs_inode_alloc_unitsize(ip), PAGE_SIZE); ++ start = rounddown_64(offset, rounding); ++ end = roundup_64(offset + len, rounding) - 1; + + error = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (error) +-- +2.39.5 + diff --git a/queue-6.1/xfs-fix-xfs_prepare_shift-range-for-rt.patch b/queue-6.1/xfs-fix-xfs_prepare_shift-range-for-rt.patch new file mode 100644 index 0000000000..e77cff57cb --- /dev/null +++ b/queue-6.1/xfs-fix-xfs_prepare_shift-range-for-rt.patch @@ -0,0 +1,58 @@ +From 58749ff8812d2c46ac1b5c4259e845b2cd617838 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:19 -0700 +Subject: xfs: Fix xfs_prepare_shift() range for RT + +From: John Garry + +[ Upstream commit f23660f059470ec7043748da7641e84183c23bc8 ] + +The RT extent range must be considered in the xfs_flush_unmap_range() call +to stabilize the boundary. + +This code change is originally from Dave Chinner. + +Reviewed-by: Christoph Hellwig +Reviewed-by: Darrick J. Wong +Signed-off-by: John Garry +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_bmap_util.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c +index dabae6323c503..bab8ba224e10d 100644 +--- a/fs/xfs/xfs_bmap_util.c ++++ b/fs/xfs/xfs_bmap_util.c +@@ -1059,7 +1059,7 @@ xfs_prepare_shift( + struct xfs_inode *ip, + loff_t offset) + { +- struct xfs_mount *mp = ip->i_mount; ++ unsigned int rounding; + int error; + + /* +@@ -1077,11 +1077,13 @@ xfs_prepare_shift( + * with the full range of the operation. If we don't, a COW writeback + * completion could race with an insert, front merge with the start + * extent (after split) during the shift and corrupt the file. Start +- * with the block just prior to the start to stabilize the boundary. ++ * with the allocation unit just prior to the start to stabilize the ++ * boundary. + */ +- offset = round_down(offset, mp->m_sb.sb_blocksize); ++ rounding = xfs_inode_alloc_unitsize(ip); ++ offset = rounddown_64(offset, rounding); + if (offset) +- offset -= mp->m_sb.sb_blocksize; ++ offset -= rounding; + + /* + * Writeback and invalidate cache for the remainder of the file as we're +-- +2.39.5 + diff --git a/queue-6.1/xfs-remove-unused-parameter-in-macro-xfs_dquot_logre.patch b/queue-6.1/xfs-remove-unused-parameter-in-macro-xfs_dquot_logre.patch new file mode 100644 index 0000000000..2cedc59d76 --- /dev/null +++ b/queue-6.1/xfs-remove-unused-parameter-in-macro-xfs_dquot_logre.patch @@ -0,0 +1,163 @@ +From 6c525207b47e97c716f450e22ad810846234eb5b Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:21 -0700 +Subject: xfs: remove unused parameter in macro XFS_DQUOT_LOGRES + +From: Julian Sun + +[ Upstream commit af5d92f2fad818663da2ce073b6fe15b9d56ffdc ] + +In the macro definition of XFS_DQUOT_LOGRES, a parameter is accepted, +but it is not used. Hence, it should be removed. + +This patch has only passed compilation test, but it should be fine. 
+ +Signed-off-by: Julian Sun +Reviewed-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/libxfs/xfs_quota_defs.h | 2 +- + fs/xfs/libxfs/xfs_trans_resv.c | 28 ++++++++++++++-------------- + 2 files changed, 15 insertions(+), 15 deletions(-) + +diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h +index cb035da3f990b..fb05f44f6c754 100644 +--- a/fs/xfs/libxfs/xfs_quota_defs.h ++++ b/fs/xfs/libxfs/xfs_quota_defs.h +@@ -56,7 +56,7 @@ typedef uint8_t xfs_dqtype_t; + * And, of course, we also need to take into account the dquot log format item + * used to describe each dquot. + */ +-#define XFS_DQUOT_LOGRES(mp) \ ++#define XFS_DQUOT_LOGRES \ + ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) + + #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) +diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c +index 5b2f27cbdb808..1bb2891b26ffb 100644 +--- a/fs/xfs/libxfs/xfs_trans_resv.c ++++ b/fs/xfs/libxfs/xfs_trans_resv.c +@@ -334,11 +334,11 @@ xfs_calc_write_reservation( + blksz); + t1 += adj; + t3 += adj; +- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); ++ return XFS_DQUOT_LOGRES + max3(t1, t2, t3); + } + + t4 = xfs_calc_refcountbt_reservation(mp, 1); +- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); ++ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); + } + + unsigned int +@@ -406,11 +406,11 @@ xfs_calc_itruncate_reservation( + xfs_refcountbt_block_count(mp, 4), + blksz); + +- return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); ++ return XFS_DQUOT_LOGRES + max3(t1, t2, t3); + } + + t4 = xfs_calc_refcountbt_reservation(mp, 2); +- return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); ++ return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); + } + + unsigned int +@@ -436,7 +436,7 @@ STATIC uint + xfs_calc_rename_reservation( + struct xfs_mount *mp) + { +- return 
XFS_DQUOT_LOGRES(mp) + ++ return XFS_DQUOT_LOGRES + + max((xfs_calc_inode_res(mp, 5) + + xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), + XFS_FSB_TO_B(mp, 1))), +@@ -475,7 +475,7 @@ STATIC uint + xfs_calc_link_reservation( + struct xfs_mount *mp) + { +- return XFS_DQUOT_LOGRES(mp) + ++ return XFS_DQUOT_LOGRES + + xfs_calc_iunlink_remove_reservation(mp) + + max((xfs_calc_inode_res(mp, 2) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), +@@ -513,7 +513,7 @@ STATIC uint + xfs_calc_remove_reservation( + struct xfs_mount *mp) + { +- return XFS_DQUOT_LOGRES(mp) + ++ return XFS_DQUOT_LOGRES + + xfs_calc_iunlink_add_reservation(mp) + + max((xfs_calc_inode_res(mp, 2) + + xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), +@@ -572,7 +572,7 @@ xfs_calc_icreate_resv_alloc( + STATIC uint + xfs_calc_icreate_reservation(xfs_mount_t *mp) + { +- return XFS_DQUOT_LOGRES(mp) + ++ return XFS_DQUOT_LOGRES + + max(xfs_calc_icreate_resv_alloc(mp), + xfs_calc_create_resv_modify(mp)); + } +@@ -581,7 +581,7 @@ STATIC uint + xfs_calc_create_tmpfile_reservation( + struct xfs_mount *mp) + { +- uint res = XFS_DQUOT_LOGRES(mp); ++ uint res = XFS_DQUOT_LOGRES; + + res += xfs_calc_icreate_resv_alloc(mp); + return res + xfs_calc_iunlink_add_reservation(mp); +@@ -630,7 +630,7 @@ STATIC uint + xfs_calc_ifree_reservation( + struct xfs_mount *mp) + { +- return XFS_DQUOT_LOGRES(mp) + ++ return XFS_DQUOT_LOGRES + + xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + + xfs_calc_iunlink_remove_reservation(mp) + +@@ -647,7 +647,7 @@ STATIC uint + xfs_calc_ichange_reservation( + struct xfs_mount *mp) + { +- return XFS_DQUOT_LOGRES(mp) + ++ return XFS_DQUOT_LOGRES + + xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); + +@@ -756,7 +756,7 @@ STATIC uint + xfs_calc_addafork_reservation( + struct xfs_mount *mp) + { +- return XFS_DQUOT_LOGRES(mp) + ++ return XFS_DQUOT_LOGRES + + xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(1, 
mp->m_dir_geo->blksize) + +@@ -804,7 +804,7 @@ STATIC uint + xfs_calc_attrsetm_reservation( + struct xfs_mount *mp) + { +- return XFS_DQUOT_LOGRES(mp) + ++ return XFS_DQUOT_LOGRES + + xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); +@@ -844,7 +844,7 @@ STATIC uint + xfs_calc_attrrm_reservation( + struct xfs_mount *mp) + { +- return XFS_DQUOT_LOGRES(mp) + ++ return XFS_DQUOT_LOGRES + + max((xfs_calc_inode_res(mp, 1) + + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, + XFS_FSB_TO_B(mp, 1)) + +-- +2.39.5 + diff --git a/queue-6.1/xfs-reset-rootdir-extent-size-hint-after-growfsrt.patch b/queue-6.1/xfs-reset-rootdir-extent-size-hint-after-growfsrt.patch new file mode 100644 index 0000000000..008900fa47 --- /dev/null +++ b/queue-6.1/xfs-reset-rootdir-extent-size-hint-after-growfsrt.patch @@ -0,0 +1,95 @@ +From 940f9e7edd7b67c8ffcf514767c94fb0dc920000 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:27 -0700 +Subject: xfs: reset rootdir extent size hint after growfsrt + +From: Darrick J. Wong + +[ Upstream commit a24cae8fc1f13f6f6929351309f248fd2e9351ce ] + +If growfsrt is run on a filesystem that doesn't have a rt volume, it's +possible to change the rt extent size. If the root directory was +previously set up with an inherited extent size hint and rtinherit, it's +possible that the hint is no longer a multiple of the rt extent size. +Although the verifiers don't complain about this, xfs_repair will, so if +we detect this situation, log the root directory to clean it up. This +is still racy, but it's better than nothing. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. 
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_rtalloc.c | 40 ++++++++++++++++++++++++++++++++++++++++ + 1 file changed, 40 insertions(+) + +diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c +index 149fcfc485d89..fc21b4e81ade8 100644 +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -915,6 +915,39 @@ xfs_alloc_rsum_cache( + xfs_warn(mp, "could not allocate realtime summary cache"); + } + ++/* ++ * If we changed the rt extent size (meaning there was no rt volume previously) ++ * and the root directory had EXTSZINHERIT and RTINHERIT set, it's possible ++ * that the extent size hint on the root directory is no longer congruent with ++ * the new rt extent size. Log the rootdir inode to fix this. ++ */ ++static int ++xfs_growfs_rt_fixup_extsize( ++ struct xfs_mount *mp) ++{ ++ struct xfs_inode *ip = mp->m_rootip; ++ struct xfs_trans *tp; ++ int error = 0; ++ ++ xfs_ilock(ip, XFS_IOLOCK_EXCL); ++ if (!(ip->i_diflags & XFS_DIFLAG_RTINHERIT) || ++ !(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT)) ++ goto out_iolock; ++ ++ error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_ichange, 0, 0, false, ++ &tp); ++ if (error) ++ goto out_iolock; ++ ++ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); ++ error = xfs_trans_commit(tp); ++ xfs_iunlock(ip, XFS_ILOCK_EXCL); ++ ++out_iolock: ++ xfs_iunlock(ip, XFS_IOLOCK_EXCL); ++ return error; ++} ++ + /* + * Visible (exported) functions. 
+ */ +@@ -944,6 +977,7 @@ xfs_growfs_rt( + xfs_sb_t *sbp; /* old superblock */ + xfs_fsblock_t sumbno; /* summary block number */ + uint8_t *rsum_cache; /* old summary cache */ ++ xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize; + + sbp = &mp->m_sb; + +@@ -1177,6 +1211,12 @@ xfs_growfs_rt( + if (error) + goto out_free; + ++ if (old_rextsize != in->extsize) { ++ error = xfs_growfs_rt_fixup_extsize(mp); ++ if (error) ++ goto out_free; ++ } ++ + /* Update secondary superblocks now the physical grow has completed */ + error = xfs_update_secondary_sbs(mp); + +-- +2.39.5 + diff --git a/queue-6.1/xfs-take-m_growlock-when-running-growfsrt.patch b/queue-6.1/xfs-take-m_growlock-when-running-growfsrt.patch new file mode 100644 index 0000000000..114a0d7773 --- /dev/null +++ b/queue-6.1/xfs-take-m_growlock-when-running-growfsrt.patch @@ -0,0 +1,133 @@ +From e6de4e8f598ac5d52ebd51721376d02f1331b1b6 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:26 -0700 +Subject: xfs: take m_growlock when running growfsrt + +From: Darrick J. Wong + +[ Upstream commit 16e1fbdce9c8d084863fd63cdaff8fb2a54e2f88 ] + +Take the grow lock when we're expanding the realtime volume, like we do +for the other growfs calls. + +Reviewed-by: Christoph Hellwig +Signed-off-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_rtalloc.c | 38 +++++++++++++++++++++++++------------- + 1 file changed, 25 insertions(+), 13 deletions(-) + +diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c +index 5cf1e91f4c205..149fcfc485d89 100644 +--- a/fs/xfs/xfs_rtalloc.c ++++ b/fs/xfs/xfs_rtalloc.c +@@ -953,34 +953,39 @@ xfs_growfs_rt( + /* Needs to have been mounted with an rt device. 
*/ + if (!XFS_IS_REALTIME_MOUNT(mp)) + return -EINVAL; ++ ++ if (!mutex_trylock(&mp->m_growlock)) ++ return -EWOULDBLOCK; + /* + * Mount should fail if the rt bitmap/summary files don't load, but + * we'll check anyway. + */ ++ error = -EINVAL; + if (!mp->m_rbmip || !mp->m_rsumip) +- return -EINVAL; ++ goto out_unlock; + + /* Shrink not supported. */ + if (in->newblocks <= sbp->sb_rblocks) +- return -EINVAL; ++ goto out_unlock; + + /* Can only change rt extent size when adding rt volume. */ + if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize) +- return -EINVAL; ++ goto out_unlock; + + /* Range check the extent size. */ + if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE || + XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE) +- return -EINVAL; ++ goto out_unlock; + + /* Unsupported realtime features. */ ++ error = -EOPNOTSUPP; + if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp)) +- return -EOPNOTSUPP; ++ goto out_unlock; + + nrblocks = in->newblocks; + error = xfs_sb_validate_fsb_count(sbp, nrblocks); + if (error) +- return error; ++ goto out_unlock; + /* + * Read in the last block of the device, make sure it exists. + */ +@@ -988,7 +993,7 @@ xfs_growfs_rt( + XFS_FSB_TO_BB(mp, nrblocks - 1), + XFS_FSB_TO_BB(mp, 1), 0, &bp, NULL); + if (error) +- return error; ++ goto out_unlock; + xfs_buf_relse(bp); + + /* +@@ -996,8 +1001,10 @@ xfs_growfs_rt( + */ + nrextents = nrblocks; + do_div(nrextents, in->extsize); +- if (!xfs_validate_rtextents(nrextents)) +- return -EINVAL; ++ if (!xfs_validate_rtextents(nrextents)) { ++ error = -EINVAL; ++ goto out_unlock; ++ } + nrbmblocks = howmany_64(nrextents, NBBY * sbp->sb_blocksize); + nrextslog = xfs_compute_rextslog(nrextents); + nrsumlevels = nrextslog + 1; +@@ -1009,8 +1016,11 @@ xfs_growfs_rt( + * the log. This prevents us from getting a log overflow, + * since we'll log basically the whole summary file at once. 
+ */ +- if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) +- return -EINVAL; ++ if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) { ++ error = -EINVAL; ++ goto out_unlock; ++ } ++ + /* + * Get the old block counts for bitmap and summary inodes. + * These can't change since other growfs callers are locked out. +@@ -1022,10 +1032,10 @@ xfs_growfs_rt( + */ + error = xfs_growfs_rt_alloc(mp, rbmblocks, nrbmblocks, mp->m_rbmip); + if (error) +- return error; ++ goto out_unlock; + error = xfs_growfs_rt_alloc(mp, rsumblocks, nrsumblocks, mp->m_rsumip); + if (error) +- return error; ++ goto out_unlock; + + rsum_cache = mp->m_rsum_cache; + if (nrbmblocks != sbp->sb_rbmblocks) +@@ -1190,6 +1200,8 @@ xfs_growfs_rt( + } + } + ++out_unlock: ++ mutex_unlock(&mp->m_growlock); + return error; + } + +-- +2.39.5 + diff --git a/queue-6.1/xfs-use-consistent-uid-gid-when-grabbing-dquots-for-.patch b/queue-6.1/xfs-use-consistent-uid-gid-when-grabbing-dquots-for-.patch new file mode 100644 index 0000000000..e142f18d2c --- /dev/null +++ b/queue-6.1/xfs-use-consistent-uid-gid-when-grabbing-dquots-for-.patch @@ -0,0 +1,105 @@ +From 31dfa872eb49c11440c1cdd922b1199df7f59b7f Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:15 -0700 +Subject: xfs: use consistent uid/gid when grabbing dquots for inodes + +From: Darrick J. Wong + +[ Upstream commit 24a4e1cb322e2bf0f3a1afd1978b610a23aa8f36 ] + +[ 6.1: resolved conflicts in xfs_inode.c and xfs_symlink.c due to 6.1 +not having switched to idmap yet ] + +I noticed that callers of xfs_qm_vop_dqalloc use the following code to +compute the anticipated uid of the new file: + + mapped_fsuid(idmap, &init_user_ns); + +whereas the VFS uses a slightly different computation for actually +assigning i_uid: + + mapped_fsuid(idmap, i_user_ns(inode)); + +Technically, these are not the same things. 
According to Christian +Brauner, the only time that inode->i_sb->s_user_ns != &init_user_ns is +when the filesystem was mounted in a new mount namespace by an +unprivileged user. XFS does not allow this, which is why we've never +seen bug reports about quotas being incorrect or the uid checks in +xfs_qm_vop_create_dqattach tripping debug assertions. + +However, this /is/ a logic bomb, so let's make the code consistent. + +Link: https://lore.kernel.org/linux-fsdevel/20240617-weitblick-gefertigt-4a41f37119fa@brauner/ +Fixes: c14329d39f2d ("fs: port fs{g,u}id helpers to mnt_idmap") +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Catherine Hoang +Acked-by: Darrick J. Wong +Signed-off-by: Sasha Levin +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_inode.c | 16 ++++++++++------ + fs/xfs/xfs_symlink.c | 8 +++++--- + 2 files changed, 15 insertions(+), 9 deletions(-) + +diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c +index b26d26d29273d..88d0a088fa862 100644 +--- a/fs/xfs/xfs_inode.c ++++ b/fs/xfs/xfs_inode.c +@@ -983,10 +983,12 @@ xfs_create( + prid = xfs_get_initial_prid(dp); + + /* +- * Make sure that we have allocated dquot(s) on disk. ++ * Make sure that we have allocated dquot(s) on disk. The uid/gid ++ * computation code must match what the VFS uses to assign i_[ug]id. ++ * INHERIT adjusts the gid computation for setgid/grpid systems. + */ +- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), +- mapped_fsgid(mnt_userns, &init_user_ns), prid, ++ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, i_user_ns(VFS_I(dp))), ++ mapped_fsgid(mnt_userns, i_user_ns(VFS_I(dp))), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); + if (error) +@@ -1132,10 +1134,12 @@ xfs_create_tmpfile( + prid = xfs_get_initial_prid(dp); + + /* +- * Make sure that we have allocated dquot(s) on disk. ++ * Make sure that we have allocated dquot(s) on disk.
The uid/gid ++ * computation code must match what the VFS uses to assign i_[ug]id. ++ * INHERIT adjusts the gid computation for setgid/grpid systems. + */ +- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), +- mapped_fsgid(mnt_userns, &init_user_ns), prid, ++ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, i_user_ns(VFS_I(dp))), ++ mapped_fsgid(mnt_userns, i_user_ns(VFS_I(dp))), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); + if (error) +diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c +index 8389f3ef88ef2..78bd02a98aa53 100644 +--- a/fs/xfs/xfs_symlink.c ++++ b/fs/xfs/xfs_symlink.c +@@ -191,10 +191,12 @@ xfs_symlink( + prid = xfs_get_initial_prid(dp); + + /* +- * Make sure that we have allocated dquot(s) on disk. ++ * Make sure that we have allocated dquot(s) on disk. The uid/gid ++ * computation code must match what the VFS uses to assign i_[ug]id. ++ * INHERIT adjusts the gid computation for setgid/grpid systems. + */ +- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), +- mapped_fsgid(mnt_userns, &init_user_ns), prid, ++ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, i_user_ns(VFS_I(dp))), ++ mapped_fsgid(mnt_userns, i_user_ns(VFS_I(dp))), prid, + XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, + &udqp, &gdqp, &pdqp); + if (error) +-- +2.39.5 + diff --git a/queue-6.1/xfs-use-xfs_buf_daddr_null-for-daddrs-in-getfsmap-co.patch b/queue-6.1/xfs-use-xfs_buf_daddr_null-for-daddrs-in-getfsmap-co.patch new file mode 100644 index 0000000000..30dd3d88ae --- /dev/null +++ b/queue-6.1/xfs-use-xfs_buf_daddr_null-for-daddrs-in-getfsmap-co.patch @@ -0,0 +1,49 @@ +From 7ae1259c798b352e6a580239dfba2e8cf681255a Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:25 -0700 +Subject: xfs: use XFS_BUF_DADDR_NULL for daddrs in getfsmap code + +From: Darrick J. 
Wong + +[ Upstream commit 6b35cc8d9239569700cc7cc737c8ed40b8b9cfdb ] + +Use XFS_BUF_DADDR_NULL (instead of a magic sentinel value) to mean "this +field is null" like the rest of xfs. + +Cc: wozizhi@huawei.com +Fixes: e89c041338ed6 ("xfs: implement the GETFSMAP ioctl") +Reviewed-by: Christoph Hellwig +Signed-off-by: Darrick J. Wong +Signed-off-by: Chandan Babu R +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index 1efd18437ca4c..a0668a1ef1006 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -252,7 +252,7 @@ xfs_getfsmap_rec_before_start( + const struct xfs_rmap_irec *rec, + xfs_daddr_t rec_daddr) + { +- if (info->low_daddr != -1ULL) ++ if (info->low_daddr != XFS_BUF_DADDR_NULL) + return rec_daddr < info->low_daddr; + if (info->low.rm_blockcount) + return xfs_rmap_compare(rec, &info->low) < 0; +@@ -986,7 +986,7 @@ xfs_getfsmap( + info.dev = handlers[i].dev; + info.last = false; + info.pag = NULL; +- info.low_daddr = -1ULL; ++ info.low_daddr = XFS_BUF_DADDR_NULL; + info.low.rm_blockcount = 0; + error = handlers[i].fn(tp, dkeys, &info); + if (error) +-- +2.39.5 + diff --git a/queue-6.1/xfs-validate-fsmap-offsets-specified-in-the-query-ke.patch b/queue-6.1/xfs-validate-fsmap-offsets-specified-in-the-query-ke.patch new file mode 100644 index 0000000000..5b19a38495 --- /dev/null +++ b/queue-6.1/xfs-validate-fsmap-offsets-specified-in-the-query-ke.patch @@ -0,0 +1,96 @@ +From fadd1ab10782b4b335bd7f7290fbabf4b22b194c Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:10 -0700 +Subject: xfs: validate fsmap offsets specified in the query keys + +From: Darrick J. 
Wong + +[ Upstream commit 3ee9351e74907fe3acb0721c315af25b05dc87da ] + +Improve the validation of the fsmap offset fields in the query keys and +move the validation to the top of the function now that we have pushed +the low key adjustment code downwards. + +Also fix some indenting issues that aren't worth a separate patch. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Dave Chinner +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J. Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/xfs_fsmap.c | 30 +++++++++++++++++++----------- + 1 file changed, 19 insertions(+), 11 deletions(-) + +diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c +index cdd806d80b7cf..d10f2c719220d 100644 +--- a/fs/xfs/xfs_fsmap.c ++++ b/fs/xfs/xfs_fsmap.c +@@ -802,6 +802,19 @@ xfs_getfsmap_check_keys( + struct xfs_fsmap *low_key, + struct xfs_fsmap *high_key) + { ++ if (low_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { ++ if (low_key->fmr_offset) ++ return false; ++ } ++ if (high_key->fmr_flags != -1U && ++ (high_key->fmr_flags & (FMR_OF_SPECIAL_OWNER | ++ FMR_OF_EXTENT_MAP))) { ++ if (high_key->fmr_offset && high_key->fmr_offset != -1ULL) ++ return false; ++ } ++ if (high_key->fmr_length && high_key->fmr_length != -1ULL) ++ return false; ++ + if (low_key->fmr_device > high_key->fmr_device) + return false; + if (low_key->fmr_device < high_key->fmr_device) +@@ -845,15 +858,15 @@ xfs_getfsmap_check_keys( + * ---------------- + * There are multiple levels of keys and counters at work here: + * xfs_fsmap_head.fmh_keys -- low and high fsmap keys passed in; +- * these reflect fs-wide sector addrs. ++ * these reflect fs-wide sector addrs. + * dkeys -- fmh_keys used to query each device; +- * these are fmh_keys but w/ the low key +- * bumped up by fmr_length. ++ * these are fmh_keys but w/ the low key ++ * bumped up by fmr_length. + * xfs_getfsmap_info.next_daddr -- next disk addr we expect to see; this + * is how we detect gaps in the fsmap + records and report them. 
+ * xfs_getfsmap_info.low/high -- per-AG low/high keys computed from +- * dkeys; used to query the metadata. ++ * dkeys; used to query the metadata. + */ + int + xfs_getfsmap( +@@ -874,6 +887,8 @@ xfs_getfsmap( + if (!xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[0]) || + !xfs_getfsmap_is_valid_device(mp, &head->fmh_keys[1])) + return -EINVAL; ++ if (!xfs_getfsmap_check_keys(&head->fmh_keys[0], &head->fmh_keys[1])) ++ return -EINVAL; + + use_rmap = xfs_has_rmapbt(mp) && + has_capability_noaudit(current, CAP_SYS_ADMIN); +@@ -919,15 +934,8 @@ xfs_getfsmap( + * other mapping for the same physical block range. + */ + dkeys[0] = head->fmh_keys[0]; +- if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) { +- if (dkeys[0].fmr_offset) +- return -EINVAL; +- } + memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap)); + +- if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1])) +- return -EINVAL; +- + info.next_daddr = head->fmh_keys[0].fmr_physical + + head->fmh_keys[0].fmr_length; + info.fsmap_recs = fsmap_recs; +-- +2.39.5 + diff --git a/queue-6.1/xfs-verify-buffer-inode-and-dquot-items-every-tx-com.patch b/queue-6.1/xfs-verify-buffer-inode-and-dquot-items-every-tx-com.patch new file mode 100644 index 0000000000..6cd637ba96 --- /dev/null +++ b/queue-6.1/xfs-verify-buffer-inode-and-dquot-items-every-tx-com.patch @@ -0,0 +1,285 @@ +From fd1f68141e5656b0e79d3e092965cfc1b6e3daa7 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Wed, 11 Jun 2025 14:01:14 -0700 +Subject: xfs: verify buffer, inode, and dquot items every tx commit + +From: Darrick J. Wong + +[ Upstream commit 150bb10a28b9c8709ae227fc898d9cf6136faa1e ] + +generic/388 has an annoying tendency to fail like this during log +recovery: + +XFS (sda4): Unmounting Filesystem 435fe39b-82b6-46ef-be56-819499585130 +XFS (sda4): Mounting V5 Filesystem 435fe39b-82b6-46ef-be56-819499585130 +XFS (sda4): Starting recovery (logdev: internal) +00000000: 49 4e 81 b6 03 02 00 00 00 00 00 07 00 00 00 07 IN.............. 
+00000010: 00 00 00 01 00 00 00 00 00 00 00 00 00 00 00 10 ................ +00000020: 35 9a 8b c1 3e 6e 81 00 35 9a 8b c1 3f dc b7 00 5...>n..5...?... +00000030: 35 9a 8b c1 3f dc b7 00 00 00 00 00 00 3c 86 4f 5...?........<.O +00000040: 00 00 00 00 00 00 02 f3 00 00 00 00 00 00 00 00 ................ +00000050: 00 00 1f 01 00 00 00 00 00 00 00 02 b2 74 c9 0b .............t.. +00000060: ff ff ff ff d7 45 73 10 00 00 00 00 00 00 00 2d .....Es........- +00000070: 00 00 07 92 00 01 fe 30 00 00 00 00 00 00 00 1a .......0........ +00000080: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ +00000090: 35 9a 8b c1 3b 55 0c 00 00 00 00 00 04 27 b2 d1 5...;U.......'.. +000000a0: 43 5f e3 9b 82 b6 46 ef be 56 81 94 99 58 51 30 C_....F..V...XQ0 +XFS (sda4): Internal error Bad dinode after recovery at line 539 of file fs/xfs/xfs_inode_item_recover.c. Caller xlog_recover_items_pass2+0x4e/0xc0 [xfs] +CPU: 0 PID: 2189311 Comm: mount Not tainted 6.9.0-rc4-djwx #rc4 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS ?-20171121_152543-x86-ol7-builder-01.us.oracle.com-4.el7.1 04/01/2014 +Call Trace: + + dump_stack_lvl+0x4f/0x60 + xfs_corruption_error+0x90/0xa0 + xlog_recover_inode_commit_pass2+0x5f1/0xb00 + xlog_recover_items_pass2+0x4e/0xc0 + xlog_recover_commit_trans+0x2db/0x350 + xlog_recovery_process_trans+0xab/0xe0 + xlog_recover_process_data+0xa7/0x130 + xlog_do_recovery_pass+0x398/0x840 + xlog_do_log_recovery+0x62/0xc0 + xlog_do_recover+0x34/0x1d0 + xlog_recover+0xe9/0x1a0 + xfs_log_mount+0xff/0x260 + xfs_mountfs+0x5d9/0xb60 + xfs_fs_fill_super+0x76b/0xa30 + get_tree_bdev+0x124/0x1d0 + vfs_get_tree+0x17/0xa0 + path_mount+0x72b/0xa90 + __x64_sys_mount+0x112/0x150 + do_syscall_64+0x49/0x100 + entry_SYSCALL_64_after_hwframe+0x4b/0x53 + +XFS (sda4): Corruption detected. 
Unmount and run xfs_repair +XFS (sda4): Metadata corruption detected at xfs_dinode_verify.part.0+0x739/0x920 [xfs], inode 0x427b2d1 +XFS (sda4): Filesystem has been shut down due to log error (0x2). +XFS (sda4): Please unmount the filesystem and rectify the problem(s). +XFS (sda4): log mount/recovery failed: error -117 +XFS (sda4): log mount failed + +This is inode log item recovery failing the dinode verifier after +replaying the contents of the inode log item into the ondisk inode. +Looking back into what the kernel was doing at the time of the fs +shutdown, a thread was in the middle of running a series of +transactions, each of which committed changes to the inode. + +At some point in the middle of that chain, an invalid (at least +according to the verifier) change was committed. Had the filesystem not +shut down in the middle of the chain, a subsequent transaction would +have corrected the invalid state and nobody would have noticed. But +that's not what happened here. Instead, the invalid inode state was +committed to the ondisk log, so log recovery tripped over it. + +The actual defect here was an overzealous inode verifier, which was +fixed in a separate patch. This patch adds some transaction precommit +functions for CONFIG_XFS_DEBUG_EXPENSIVE=y mode so that we can detect these kinds +of transient errors at transaction commit time, where it's much easier +to find the root cause. + +Signed-off-by: Darrick J. Wong +Reviewed-by: Christoph Hellwig +Signed-off-by: Leah Rumancik +Acked-by: "Darrick J.
Wong" +Signed-off-by: Sasha Levin +--- + fs/xfs/Kconfig | 12 ++++++++++++ + fs/xfs/xfs.h | 4 ++++ + fs/xfs/xfs_buf_item.c | 32 ++++++++++++++++++++++++++++++++ + fs/xfs/xfs_dquot_item.c | 31 +++++++++++++++++++++++++++++++ + fs/xfs/xfs_inode_item.c | 32 ++++++++++++++++++++++++++++++++ + 5 files changed, 111 insertions(+) + +diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig +index 9fac5ea8d0e48..dff90db507e35 100644 +--- a/fs/xfs/Kconfig ++++ b/fs/xfs/Kconfig +@@ -154,6 +154,18 @@ config XFS_DEBUG + + Say N unless you are an XFS developer, or you play one on TV. + ++config XFS_DEBUG_EXPENSIVE ++ bool "XFS expensive debugging checks" ++ depends on XFS_FS && XFS_DEBUG ++ help ++ Say Y here to get an XFS build with expensive debugging checks ++ enabled. These checks may affect performance significantly. ++ ++ Note that the resulting code will be HUGER and SLOWER, and probably ++ not useful unless you are debugging a particular problem. ++ ++ Say N unless you are an XFS developer, or you play one on TV. 
++ + config XFS_ASSERT_FATAL + bool "XFS fatal asserts" + default y +diff --git a/fs/xfs/xfs.h b/fs/xfs/xfs.h +index f6ffb4f248f78..9355ccad9503b 100644 +--- a/fs/xfs/xfs.h ++++ b/fs/xfs/xfs.h +@@ -10,6 +10,10 @@ + #define DEBUG 1 + #endif + ++#ifdef CONFIG_XFS_DEBUG_EXPENSIVE ++#define DEBUG_EXPENSIVE 1 ++#endif ++ + #ifdef CONFIG_XFS_ASSERT_FATAL + #define XFS_ASSERT_FATAL 1 + #endif +diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c +index 023d4e0385dd0..b02ce568de0c4 100644 +--- a/fs/xfs/xfs_buf_item.c ++++ b/fs/xfs/xfs_buf_item.c +@@ -22,6 +22,7 @@ + #include "xfs_trace.h" + #include "xfs_log.h" + #include "xfs_log_priv.h" ++#include "xfs_error.h" + + + struct kmem_cache *xfs_buf_item_cache; +@@ -781,8 +782,39 @@ xfs_buf_item_committed( + return lsn; + } + ++#ifdef DEBUG_EXPENSIVE ++static int ++xfs_buf_item_precommit( ++ struct xfs_trans *tp, ++ struct xfs_log_item *lip) ++{ ++ struct xfs_buf_log_item *bip = BUF_ITEM(lip); ++ struct xfs_buf *bp = bip->bli_buf; ++ struct xfs_mount *mp = bp->b_mount; ++ xfs_failaddr_t fa; ++ ++ if (!bp->b_ops || !bp->b_ops->verify_struct) ++ return 0; ++ if (bip->bli_flags & XFS_BLI_STALE) ++ return 0; ++ ++ fa = bp->b_ops->verify_struct(bp); ++ if (fa) { ++ xfs_buf_verifier_error(bp, -EFSCORRUPTED, bp->b_ops->name, ++ bp->b_addr, BBTOB(bp->b_length), fa); ++ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); ++ ASSERT(fa == NULL); ++ } ++ ++ return 0; ++} ++#else ++# define xfs_buf_item_precommit NULL ++#endif ++ + static const struct xfs_item_ops xfs_buf_item_ops = { + .iop_size = xfs_buf_item_size, ++ .iop_precommit = xfs_buf_item_precommit, + .iop_format = xfs_buf_item_format, + .iop_pin = xfs_buf_item_pin, + .iop_unpin = xfs_buf_item_unpin, +diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c +index 6a1aae799cf16..7d19091215b08 100644 +--- a/fs/xfs/xfs_dquot_item.c ++++ b/fs/xfs/xfs_dquot_item.c +@@ -17,6 +17,7 @@ + #include "xfs_trans_priv.h" + #include "xfs_qm.h" + #include "xfs_log.h" ++#include 
"xfs_error.h" + + static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip) + { +@@ -193,8 +194,38 @@ xfs_qm_dquot_logitem_committing( + return xfs_qm_dquot_logitem_release(lip); + } + ++#ifdef DEBUG_EXPENSIVE ++static int ++xfs_qm_dquot_logitem_precommit( ++ struct xfs_trans *tp, ++ struct xfs_log_item *lip) ++{ ++ struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; ++ struct xfs_mount *mp = dqp->q_mount; ++ struct xfs_disk_dquot ddq = { }; ++ xfs_failaddr_t fa; ++ ++ xfs_dquot_to_disk(&ddq, dqp); ++ fa = xfs_dquot_verify(mp, &ddq, dqp->q_id); ++ if (fa) { ++ XFS_CORRUPTION_ERROR("Bad dquot during logging", ++ XFS_ERRLEVEL_LOW, mp, &ddq, sizeof(ddq)); ++ xfs_alert(mp, ++ "Metadata corruption detected at %pS, dquot 0x%x", ++ fa, dqp->q_id); ++ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); ++ ASSERT(fa == NULL); ++ } ++ ++ return 0; ++} ++#else ++# define xfs_qm_dquot_logitem_precommit NULL ++#endif ++ + static const struct xfs_item_ops xfs_dquot_item_ops = { + .iop_size = xfs_qm_dquot_logitem_size, ++ .iop_precommit = xfs_qm_dquot_logitem_precommit, + .iop_format = xfs_qm_dquot_logitem_format, + .iop_pin = xfs_qm_dquot_logitem_pin, + .iop_unpin = xfs_qm_dquot_logitem_unpin, +diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c +index 2ec23c9af760c..a734ca8d8f03c 100644 +--- a/fs/xfs/xfs_inode_item.c ++++ b/fs/xfs/xfs_inode_item.c +@@ -36,6 +36,36 @@ xfs_inode_item_sort( + return INODE_ITEM(lip)->ili_inode->i_ino; + } + ++#ifdef DEBUG_EXPENSIVE ++static void ++xfs_inode_item_precommit_check( ++ struct xfs_inode *ip) ++{ ++ struct xfs_mount *mp = ip->i_mount; ++ struct xfs_dinode *dip; ++ xfs_failaddr_t fa; ++ ++ dip = kzalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | GFP_NOFS); ++ if (!dip) { ++ ASSERT(dip != NULL); ++ return; ++ } ++ ++ xfs_inode_to_disk(ip, dip, 0); ++ xfs_dinode_calc_crc(mp, dip); ++ fa = xfs_dinode_verify(mp, ip->i_ino, dip); ++ if (fa) { ++ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip, ++ sizeof(*dip), fa); 
++ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); ++ ASSERT(fa == NULL); ++ } ++ kfree(dip); ++} ++#else ++# define xfs_inode_item_precommit_check(ip) ((void)0) ++#endif ++ + /* + * Prior to finally logging the inode, we have to ensure that all the + * per-modification inode state changes are applied. This includes VFS inode +@@ -168,6 +198,8 @@ xfs_inode_item_precommit( + iip->ili_fields |= (flags | iip->ili_last_fields); + spin_unlock(&iip->ili_lock); + ++ xfs_inode_item_precommit_check(ip); ++ + /* + * We are done with the log item transaction dirty state, so clear it so + * that it doesn't pollute future transactions. +-- +2.39.5 +