Fixes for 6.4

author Sasha Levin <sashal@kernel.org>

Sat, 5 Aug 2023 20:49:58 +0000 (16:49 -0400)

committer Sasha Levin <sashal@kernel.org>

Sat, 5 Aug 2023 20:49:58 +0000 (16:49 -0400)
author Sasha Levin <sashal@kernel.org>
Sat, 5 Aug 2023 20:49:58 +0000 (16:49 -0400)
committer Sasha Levin <sashal@kernel.org>
Sat, 5 Aug 2023 20:49:58 +0000 (16:49 -0400)
diff --git a/queue-6.4/arm-dts-at91-sam9x60-fix-the-soc-detection.patch b/queue-6.4/arm-dts-at91-sam9x60-fix-the-soc-detection.patch

new file mode 100644 (file)

index 0000000..b86a4b0
--- /dev/null
+++ b/queue-6.4/arm-dts-at91-sam9x60-fix-the-soc-detection.patch
@@ -0,0 +1,145 @@
+From 0d4fc21caa3357edc3921de1114eaefc29a2e574 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Jul 2023 15:30:42 +0530
+Subject: ARM: dts: at91: sam9x60: fix the SOC detection
+
+From: Durai Manickam KR <durai.manickamkr@microchip.com>
+
+[ Upstream commit f6ad3c13f1b8c4e785cb7bd423887197142f47b0 ]
+
+Remove the dbgu compatible strings in the UART submodule of the
+flexcom for the proper SOC detection.
+
+Fixes: 99c808335877 ("ARM: dts: at91: sam9x60: Add missing flexcom definitions")
+Signed-off-by: Durai Manickam KR <durai.manickamkr@microchip.com>
+Link: https://lore.kernel.org/r/20230712100042.317856-1-durai.manickamkr@microchip.com
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/sam9x60.dtsi | 26 +++++++++++++-------------
+ 1 file changed, 13 insertions(+), 13 deletions(-)
+
+diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi
+index 8b53997675e75..73d570a172690 100644
+--- a/arch/arm/boot/dts/sam9x60.dtsi
++++ b/arch/arm/boot/dts/sam9x60.dtsi
+@@ -172,7 +172,7 @@
+                               status = "disabled";
+ 
+                               uart4: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <13 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -240,7 +240,7 @@
+                               status = "disabled";
+ 
+                               uart5: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       atmel,usart-mode = <AT91_USART_MODE_SERIAL>;
+                                       interrupts = <14 IRQ_TYPE_LEVEL_HIGH 7>;
+@@ -370,7 +370,7 @@
+                               status = "disabled";
+ 
+                               uart11: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <32 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -419,7 +419,7 @@
+                               status = "disabled";
+ 
+                               uart12: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <33 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -576,7 +576,7 @@
+                               status = "disabled";
+ 
+                               uart6: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <9 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -625,7 +625,7 @@
+                               status = "disabled";
+ 
+                               uart7: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <10 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -674,7 +674,7 @@
+                               status = "disabled";
+ 
+                               uart8: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <11 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -723,7 +723,7 @@
+                               status = "disabled";
+ 
+                               uart0: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <5 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -791,7 +791,7 @@
+                               status = "disabled";
+ 
+                               uart1: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <6 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -859,7 +859,7 @@
+                               status = "disabled";
+ 
+                               uart2: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <7 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -927,7 +927,7 @@
+                               status = "disabled";
+ 
+                               uart3: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <8 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -1050,7 +1050,7 @@
+                               status = "disabled";
+ 
+                               uart9: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <15 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+@@ -1099,7 +1099,7 @@
+                               status = "disabled";
+ 
+                               uart10: serial@200 {
+-                                      compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart";
++                                      compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart";
+                                       reg = <0x200 0x200>;
+                                       interrupts = <16 IRQ_TYPE_LEVEL_HIGH 7>;
+                                       dmas = <&dma0
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm-dts-at91-use-clock-controller-name-for-pmc-nodes.patch b/queue-6.4/arm-dts-at91-use-clock-controller-name-for-pmc-nodes.patch

new file mode 100644 (file)

index 0000000..29f2a69
--- /dev/null
+++ b/queue-6.4/arm-dts-at91-use-clock-controller-name-for-pmc-nodes.patch
@@ -0,0 +1,287 @@
+From 5b8b25bf97af5b7cd29ddb52eb1480d863805620 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 May 2023 12:41:15 +0300
+Subject: ARM: dts: at91: use clock-controller name for PMC nodes
+
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+
+[ Upstream commit d08f92bdfb2dc4a2a14237cfd8a22c568781797c ]
+
+Use clock-controller generic name for PMC nodes.
+
+Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Link: https://lore.kernel.org/r/20230517094119.2894220-2-claudiu.beznea@microchip.com
+Stable-dep-of: f6ad3c13f1b8 ("ARM: dts: at91: sam9x60: fix the SOC detection")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/at91rm9200.dtsi   | 2 +-
+ arch/arm/boot/dts/at91sam9260.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9261.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9263.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9g20.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9g25.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9g35.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9g45.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9n12.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9rl.dtsi   | 2 +-
+ arch/arm/boot/dts/at91sam9x25.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9x35.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9x5.dtsi   | 2 +-
+ arch/arm/boot/dts/sam9x60.dtsi      | 2 +-
+ arch/arm/boot/dts/sama5d2.dtsi      | 2 +-
+ arch/arm/boot/dts/sama5d3.dtsi      | 2 +-
+ arch/arm/boot/dts/sama5d3_emac.dtsi | 2 +-
+ arch/arm/boot/dts/sama5d4.dtsi      | 2 +-
+ arch/arm/boot/dts/sama7g5.dtsi      | 2 +-
+ 19 files changed, 19 insertions(+), 19 deletions(-)
+
+diff --git a/arch/arm/boot/dts/at91rm9200.dtsi b/arch/arm/boot/dts/at91rm9200.dtsi
+index 6f9004ebf4245..37b500f6f3956 100644
+--- a/arch/arm/boot/dts/at91rm9200.dtsi
++++ b/arch/arm/boot/dts/at91rm9200.dtsi
+@@ -102,7 +102,7 @@
+                               reg = <0xffffff00 0x100>;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91rm9200-pmc", "syscon";
+                               reg = <0xfffffc00 0x100>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi
+index 789fe356dbf60..16e3b24b4dddb 100644
+--- a/arch/arm/boot/dts/at91sam9260.dtsi
++++ b/arch/arm/boot/dts/at91sam9260.dtsi
+@@ -115,7 +115,7 @@
+                               reg = <0xffffee00 0x200>;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9260-pmc", "syscon";
+                               reg = <0xfffffc00 0x100>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi
+index ee0bd1aceb3f0..fe9ead867e2ab 100644
+--- a/arch/arm/boot/dts/at91sam9261.dtsi
++++ b/arch/arm/boot/dts/at91sam9261.dtsi
+@@ -599,7 +599,7 @@
+                               };
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9261-pmc", "syscon";
+                               reg = <0xfffffc00 0x100>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
+index 3ce9ea9873129..ee5e6ed44dd40 100644
+--- a/arch/arm/boot/dts/at91sam9263.dtsi
++++ b/arch/arm/boot/dts/at91sam9263.dtsi
+@@ -101,7 +101,7 @@
+                               atmel,external-irqs = <30 31>;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9263-pmc", "syscon";
+                               reg = <0xfffffc00 0x100>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/at91sam9g20.dtsi b/arch/arm/boot/dts/at91sam9g20.dtsi
+index 708e1646b7f46..738a43ffd2281 100644
+--- a/arch/arm/boot/dts/at91sam9g20.dtsi
++++ b/arch/arm/boot/dts/at91sam9g20.dtsi
+@@ -41,7 +41,7 @@
+                               atmel,adc-startup-time = <40>;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9g20-pmc", "atmel,at91sam9260-pmc", "syscon";
+                       };
+               };
+diff --git a/arch/arm/boot/dts/at91sam9g25.dtsi b/arch/arm/boot/dts/at91sam9g25.dtsi
+index d2f13afb35eaf..ec3c77221881c 100644
+--- a/arch/arm/boot/dts/at91sam9g25.dtsi
++++ b/arch/arm/boot/dts/at91sam9g25.dtsi
+@@ -26,7 +26,7 @@
+                                     >;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9g25-pmc", "atmel,at91sam9x5-pmc", "syscon";
+                       };
+               };
+diff --git a/arch/arm/boot/dts/at91sam9g35.dtsi b/arch/arm/boot/dts/at91sam9g35.dtsi
+index 48c2bc4a7753d..c9cfb93092ee6 100644
+--- a/arch/arm/boot/dts/at91sam9g35.dtsi
++++ b/arch/arm/boot/dts/at91sam9g35.dtsi
+@@ -25,7 +25,7 @@
+                                     >;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9g35-pmc", "atmel,at91sam9x5-pmc", "syscon";
+                       };
+               };
+diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
+index 95f5d76234dbb..76afeb31b7f54 100644
+--- a/arch/arm/boot/dts/at91sam9g45.dtsi
++++ b/arch/arm/boot/dts/at91sam9g45.dtsi
+@@ -129,7 +129,7 @@
+                               reg = <0xffffea00 0x200>;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9g45-pmc", "syscon";
+                               reg = <0xfffffc00 0x100>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi
+index 83114d26f10d0..c2e7460fb7ff6 100644
+--- a/arch/arm/boot/dts/at91sam9n12.dtsi
++++ b/arch/arm/boot/dts/at91sam9n12.dtsi
+@@ -118,7 +118,7 @@
+                               reg = <0xffffea00 0x200>;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9n12-pmc", "syscon";
+                               reg = <0xfffffc00 0x200>;
+                               #clock-cells = <2>;
+diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi
+index 364a2ff0a763d..a12e6c419fe3d 100644
+--- a/arch/arm/boot/dts/at91sam9rl.dtsi
++++ b/arch/arm/boot/dts/at91sam9rl.dtsi
+@@ -763,7 +763,7 @@
+                               };
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9rl-pmc", "syscon";
+                               reg = <0xfffffc00 0x100>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/at91sam9x25.dtsi b/arch/arm/boot/dts/at91sam9x25.dtsi
+index 0fe8802e1242b..7036f5f045715 100644
+--- a/arch/arm/boot/dts/at91sam9x25.dtsi
++++ b/arch/arm/boot/dts/at91sam9x25.dtsi
+@@ -27,7 +27,7 @@
+                                     >;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9x25-pmc", "atmel,at91sam9x5-pmc", "syscon";
+                       };
+               };
+diff --git a/arch/arm/boot/dts/at91sam9x35.dtsi b/arch/arm/boot/dts/at91sam9x35.dtsi
+index 0bfa21f18f870..eb03b0497e371 100644
+--- a/arch/arm/boot/dts/at91sam9x35.dtsi
++++ b/arch/arm/boot/dts/at91sam9x35.dtsi
+@@ -26,7 +26,7 @@
+                                     >;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9x35-pmc", "atmel,at91sam9x5-pmc", "syscon";
+                       };
+               };
+diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
+index 0c26c925761b2..af19ef2a875c4 100644
+--- a/arch/arm/boot/dts/at91sam9x5.dtsi
++++ b/arch/arm/boot/dts/at91sam9x5.dtsi
+@@ -126,7 +126,7 @@
+                               reg = <0xffffea00 0x200>;
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,at91sam9x5-pmc", "syscon";
+                               reg = <0xfffffc00 0x200>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi
+index e67ede940071f..89aafb9a8b0fe 100644
+--- a/arch/arm/boot/dts/sam9x60.dtsi
++++ b/arch/arm/boot/dts/sam9x60.dtsi
+@@ -1282,7 +1282,7 @@
+                               };
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "microchip,sam9x60-pmc", "syscon";
+                               reg = <0xfffffc00 0x200>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
+index 14c35c12a115f..86009dd28e623 100644
+--- a/arch/arm/boot/dts/sama5d2.dtsi
++++ b/arch/arm/boot/dts/sama5d2.dtsi
+@@ -284,7 +284,7 @@
+                               clock-names = "dma_clk";
+                       };
+ 
+-                      pmc: pmc@f0014000 {
++                      pmc: clock-controller@f0014000 {
+                               compatible = "atmel,sama5d2-pmc", "syscon";
+                               reg = <0xf0014000 0x160>;
+                               interrupts = <74 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi
+index bde8e92d60bb1..4524a16322d16 100644
+--- a/arch/arm/boot/dts/sama5d3.dtsi
++++ b/arch/arm/boot/dts/sama5d3.dtsi
+@@ -1001,7 +1001,7 @@
+                               };
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                               compatible = "atmel,sama5d3-pmc", "syscon";
+                               reg = <0xfffffc00 0x120>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/sama5d3_emac.dtsi b/arch/arm/boot/dts/sama5d3_emac.dtsi
+index 45226108850d2..5d7ce13de8ccf 100644
+--- a/arch/arm/boot/dts/sama5d3_emac.dtsi
++++ b/arch/arm/boot/dts/sama5d3_emac.dtsi
+@@ -30,7 +30,7 @@
+                               };
+                       };
+ 
+-                      pmc: pmc@fffffc00 {
++                      pmc: clock-controller@fffffc00 {
+                       };
+ 
+                       macb1: ethernet@f802c000 {
+diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
+index af62157ae214f..e94f3a661f4bb 100644
+--- a/arch/arm/boot/dts/sama5d4.dtsi
++++ b/arch/arm/boot/dts/sama5d4.dtsi
+@@ -250,7 +250,7 @@
+                               clock-names = "dma_clk";
+                       };
+ 
+-                      pmc: pmc@f0018000 {
++                      pmc: clock-controller@f0018000 {
+                               compatible = "atmel,sama5d4-pmc", "syscon";
+                               reg = <0xf0018000 0x120>;
+                               interrupts = <1 IRQ_TYPE_LEVEL_HIGH 7>;
+diff --git a/arch/arm/boot/dts/sama7g5.dtsi b/arch/arm/boot/dts/sama7g5.dtsi
+index 929ba73702e93..b55adb96a06ec 100644
+--- a/arch/arm/boot/dts/sama7g5.dtsi
++++ b/arch/arm/boot/dts/sama7g5.dtsi
+@@ -241,7 +241,7 @@
+                       clocks = <&pmc PMC_TYPE_PERIPHERAL 11>;
+               };
+ 
+-              pmc: pmc@e0018000 {
++              pmc: clock-controller@e0018000 {
+                       compatible = "microchip,sama7g5-pmc", "syscon";
+                       reg = <0xe0018000 0x200>;
+                       interrupts = <GIC_SPI 10 IRQ_TYPE_LEVEL_HIGH>;
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm-dts-at91-use-clock-controller-name-for-sckc-node.patch b/queue-6.4/arm-dts-at91-use-clock-controller-name-for-sckc-node.patch

new file mode 100644 (file)

index 0000000..e5bd09c
--- /dev/null
+++ b/queue-6.4/arm-dts-at91-use-clock-controller-name-for-sckc-node.patch
@@ -0,0 +1,119 @@
+From 73f4815a4c82b965cf4ffa967eae3214ca3c3062 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 May 2023 12:41:18 +0300
+Subject: ARM: dts: at91: use clock-controller name for sckc nodes
+
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+
+[ Upstream commit 3ecb546333089195b6a1508cb58627b0797a26ca ]
+
+Use clock-controller generic name for slow clock controller nodes.
+
+Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Link: https://lore.kernel.org/r/20230517094119.2894220-5-claudiu.beznea@microchip.com
+Stable-dep-of: f6ad3c13f1b8 ("ARM: dts: at91: sam9x60: fix the SOC detection")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/at91sam9g45.dtsi | 2 +-
+ arch/arm/boot/dts/at91sam9rl.dtsi  | 2 +-
+ arch/arm/boot/dts/at91sam9x5.dtsi  | 2 +-
+ arch/arm/boot/dts/sam9x60.dtsi     | 2 +-
+ arch/arm/boot/dts/sama5d2.dtsi     | 2 +-
+ arch/arm/boot/dts/sama5d3.dtsi     | 2 +-
+ arch/arm/boot/dts/sama5d4.dtsi     | 2 +-
+ 7 files changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
+index 76afeb31b7f54..498cb92b29f96 100644
+--- a/arch/arm/boot/dts/at91sam9g45.dtsi
++++ b/arch/arm/boot/dts/at91sam9g45.dtsi
+@@ -923,7 +923,7 @@
+                               status = "disabled";
+                       };
+ 
+-                      clk32k: sckc@fffffd50 {
++                      clk32k: clock-controller@fffffd50 {
+                               compatible = "atmel,at91sam9x5-sckc";
+                               reg = <0xfffffd50 0x4>;
+                               clocks = <&slow_xtal>;
+diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi
+index a12e6c419fe3d..d7e8a115c916c 100644
+--- a/arch/arm/boot/dts/at91sam9rl.dtsi
++++ b/arch/arm/boot/dts/at91sam9rl.dtsi
+@@ -799,7 +799,7 @@
+                               status = "disabled";
+                       };
+ 
+-                      clk32k: sckc@fffffd50 {
++                      clk32k: clock-controller@fffffd50 {
+                               compatible = "atmel,at91sam9x5-sckc";
+                               reg = <0xfffffd50 0x4>;
+                               clocks = <&slow_xtal>;
+diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
+index af19ef2a875c4..0123ee47151cb 100644
+--- a/arch/arm/boot/dts/at91sam9x5.dtsi
++++ b/arch/arm/boot/dts/at91sam9x5.dtsi
+@@ -154,7 +154,7 @@
+                               clocks = <&pmc PMC_TYPE_CORE PMC_MCK>;
+                       };
+ 
+-                      clk32k: sckc@fffffe50 {
++                      clk32k: clock-controller@fffffe50 {
+                               compatible = "atmel,at91sam9x5-sckc";
+                               reg = <0xfffffe50 0x4>;
+                               clocks = <&slow_xtal>;
+diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi
+index 89aafb9a8b0fe..c8bedfa987e57 100644
+--- a/arch/arm/boot/dts/sam9x60.dtsi
++++ b/arch/arm/boot/dts/sam9x60.dtsi
+@@ -1322,7 +1322,7 @@
+                               clocks = <&pmc PMC_TYPE_CORE PMC_MCK>;
+                       };
+ 
+-                      clk32k: sckc@fffffe50 {
++                      clk32k: clock-controller@fffffe50 {
+                               compatible = "microchip,sam9x60-sckc";
+                               reg = <0xfffffe50 0x4>;
+                               clocks = <&slow_xtal>;
+diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
+index 86009dd28e623..5f632e3f039e6 100644
+--- a/arch/arm/boot/dts/sama5d2.dtsi
++++ b/arch/arm/boot/dts/sama5d2.dtsi
+@@ -704,7 +704,7 @@
+                               status = "disabled";
+                       };
+ 
+-                      clk32k: sckc@f8048050 {
++                      clk32k: clock-controller@f8048050 {
+                               compatible = "atmel,sama5d4-sckc";
+                               reg = <0xf8048050 0x4>;
+ 
+diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi
+index 4524a16322d16..0eebf6c760b3d 100644
+--- a/arch/arm/boot/dts/sama5d3.dtsi
++++ b/arch/arm/boot/dts/sama5d3.dtsi
+@@ -1040,7 +1040,7 @@
+                               status = "disabled";
+                       };
+ 
+-                      clk32k: sckc@fffffe50 {
++                      clk32k: clock-controller@fffffe50 {
+                               compatible = "atmel,sama5d3-sckc";
+                               reg = <0xfffffe50 0x4>;
+                               clocks = <&slow_xtal>;
+diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
+index e94f3a661f4bb..de6c829692327 100644
+--- a/arch/arm/boot/dts/sama5d4.dtsi
++++ b/arch/arm/boot/dts/sama5d4.dtsi
+@@ -761,7 +761,7 @@
+                               status = "disabled";
+                       };
+ 
+-                      clk32k: sckc@fc068650 {
++                      clk32k: clock-controller@fc068650 {
+                               compatible = "atmel,sama5d4-sckc";
+                               reg = <0xfc068650 0x4>;
+                               #clock-cells = <0>;
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm-dts-at91-use-generic-name-for-shutdown-controlle.patch b/queue-6.4/arm-dts-at91-use-generic-name-for-shutdown-controlle.patch

new file mode 100644 (file)

index 0000000..08a481b
--- /dev/null
+++ b/queue-6.4/arm-dts-at91-use-generic-name-for-shutdown-controlle.patch
@@ -0,0 +1,303 @@
+From 83b47ab15913859c88e89fa11473ab6c1b3c58a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Jun 2023 13:16:43 +0300
+Subject: ARM: dts: at91: use generic name for shutdown controller
+
+From: Claudiu Beznea <claudiu.beznea@microchip.com>
+
+[ Upstream commit 327ca228e58be498446244eb7cf39b892adda5d7 ]
+
+Use the generic name poweroff for the shdwc node to comply with the
+device tree specification.
+
+Signed-off-by: Claudiu Beznea <claudiu.beznea@microchip.com>
+Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
+Link: https://lore.kernel.org/r/20230616101646.879480-2-claudiu.beznea@microchip.com
+Stable-dep-of: f6ad3c13f1b8 ("ARM: dts: at91: sam9x60: fix the SOC detection")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/at91-qil_a9260.dts        | 2 +-
+ arch/arm/boot/dts/at91-sama5d27_som1_ek.dts | 2 +-
+ arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts   | 2 +-
+ arch/arm/boot/dts/at91-sama5d2_xplained.dts | 2 +-
+ arch/arm/boot/dts/at91sam9260.dtsi          | 2 +-
+ arch/arm/boot/dts/at91sam9260ek.dts         | 2 +-
+ arch/arm/boot/dts/at91sam9261.dtsi          | 2 +-
+ arch/arm/boot/dts/at91sam9263.dtsi          | 2 +-
+ arch/arm/boot/dts/at91sam9g20ek_common.dtsi | 2 +-
+ arch/arm/boot/dts/at91sam9g45.dtsi          | 2 +-
+ arch/arm/boot/dts/at91sam9n12.dtsi          | 2 +-
+ arch/arm/boot/dts/at91sam9rl.dtsi           | 2 +-
+ arch/arm/boot/dts/at91sam9x5.dtsi           | 2 +-
+ arch/arm/boot/dts/sam9x60.dtsi              | 2 +-
+ arch/arm/boot/dts/sama5d2.dtsi              | 2 +-
+ arch/arm/boot/dts/sama5d3.dtsi              | 2 +-
+ arch/arm/boot/dts/sama5d4.dtsi              | 2 +-
+ arch/arm/boot/dts/sama7g5.dtsi              | 2 +-
+ arch/arm/boot/dts/usb_a9260.dts             | 2 +-
+ arch/arm/boot/dts/usb_a9263.dts             | 2 +-
+ 20 files changed, 20 insertions(+), 20 deletions(-)
+
+diff --git a/arch/arm/boot/dts/at91-qil_a9260.dts b/arch/arm/boot/dts/at91-qil_a9260.dts
+index 9d26f99963483..5ccb3c139592d 100644
+--- a/arch/arm/boot/dts/at91-qil_a9260.dts
++++ b/arch/arm/boot/dts/at91-qil_a9260.dts
+@@ -108,7 +108,7 @@
+                               status = "okay";
+                       };
+ 
+-                      shdwc@fffffd10 {
++                      shdwc: poweroff@fffffd10 {
+                               atmel,wakeup-counter = <10>;
+                               atmel,wakeup-rtt-timer;
+                       };
+diff --git a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts
+index 52ddd0571f1c0..d0a6dbd377dfa 100644
+--- a/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts
++++ b/arch/arm/boot/dts/at91-sama5d27_som1_ek.dts
+@@ -139,7 +139,7 @@
+                               };
+                       };
+ 
+-                      shdwc@f8048010 {
++                      poweroff@f8048010 {
+                               debounce-delay-us = <976>;
+                               atmel,wakeup-rtc-timer;
+ 
+diff --git a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts
+index bf1c9ca72a9f3..200b20515ab12 100644
+--- a/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts
++++ b/arch/arm/boot/dts/at91-sama5d2_ptc_ek.dts
+@@ -204,7 +204,7 @@
+                               };
+                       };
+ 
+-                      shdwc@f8048010 {
++                      poweroff@f8048010 {
+                               debounce-delay-us = <976>;
+ 
+                               input@0 {
+diff --git a/arch/arm/boot/dts/at91-sama5d2_xplained.dts b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
+index 2d53c47d7cc86..6680031387e8c 100644
+--- a/arch/arm/boot/dts/at91-sama5d2_xplained.dts
++++ b/arch/arm/boot/dts/at91-sama5d2_xplained.dts
+@@ -348,7 +348,7 @@
+                               };
+                       };
+ 
+-                      shdwc@f8048010 {
++                      poweroff@f8048010 {
+                               debounce-delay-us = <976>;
+                               atmel,wakeup-rtc-timer;
+ 
+diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi
+index 16e3b24b4dddb..35a007365b6a5 100644
+--- a/arch/arm/boot/dts/at91sam9260.dtsi
++++ b/arch/arm/boot/dts/at91sam9260.dtsi
+@@ -130,7 +130,7 @@
+                               clocks = <&pmc PMC_TYPE_CORE PMC_SLOW>;
+                       };
+ 
+-                      shdwc@fffffd10 {
++                      shdwc: poweroff@fffffd10 {
+                               compatible = "atmel,at91sam9260-shdwc";
+                               reg = <0xfffffd10 0x10>;
+                               clocks = <&pmc PMC_TYPE_CORE PMC_SLOW>;
+diff --git a/arch/arm/boot/dts/at91sam9260ek.dts b/arch/arm/boot/dts/at91sam9260ek.dts
+index bb72f050a4fef..720c15472c4a5 100644
+--- a/arch/arm/boot/dts/at91sam9260ek.dts
++++ b/arch/arm/boot/dts/at91sam9260ek.dts
+@@ -112,7 +112,7 @@
+                               };
+                       };
+ 
+-                      shdwc@fffffd10 {
++                      shdwc: poweroff@fffffd10 {
+                               atmel,wakeup-counter = <10>;
+                               atmel,wakeup-rtt-timer;
+                       };
+diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi
+index fe9ead867e2ab..528ffc6f6f962 100644
+--- a/arch/arm/boot/dts/at91sam9261.dtsi
++++ b/arch/arm/boot/dts/at91sam9261.dtsi
+@@ -614,7 +614,7 @@
+                               clocks = <&slow_xtal>;
+                       };
+ 
+-                      shdwc@fffffd10 {
++                      poweroff@fffffd10 {
+                               compatible = "atmel,at91sam9260-shdwc";
+                               reg = <0xfffffd10 0x10>;
+                               clocks = <&slow_xtal>;
+diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
+index ee5e6ed44dd40..75d8ff2d12c8a 100644
+--- a/arch/arm/boot/dts/at91sam9263.dtsi
++++ b/arch/arm/boot/dts/at91sam9263.dtsi
+@@ -158,7 +158,7 @@
+                               clocks = <&slow_xtal>;
+                       };
+ 
+-                      shdwc@fffffd10 {
++                      poweroff@fffffd10 {
+                               compatible = "atmel,at91sam9260-shdwc";
+                               reg = <0xfffffd10 0x10>;
+                               clocks = <&slow_xtal>;
+diff --git a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
+index 024af2db638eb..565b99e79c520 100644
+--- a/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
++++ b/arch/arm/boot/dts/at91sam9g20ek_common.dtsi
+@@ -126,7 +126,7 @@
+                               };
+                       };
+ 
+-                      shdwc@fffffd10 {
++                      shdwc: poweroff@fffffd10 {
+                               atmel,wakeup-counter = <10>;
+                               atmel,wakeup-rtt-timer;
+                       };
+diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
+index 498cb92b29f96..7cccc606e36cd 100644
+--- a/arch/arm/boot/dts/at91sam9g45.dtsi
++++ b/arch/arm/boot/dts/at91sam9g45.dtsi
+@@ -152,7 +152,7 @@
+                       };
+ 
+ 
+-                      shdwc@fffffd10 {
++                      poweroff@fffffd10 {
+                               compatible = "atmel,at91sam9rl-shdwc";
+                               reg = <0xfffffd10 0x10>;
+                               clocks = <&clk32k>;
+diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi
+index c2e7460fb7ff6..16a9a908985da 100644
+--- a/arch/arm/boot/dts/at91sam9n12.dtsi
++++ b/arch/arm/boot/dts/at91sam9n12.dtsi
+@@ -140,7 +140,7 @@
+                               clocks = <&pmc PMC_TYPE_CORE PMC_MCK>;
+                       };
+ 
+-                      shdwc@fffffe10 {
++                      poweroff@fffffe10 {
+                               compatible = "atmel,at91sam9x5-shdwc";
+                               reg = <0xfffffe10 0x10>;
+                               clocks = <&clk32k>;
+diff --git a/arch/arm/boot/dts/at91sam9rl.dtsi b/arch/arm/boot/dts/at91sam9rl.dtsi
+index d7e8a115c916c..3d089ffbe1626 100644
+--- a/arch/arm/boot/dts/at91sam9rl.dtsi
++++ b/arch/arm/boot/dts/at91sam9rl.dtsi
+@@ -778,7 +778,7 @@
+                               clocks = <&clk32k>;
+                       };
+ 
+-                      shdwc@fffffd10 {
++                      poweroff@fffffd10 {
+                               compatible = "atmel,at91sam9260-shdwc";
+                               reg = <0xfffffd10 0x10>;
+                               clocks = <&clk32k>;
+diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
+index 0123ee47151cb..a1fed912f2eea 100644
+--- a/arch/arm/boot/dts/at91sam9x5.dtsi
++++ b/arch/arm/boot/dts/at91sam9x5.dtsi
+@@ -141,7 +141,7 @@
+                               clocks = <&clk32k>;
+                       };
+ 
+-                      shutdown_controller: shdwc@fffffe10 {
++                      shutdown_controller: poweroff@fffffe10 {
+                               compatible = "atmel,at91sam9x5-shdwc";
+                               reg = <0xfffffe10 0x10>;
+                               clocks = <&clk32k>;
+diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi
+index c8bedfa987e57..8b53997675e75 100644
+--- a/arch/arm/boot/dts/sam9x60.dtsi
++++ b/arch/arm/boot/dts/sam9x60.dtsi
+@@ -1297,7 +1297,7 @@
+                               clocks = <&clk32k 0>;
+                       };
+ 
+-                      shutdown_controller: shdwc@fffffe10 {
++                      shutdown_controller: poweroff@fffffe10 {
+                               compatible = "microchip,sam9x60-shdwc";
+                               reg = <0xfffffe10 0x10>;
+                               clocks = <&clk32k 0>;
+diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi
+index 5f632e3f039e6..8ae270fabfa82 100644
+--- a/arch/arm/boot/dts/sama5d2.dtsi
++++ b/arch/arm/boot/dts/sama5d2.dtsi
+@@ -680,7 +680,7 @@
+                               clocks = <&clk32k>;
+                       };
+ 
+-                      shutdown_controller: shdwc@f8048010 {
++                      shutdown_controller: poweroff@f8048010 {
+                               compatible = "atmel,sama5d2-shdwc";
+                               reg = <0xf8048010 0x10>;
+                               clocks = <&clk32k>;
+diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi
+index 0eebf6c760b3d..d9e66700d1c20 100644
+--- a/arch/arm/boot/dts/sama5d3.dtsi
++++ b/arch/arm/boot/dts/sama5d3.dtsi
+@@ -1016,7 +1016,7 @@
+                               clocks = <&clk32k>;
+                       };
+ 
+-                      shutdown_controller: shutdown-controller@fffffe10 {
++                      shutdown_controller: poweroff@fffffe10 {
+                               compatible = "atmel,at91sam9x5-shdwc";
+                               reg = <0xfffffe10 0x10>;
+                               clocks = <&clk32k>;
+diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
+index de6c829692327..41284e013f531 100644
+--- a/arch/arm/boot/dts/sama5d4.dtsi
++++ b/arch/arm/boot/dts/sama5d4.dtsi
+@@ -740,7 +740,7 @@
+                               clocks = <&clk32k>;
+                       };
+ 
+-                      shutdown_controller: shdwc@fc068610 {
++                      shutdown_controller: poweroff@fc068610 {
+                               compatible = "atmel,at91sam9x5-shdwc";
+                               reg = <0xfc068610 0x10>;
+                               clocks = <&clk32k>;
+diff --git a/arch/arm/boot/dts/sama7g5.dtsi b/arch/arm/boot/dts/sama7g5.dtsi
+index b55adb96a06ec..9642a42d84e60 100644
+--- a/arch/arm/boot/dts/sama7g5.dtsi
++++ b/arch/arm/boot/dts/sama7g5.dtsi
+@@ -257,7 +257,7 @@
+                       clocks = <&clk32k 0>;
+               };
+ 
+-              shdwc: shdwc@e001d010 {
++              shdwc: poweroff@e001d010 {
+                       compatible = "microchip,sama7g5-shdwc", "syscon";
+                       reg = <0xe001d010 0x10>;
+                       clocks = <&clk32k 0>;
+diff --git a/arch/arm/boot/dts/usb_a9260.dts b/arch/arm/boot/dts/usb_a9260.dts
+index 6cfa83921ac26..66f8da89007db 100644
+--- a/arch/arm/boot/dts/usb_a9260.dts
++++ b/arch/arm/boot/dts/usb_a9260.dts
+@@ -22,7 +22,7 @@
+ 
+       ahb {
+               apb {
+-                      shdwc@fffffd10 {
++                      shdwc: poweroff@fffffd10 {
+                               atmel,wakeup-counter = <10>;
+                               atmel,wakeup-rtt-timer;
+                       };
+diff --git a/arch/arm/boot/dts/usb_a9263.dts b/arch/arm/boot/dts/usb_a9263.dts
+index b6cb9cdf81973..45745915b2e16 100644
+--- a/arch/arm/boot/dts/usb_a9263.dts
++++ b/arch/arm/boot/dts/usb_a9263.dts
+@@ -67,7 +67,7 @@
+                               };
+                       };
+ 
+-                      shdwc@fffffd10 {
++                      poweroff@fffffd10 {
+                               atmel,wakeup-counter = <10>;
+                               atmel,wakeup-rtt-timer;
+                       };
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm-dts-nxp-imx-limit-sk-imx53-supported-frequencies.patch b/queue-6.4/arm-dts-nxp-imx-limit-sk-imx53-supported-frequencies.patch

new file mode 100644 (file)

index 0000000..8beac3a
--- /dev/null
+++ b/queue-6.4/arm-dts-nxp-imx-limit-sk-imx53-supported-frequencies.patch
@@ -0,0 +1,46 @@
+From 8df2ec014efb98c17a362142ddfd76217c0ed776 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 9 Jul 2023 23:30:19 +0300
+Subject: ARM: dts: nxp/imx: limit sk-imx53 supported frequencies
+
+From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+
+[ Upstream commit c486762fb17c99fd642beea3e1e4744d093c262a ]
+
+The SK-IMX53 board, bearing i.MX536A CPU, is not stable when running at
+1.2 GHz (default iMX53 maximum). The SoC is only rated up to 800 MHz.
+Disable 1.2 GHz and 1 GHz frequencies.
+
+Fixes: 0b8576d8440a ("ARM: dts: imx: Add support for SK-iMX53 board")
+Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Reviewed-by: Fabio Estevam <festevam@gmail.com>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/imx53-sk-imx53.dts | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/arch/arm/boot/dts/imx53-sk-imx53.dts b/arch/arm/boot/dts/imx53-sk-imx53.dts
+index 103e73176e47d..1a00d290092ad 100644
+--- a/arch/arm/boot/dts/imx53-sk-imx53.dts
++++ b/arch/arm/boot/dts/imx53-sk-imx53.dts
+@@ -60,6 +60,16 @@
+       status = "okay";
+ };
+ 
++&cpu0 {
++      /* CPU rated to 800 MHz, not the default 1.2GHz. */
++      operating-points = <
++              /* kHz   uV */
++              166666  850000
++              400000  900000
++              800000  1050000
++      >;
++};
++
+ &ecspi1 {
+       pinctrl-names = "default";
+       pinctrl-0 = <&pinctrl_ecspi1>;
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm64-dts-freescale-fix-vpu-g2-clock.patch b/queue-6.4/arm64-dts-freescale-fix-vpu-g2-clock.patch

new file mode 100644 (file)

index 0000000..3d0ab29
--- /dev/null
+++ b/queue-6.4/arm64-dts-freescale-fix-vpu-g2-clock.patch
@@ -0,0 +1,37 @@
+From 1fdd30d2d8ae803a66f5eda6e6db5281e47609e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 11:42:00 +0200
+Subject: arm64: dts: freescale: Fix VPU G2 clock
+
+From: Benjamin Gaignard <benjamin.gaignard@collabora.com>
+
+[ Upstream commit b27bfc5103c72f84859bd32731b6a09eafdeda05 ]
+
+Set the VPU G2 clock to 300MHz as described in the documentation.
+This fixes pixel errors occurring with large-resolution (>= 2560x1600)
+HEVC test streams when using the postprocessor to produce NV12.
+
+Fixes: 4ac7e4a81272 ("arm64: dts: imx8mq: Enable both G1 and G2 VPU's with vpu-blk-ctrl")
+Signed-off-by: Benjamin Gaignard <benjamin.gaignard@collabora.com>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/freescale/imx8mq.dtsi | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+index 0492556a10dbc..345c70c6c697a 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+@@ -770,7 +770,7 @@
+                                                                        <&clk IMX8MQ_SYS1_PLL_800M>,
+                                                                        <&clk IMX8MQ_VPU_PLL>;
+                                               assigned-clock-rates = <600000000>,
+-                                                                     <600000000>,
++                                                                     <300000000>,
+                                                                      <800000000>,
+                                                                      <0>;
+                                       };
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm64-dts-imx8mm-venice-gw7903-disable-disp_blk_ctrl.patch b/queue-6.4/arm64-dts-imx8mm-venice-gw7903-disable-disp_blk_ctrl.patch

new file mode 100644 (file)

index 0000000..c8d9199
--- /dev/null
+++ b/queue-6.4/arm64-dts-imx8mm-venice-gw7903-disable-disp_blk_ctrl.patch
@@ -0,0 +1,42 @@
+From 442c0c00431c960b96cbc394f0ef8d7a4f2049be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Jun 2023 08:39:45 -0700
+Subject: arm64: dts: imx8mm-venice-gw7903: disable disp_blk_ctrl
+
+From: Tim Harvey <tharvey@gateworks.com>
+
+[ Upstream commit 3e7d3c5e13b05dda9db92d98803a626378e75438 ]
+
+The GW7903 does not connect the VDD_MIPI power rails thus MIPI is
+disabled. However we must also disable disp_blk_ctrl as it uses the
+pgc_mipi power domain and without it being disabled imx8m-blk-ctrl will
+fail to probe:
+imx8m-blk-ctrl 32e28000.blk-ctrl: error -ETIMEDOUT: failed to attach power domain "mipi-dsi"
+imx8m-blk-ctrl: probe of 32e28000.blk-ctrl failed with error -110
+
+Fixes: a72ba91e5bc7 ("arm64: dts: imx: Add i.mx8mm Gateworks gw7903 dts support")
+Signed-off-by: Tim Harvey <tharvey@gateworks.com>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
+index 363020a08c9b8..4660d086cb099 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts
+@@ -567,6 +567,10 @@
+       status = "okay";
+ };
+ 
++&disp_blk_ctrl {
++      status = "disabled";
++};
++
+ &pgc_mipi {
+       status = "disabled";
+ };
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm64-dts-imx8mm-venice-gw7904-disable-disp_blk_ctrl.patch b/queue-6.4/arm64-dts-imx8mm-venice-gw7904-disable-disp_blk_ctrl.patch

new file mode 100644 (file)

index 0000000..d9a30bc
--- /dev/null
+++ b/queue-6.4/arm64-dts-imx8mm-venice-gw7904-disable-disp_blk_ctrl.patch
@@ -0,0 +1,43 @@
+From e09212893a86fb4ff15d3cffad1064bc3541dde4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Jun 2023 08:40:30 -0700
+Subject: arm64: dts: imx8mm-venice-gw7904: disable disp_blk_ctrl
+
+From: Tim Harvey <tharvey@gateworks.com>
+
+[ Upstream commit f7a0b57524cf811ac06257a5099f1b7c19ee7310 ]
+
+The GW7904 does not connect the VDD_MIPI power rails thus MIPI is
+disabled. However we must also disable disp_blk_ctrl as it uses the
+pgc_mipi power domain and without it being disabled imx8m-blk-ctrl will
+fail to probe:
+imx8m-blk-ctrl 32e28000.blk-ctrl: error -ETIMEDOUT: failed to attach
+power domain "mipi-dsi"
+imx8m-blk-ctrl: probe of 32e28000.blk-ctrl failed with error -110
+
+Fixes: b999bdaf0597 ("arm64: dts: imx: Add i.mx8mm Gateworks gw7904 dts support")
+Signed-off-by: Tim Harvey <tharvey@gateworks.com>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
+index 93088fa1c3b9c..d5b7168558124 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts
+@@ -628,6 +628,10 @@
+       status = "okay";
+ };
+ 
++&disp_blk_ctrl {
++      status = "disabled";
++};
++
+ &pgc_mipi {
+       status = "disabled";
+ };
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm64-dts-imx8mn-var-som-add-missing-pull-up-for-onb.patch b/queue-6.4/arm64-dts-imx8mn-var-som-add-missing-pull-up-for-onb.patch

new file mode 100644 (file)

index 0000000..309283a
--- /dev/null
+++ b/queue-6.4/arm64-dts-imx8mn-var-som-add-missing-pull-up-for-onb.patch
@@ -0,0 +1,47 @@
+From 53c4367ef8a1824fa3caf147edba6b5d38587213 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jul 2023 09:48:00 -0400
+Subject: arm64: dts: imx8mn-var-som: add missing pull-up for onboard PHY reset
+ pinmux
+
+From: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+
+[ Upstream commit 253be5b53c2792fb4384f8005b05421e6f040ee3 ]
+
+For SOMs with an onboard PHY, the RESET_N pull-up resistor is
+currently deactivated in the pinmux configuration. When the pinmux
+code selects the GPIO function for this pin, with a default direction
+of input, this prevents the RESET_N pin from being taken to the proper
+3.3V level (deasserted), and this results in the PHY being not
+detected since it is held in reset.
+
+Taken from the RESET_N pin description in the ADIN1300 datasheet:
+    This pin requires a 1K pull-up resistor to AVDD_3P3.
+
+Activate the pull-up resistor to fix the issue.
+
+Fixes: ade0176dd8a0 ("arm64: dts: imx8mn-var-som: Add Variscite VAR-SOM-MX8MN System on Module")
+Signed-off-by: Hugo Villeneuve <hvilleneuve@dimonoff.com>
+Reviewed-by: Fabio Estevam <festevam@gmail.com>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+index cbd9d124c80d0..c9d4fb75c21d3 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi
+@@ -351,7 +351,7 @@
+                       MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC           0x91
+                       MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL     0x91
+                       MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL     0x1f
+-                      MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9               0x19
++                      MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9               0x159
+               >;
+       };
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm64-dts-phycore-imx8mm-correction-in-gpio-line-nam.patch b/queue-6.4/arm64-dts-phycore-imx8mm-correction-in-gpio-line-nam.patch

new file mode 100644 (file)

index 0000000..ebdbf1a
--- /dev/null
+++ b/queue-6.4/arm64-dts-phycore-imx8mm-correction-in-gpio-line-nam.patch
@@ -0,0 +1,51 @@
+From fae76e851b0d2ba213ca8894b1c6f73ee5fb2729 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Jun 2023 11:50:09 +0200
+Subject: arm64: dts: phycore-imx8mm: Correction in gpio-line-names
+
+From: Yashwanth Varakala <y.varakala@phytec.de>
+
+[ Upstream commit 1ef0aa137a96c5f0564f2db0c556a4f0f60ce8f5 ]
+
+Remove unused nINT_ETHPHY entry from gpio-line-names in gpio1 nodes of
+phyCORE-i.MX8MM and phyBOARD-Polis-i.MX8MM devicetrees.
+
+Fixes: ae6847f26ac9 ("arm64: dts: freescale: Add phyBOARD-Polis-i.MX8MM support")
+Signed-off-by: Yashwanth Varakala <y.varakala@phytec.de>
+Signed-off-by: Cem Tenruh <c.tenruh@phytec.de>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts | 2 +-
+ arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi       | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
+index 03e7679217b24..479948f8a4b75 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
++++ b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts
+@@ -141,7 +141,7 @@
+ };
+ 
+ &gpio1 {
+-      gpio-line-names = "nINT_ETHPHY", "LED_RED", "WDOG_INT", "X_RTC_INT",
++      gpio-line-names = "", "LED_RED", "WDOG_INT", "X_RTC_INT",
+               "", "", "", "RESET_ETHPHY",
+               "CAN_nINT", "CAN_EN", "nENABLE_FLATLINK", "",
+               "USB_OTG_VBUS_EN", "", "LED_GREEN", "LED_BLUE";
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
+index 2dd179ec923d7..847f08537b48a 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
+@@ -111,7 +111,7 @@
+ };
+ 
+ &gpio1 {
+-      gpio-line-names = "nINT_ETHPHY", "", "WDOG_INT", "X_RTC_INT",
++      gpio-line-names = "", "", "WDOG_INT", "X_RTC_INT",
+               "", "", "", "RESET_ETHPHY",
+               "", "", "nENABLE_FLATLINK";
+ };
+-- 
+2.40.1
+
diff --git a/queue-6.4/arm64-dts-phycore-imx8mm-label-typo-fix-of-vpu.patch b/queue-6.4/arm64-dts-phycore-imx8mm-label-typo-fix-of-vpu.patch

new file mode 100644 (file)

index 0000000..d5947c2
--- /dev/null
+++ b/queue-6.4/arm64-dts-phycore-imx8mm-label-typo-fix-of-vpu.patch
@@ -0,0 +1,37 @@
+From 248d988204a02bb47862d7fdda3b51bbf799f9c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 16 Jun 2023 11:50:07 +0200
+Subject: arm64: dts: phycore-imx8mm: Label typo-fix of VPU
+
+From: Yashwanth Varakala <y.varakala@phytec.de>
+
+[ Upstream commit cddeefc1663294fb74b31ff5029a83c0e819ff3a ]
+
+Corrected the label of the VPU regulator node (buck 3)
+from reg_vdd_gpu to reg_vdd_vpu.
+
+Fixes: ae6847f26ac9 ("arm64: dts: freescale: Add phyBOARD-Polis-i.MX8MM support")
+Signed-off-by: Yashwanth Varakala <y.varakala@phytec.de>
+Signed-off-by: Cem Tenruh <c.tenruh@phytec.de>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
+index 92616bc4f71f5..2dd179ec923d7 100644
+--- a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
++++ b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi
+@@ -210,7 +210,7 @@
+                               };
+                       };
+ 
+-                      reg_vdd_gpu: buck3 {
++                      reg_vdd_vpu: buck3 {
+                               regulator-always-on;
+                               regulator-boot-on;
+                               regulator-max-microvolt = <1000000>;
+-- 
+2.40.1
+
diff --git a/queue-6.4/bnxt-don-t-handle-xdp-in-netpoll.patch b/queue-6.4/bnxt-don-t-handle-xdp-in-netpoll.patch

new file mode 100644 (file)

index 0000000..e44102d
--- /dev/null
+++ b/queue-6.4/bnxt-don-t-handle-xdp-in-netpoll.patch
@@ -0,0 +1,199 @@
+From 7f31cba758d8c1d98474a38f3949a283bc4c8474 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 13:50:20 -0700
+Subject: bnxt: don't handle XDP in netpoll
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 37b61cda9c1606cd8b6445d900ca9dc03185e8b6 ]
+
+Similarly to other recently fixed drivers make sure we don't
+try to access XDP or page pool APIs when NAPI budget is 0.
+NAPI budget of 0 may mean that we are in netpoll.
+
+This may result in running software IRQs in hard IRQ context,
+leading to deadlocks or crashes.
+
+To make sure bnapi->tx_pkts don't get wiped without handling
+the events, move clearing the field into the handler itself.
+Remember to clear tx_pkts after reset (bnxt_enable_napi())
+as it's technically possible that netpoll will accumulate
+some tx_pkts and then a reset will happen, leaving tx_pkts
+out of sync with reality.
+
+Fixes: 322b87ca55f2 ("bnxt_en: add page_pool support")
+Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
+Reviewed-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://lore.kernel.org/r/20230728205020.2784844-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 26 +++++++++++--------
+ drivers/net/ethernet/broadcom/bnxt/bnxt.h     |  2 +-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c |  8 +++++-
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h |  2 +-
+ 4 files changed, 24 insertions(+), 14 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index b499bc9c4e067..0b314bf4fbe65 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -633,12 +633,13 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev)
+       return NETDEV_TX_OK;
+ }
+ 
+-static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
++static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
+ {
+       struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
+       struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index);
+       u16 cons = txr->tx_cons;
+       struct pci_dev *pdev = bp->pdev;
++      int nr_pkts = bnapi->tx_pkts;
+       int i;
+       unsigned int tx_bytes = 0;
+ 
+@@ -688,6 +689,7 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+               dev_kfree_skb_any(skb);
+       }
+ 
++      bnapi->tx_pkts = 0;
+       WRITE_ONCE(txr->tx_cons, cons);
+ 
+       __netif_txq_completed_wake(txq, nr_pkts, tx_bytes,
+@@ -2573,12 +2575,11 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+       return rx_pkts;
+ }
+ 
+-static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi)
++static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi,
++                                int budget)
+ {
+-      if (bnapi->tx_pkts) {
+-              bnapi->tx_int(bp, bnapi, bnapi->tx_pkts);
+-              bnapi->tx_pkts = 0;
+-      }
++      if (bnapi->tx_pkts)
++              bnapi->tx_int(bp, bnapi, budget);
+ 
+       if ((bnapi->events & BNXT_RX_EVENT) && !(bnapi->in_reset)) {
+               struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
+@@ -2607,7 +2608,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
+        */
+       bnxt_db_cq(bp, &cpr->cp_db, cpr->cp_raw_cons);
+ 
+-      __bnxt_poll_work_done(bp, bnapi);
++      __bnxt_poll_work_done(bp, bnapi, budget);
+       return rx_pkts;
+ }
+ 
+@@ -2738,7 +2739,7 @@ static int __bnxt_poll_cqs(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
+ }
+ 
+ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi,
+-                               u64 dbr_type)
++                               u64 dbr_type, int budget)
+ {
+       struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
+       int i;
+@@ -2754,7 +2755,7 @@ static void __bnxt_poll_cqs_done(struct bnxt *bp, struct bnxt_napi *bnapi,
+                       cpr2->had_work_done = 0;
+               }
+       }
+-      __bnxt_poll_work_done(bp, bnapi);
++      __bnxt_poll_work_done(bp, bnapi, budget);
+ }
+ 
+ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
+@@ -2784,7 +2785,8 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
+                       if (cpr->has_more_work)
+                               break;
+ 
+-                      __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL);
++                      __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ_ARMALL,
++                                           budget);
+                       cpr->cp_raw_cons = raw_cons;
+                       if (napi_complete_done(napi, work_done))
+                               BNXT_DB_NQ_ARM_P5(&cpr->cp_db,
+@@ -2814,7 +2816,7 @@ static int bnxt_poll_p5(struct napi_struct *napi, int budget)
+               }
+               raw_cons = NEXT_RAW_CMP(raw_cons);
+       }
+-      __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ);
++      __bnxt_poll_cqs_done(bp, bnapi, DBR_TYPE_CQ, budget);
+       if (raw_cons != cpr->cp_raw_cons) {
+               cpr->cp_raw_cons = raw_cons;
+               BNXT_DB_NQ_P5(&cpr->cp_db, raw_cons);
+@@ -9433,6 +9435,8 @@ static void bnxt_enable_napi(struct bnxt *bp)
+                       cpr->sw_stats.rx.rx_resets++;
+               bnapi->in_reset = false;
+ 
++              bnapi->tx_pkts = 0;
++
+               if (bnapi->rx_ring) {
+                       INIT_WORK(&cpr->dim.work, bnxt_dim_work);
+                       cpr->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE;
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+index 080e73496066b..bb95c3dc5270f 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+@@ -1005,7 +1005,7 @@ struct bnxt_napi {
+       struct bnxt_tx_ring_info        *tx_ring;
+ 
+       void                    (*tx_int)(struct bnxt *, struct bnxt_napi *,
+-                                        int);
++                                        int budget);
+       int                     tx_pkts;
+       u8                      events;
+ 
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+index 4efa5fe6972b2..7f2f9a317d473 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -125,16 +125,20 @@ static void __bnxt_xmit_xdp_redirect(struct bnxt *bp,
+       dma_unmap_len_set(tx_buf, len, 0);
+ }
+ 
+-void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
++void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
+ {
+       struct bnxt_tx_ring_info *txr = bnapi->tx_ring;
+       struct bnxt_rx_ring_info *rxr = bnapi->rx_ring;
+       bool rx_doorbell_needed = false;
++      int nr_pkts = bnapi->tx_pkts;
+       struct bnxt_sw_tx_bd *tx_buf;
+       u16 tx_cons = txr->tx_cons;
+       u16 last_tx_cons = tx_cons;
+       int i, j, frags;
+ 
++      if (!budget)
++              return;
++
+       for (i = 0; i < nr_pkts; i++) {
+               tx_buf = &txr->tx_buf_ring[tx_cons];
+ 
+@@ -161,6 +165,8 @@ void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts)
+               }
+               tx_cons = NEXT_TX(tx_cons);
+       }
++
++      bnapi->tx_pkts = 0;
+       WRITE_ONCE(txr->tx_cons, tx_cons);
+       if (rx_doorbell_needed) {
+               tx_buf = &txr->tx_buf_ring[last_tx_cons];
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+index ea430d6961df3..5e412c5655ba5 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.h
+@@ -16,7 +16,7 @@ struct bnxt_sw_tx_bd *bnxt_xmit_bd(struct bnxt *bp,
+                                  struct bnxt_tx_ring_info *txr,
+                                  dma_addr_t mapping, u32 len,
+                                  struct xdp_buff *xdp);
+-void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int nr_pkts);
++void bnxt_tx_int_xdp(struct bnxt *bp, struct bnxt_napi *bnapi, int budget);
+ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
+                struct xdp_buff xdp, struct page *page, u8 **data_ptr,
+                unsigned int *len, u8 *event);
+-- 
+2.40.1
+
diff --git a/queue-6.4/bnxt_en-fix-max_mtu-setting-for-multi-buf-xdp.patch b/queue-6.4/bnxt_en-fix-max_mtu-setting-for-multi-buf-xdp.patch

new file mode 100644 (file)

index 0000000..03b2d3b
--- /dev/null
+++ b/queue-6.4/bnxt_en-fix-max_mtu-setting-for-multi-buf-xdp.patch
@@ -0,0 +1,72 @@
+From 73c914d132815c03b643e05742761e14d773ddc8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 07:20:43 -0700
+Subject: bnxt_en: Fix max_mtu setting for multi-buf XDP
+
+From: Michael Chan <michael.chan@broadcom.com>
+
+[ Upstream commit 08450ea98ae98d5a35145b675b76db616046ea11 ]
+
+The existing code does not allow the MTU to be set to the maximum even
+after an XDP program supporting multiple buffers is attached.  Fix it
+to set the netdev->max_mtu to the maximum value if the attached XDP
+program supports multiple buffers, regardless of the current MTU value.
+
+Also use a local variable dev instead of repeatedly using bp->dev.
+
+Fixes: 1dc4c557bfed ("bnxt: adding bnxt_xdp_build_skb to build skb from multibuffer xdp_buff")
+Reviewed-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://lore.kernel.org/r/20230731142043.58855-3-michael.chan@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c | 17 ++++++++++-------
+ 1 file changed, 10 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 81ed3744fa330..e481960cb6c7a 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -4005,26 +4005,29 @@ void bnxt_set_ring_params(struct bnxt *bp)
+  */
+ int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode)
+ {
++      struct net_device *dev = bp->dev;
++
+       if (page_mode) {
+               bp->flags &= ~BNXT_FLAG_AGG_RINGS;
+               bp->flags |= BNXT_FLAG_RX_PAGE_MODE;
+ 
+-              if (bp->dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
++              if (bp->xdp_prog->aux->xdp_has_frags)
++                      dev->max_mtu = min_t(u16, bp->max_mtu, BNXT_MAX_MTU);
++              else
++                      dev->max_mtu =
++                              min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
++              if (dev->mtu > BNXT_MAX_PAGE_MODE_MTU) {
+                       bp->flags |= BNXT_FLAG_JUMBO;
+                       bp->rx_skb_func = bnxt_rx_multi_page_skb;
+-                      bp->dev->max_mtu =
+-                              min_t(u16, bp->max_mtu, BNXT_MAX_MTU);
+               } else {
+                       bp->flags |= BNXT_FLAG_NO_AGG_RINGS;
+                       bp->rx_skb_func = bnxt_rx_page_skb;
+-                      bp->dev->max_mtu =
+-                              min_t(u16, bp->max_mtu, BNXT_MAX_PAGE_MODE_MTU);
+               }
+               bp->rx_dir = DMA_BIDIRECTIONAL;
+               /* Disable LRO or GRO_HW */
+-              netdev_update_features(bp->dev);
++              netdev_update_features(dev);
+       } else {
+-              bp->dev->max_mtu = bp->max_mtu;
++              dev->max_mtu = bp->max_mtu;
+               bp->flags &= ~BNXT_FLAG_RX_PAGE_MODE;
+               bp->rx_dir = DMA_FROM_DEVICE;
+               bp->rx_skb_func = bnxt_rx_skb;
+-- 
+2.40.1
+
diff --git a/queue-6.4/bnxt_en-fix-page-pool-logic-for-page-size-64k.patch b/queue-6.4/bnxt_en-fix-page-pool-logic-for-page-size-64k.patch

new file mode 100644 (file)

index 0000000..c4aa331
--- /dev/null
+++ b/queue-6.4/bnxt_en-fix-page-pool-logic-for-page-size-64k.patch
@@ -0,0 +1,193 @@
+From 7b3d2a424ee64db32526fe842d536affe2c11cd8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 07:20:42 -0700
+Subject: bnxt_en: Fix page pool logic for page size >= 64K
+
+From: Somnath Kotur <somnath.kotur@broadcom.com>
+
+[ Upstream commit f6974b4c2d8e1062b5a52228ee47293c15b4ee1e ]
+
+The RXBD length field on all bnxt chips is 16-bit and so we cannot
+support a full page when the native page size is 64K or greater.
+The non-XDP (non page pool) code path has logic to handle this but
+the XDP page pool code path does not handle this.  Add the missing
+logic to use page_pool_dev_alloc_frag() to allocate 32K chunks if
+the page size is 64K or greater.
+
+Fixes: 9f4b28301ce6 ("bnxt: XDP multibuffer enablement")
+Link: https://lore.kernel.org/netdev/20230728231829.235716-2-michael.chan@broadcom.com/
+Reviewed-by: Andy Gospodarek <andrew.gospodarek@broadcom.com>
+Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
+Signed-off-by: Michael Chan <michael.chan@broadcom.com>
+Link: https://lore.kernel.org/r/20230731142043.58855-2-michael.chan@broadcom.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/bnxt/bnxt.c     | 42 ++++++++++++-------
+ drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c |  6 +--
+ 2 files changed, 29 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+index 0b314bf4fbe65..81ed3744fa330 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+@@ -699,17 +699,24 @@ static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
+ 
+ static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping,
+                                        struct bnxt_rx_ring_info *rxr,
++                                       unsigned int *offset,
+                                        gfp_t gfp)
+ {
+       struct device *dev = &bp->pdev->dev;
+       struct page *page;
+ 
+-      page = page_pool_dev_alloc_pages(rxr->page_pool);
++      if (PAGE_SIZE > BNXT_RX_PAGE_SIZE) {
++              page = page_pool_dev_alloc_frag(rxr->page_pool, offset,
++                                              BNXT_RX_PAGE_SIZE);
++      } else {
++              page = page_pool_dev_alloc_pages(rxr->page_pool);
++              *offset = 0;
++      }
+       if (!page)
+               return NULL;
+ 
+-      *mapping = dma_map_page_attrs(dev, page, 0, PAGE_SIZE, bp->rx_dir,
+-                                    DMA_ATTR_WEAK_ORDERING);
++      *mapping = dma_map_page_attrs(dev, page, *offset, BNXT_RX_PAGE_SIZE,
++                                    bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
+       if (dma_mapping_error(dev, *mapping)) {
+               page_pool_recycle_direct(rxr->page_pool, page);
+               return NULL;
+@@ -749,15 +756,16 @@ int bnxt_alloc_rx_data(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+       dma_addr_t mapping;
+ 
+       if (BNXT_RX_PAGE_MODE(bp)) {
++              unsigned int offset;
+               struct page *page =
+-                      __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
++                      __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);
+ 
+               if (!page)
+                       return -ENOMEM;
+ 
+               mapping += bp->rx_dma_offset;
+               rx_buf->data = page;
+-              rx_buf->data_ptr = page_address(page) + bp->rx_offset;
++              rx_buf->data_ptr = page_address(page) + offset + bp->rx_offset;
+       } else {
+               u8 *data = __bnxt_alloc_rx_frag(bp, &mapping, gfp);
+ 
+@@ -817,7 +825,7 @@ static inline int bnxt_alloc_rx_page(struct bnxt *bp,
+       unsigned int offset = 0;
+ 
+       if (BNXT_RX_PAGE_MODE(bp)) {
+-              page = __bnxt_alloc_rx_page(bp, &mapping, rxr, gfp);
++              page = __bnxt_alloc_rx_page(bp, &mapping, rxr, &offset, gfp);
+ 
+               if (!page)
+                       return -ENOMEM;
+@@ -964,15 +972,15 @@ static struct sk_buff *bnxt_rx_multi_page_skb(struct bnxt *bp,
+               return NULL;
+       }
+       dma_addr -= bp->rx_dma_offset;
+-      dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
+-                           DMA_ATTR_WEAK_ORDERING);
+-      skb = build_skb(page_address(page), PAGE_SIZE);
++      dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE,
++                           bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
++      skb = build_skb(data_ptr - bp->rx_offset, BNXT_RX_PAGE_SIZE);
+       if (!skb) {
+               page_pool_recycle_direct(rxr->page_pool, page);
+               return NULL;
+       }
+       skb_mark_for_recycle(skb);
+-      skb_reserve(skb, bp->rx_dma_offset);
++      skb_reserve(skb, bp->rx_offset);
+       __skb_put(skb, len);
+ 
+       return skb;
+@@ -998,8 +1006,8 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
+               return NULL;
+       }
+       dma_addr -= bp->rx_dma_offset;
+-      dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, PAGE_SIZE, bp->rx_dir,
+-                           DMA_ATTR_WEAK_ORDERING);
++      dma_unmap_page_attrs(&bp->pdev->dev, dma_addr, BNXT_RX_PAGE_SIZE,
++                           bp->rx_dir, DMA_ATTR_WEAK_ORDERING);
+ 
+       if (unlikely(!payload))
+               payload = eth_get_headlen(bp->dev, data_ptr, len);
+@@ -1012,7 +1020,7 @@ static struct sk_buff *bnxt_rx_page_skb(struct bnxt *bp,
+ 
+       skb_mark_for_recycle(skb);
+       off = (void *)data_ptr - page_address(page);
+-      skb_add_rx_frag(skb, 0, page, off, len, PAGE_SIZE);
++      skb_add_rx_frag(skb, 0, page, off, len, BNXT_RX_PAGE_SIZE);
+       memcpy(skb->data - NET_IP_ALIGN, data_ptr - NET_IP_ALIGN,
+              payload + NET_IP_ALIGN);
+ 
+@@ -1147,7 +1155,7 @@ static struct sk_buff *bnxt_rx_agg_pages_skb(struct bnxt *bp,
+ 
+       skb->data_len += total_frag_len;
+       skb->len += total_frag_len;
+-      skb->truesize += PAGE_SIZE * agg_bufs;
++      skb->truesize += BNXT_RX_PAGE_SIZE * agg_bufs;
+       return skb;
+ }
+ 
+@@ -2949,8 +2957,8 @@ static void bnxt_free_one_rx_ring_skbs(struct bnxt *bp, int ring_nr)
+               rx_buf->data = NULL;
+               if (BNXT_RX_PAGE_MODE(bp)) {
+                       mapping -= bp->rx_dma_offset;
+-                      dma_unmap_page_attrs(&pdev->dev, mapping, PAGE_SIZE,
+-                                           bp->rx_dir,
++                      dma_unmap_page_attrs(&pdev->dev, mapping,
++                                           BNXT_RX_PAGE_SIZE, bp->rx_dir,
+                                            DMA_ATTR_WEAK_ORDERING);
+                       page_pool_recycle_direct(rxr->page_pool, data);
+               } else {
+@@ -3219,6 +3227,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp,
+       pp.napi = &rxr->bnapi->napi;
+       pp.dev = &bp->pdev->dev;
+       pp.dma_dir = DMA_BIDIRECTIONAL;
++      if (PAGE_SIZE > BNXT_RX_PAGE_SIZE)
++              pp.flags |= PP_FLAG_PAGE_FRAG;
+ 
+       rxr->page_pool = page_pool_create(&pp);
+       if (IS_ERR(rxr->page_pool)) {
+diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+index 7f2f9a317d473..fb43232310b2d 100644
+--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
++++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+@@ -186,8 +186,8 @@ void bnxt_xdp_buff_init(struct bnxt *bp, struct bnxt_rx_ring_info *rxr,
+                       u16 cons, u8 *data_ptr, unsigned int len,
+                       struct xdp_buff *xdp)
+ {
++      u32 buflen = BNXT_RX_PAGE_SIZE;
+       struct bnxt_sw_rx_bd *rx_buf;
+-      u32 buflen = PAGE_SIZE;
+       struct pci_dev *pdev;
+       dma_addr_t mapping;
+       u32 offset;
+@@ -303,7 +303,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
+               rx_buf = &rxr->rx_buf_ring[cons];
+               mapping = rx_buf->mapping - bp->rx_dma_offset;
+               dma_unmap_page_attrs(&pdev->dev, mapping,
+-                                   PAGE_SIZE, bp->rx_dir,
++                                   BNXT_RX_PAGE_SIZE, bp->rx_dir,
+                                    DMA_ATTR_WEAK_ORDERING);
+ 
+               /* if we are unable to allocate a new buffer, abort and reuse */
+@@ -486,7 +486,7 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags,
+       }
+       xdp_update_skb_shared_info(skb, num_frags,
+                                  sinfo->xdp_frags_size,
+-                                 PAGE_SIZE * sinfo->nr_frags,
++                                 BNXT_RX_PAGE_SIZE * sinfo->nr_frags,
+                                  xdp_buff_is_frag_pfmemalloc(xdp));
+       return skb;
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.4/bpf-add-length-check-for-sk_diag_bpf_storage_req_map.patch b/queue-6.4/bpf-add-length-check-for-sk_diag_bpf_storage_req_map.patch

new file mode 100644 (file)

index 0000000..d02c24c
--- /dev/null
+++ b/queue-6.4/bpf-add-length-check-for-sk_diag_bpf_storage_req_map.patch
@@ -0,0 +1,49 @@
+From 45dde16cbc61ae409d5be3ad220493ea6ccb5cfd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jul 2023 10:33:30 +0800
+Subject: bpf: Add length check for SK_DIAG_BPF_STORAGE_REQ_MAP_FD parsing
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit bcc29b7f5af6797702c2306a7aacb831fc5ce9cb ]
+
+The nla_for_each_nested parsing in function bpf_sk_storage_diag_alloc
+does not check the length of the nested attribute. This can lead to an
+out-of-attribute read and allow a malformed nlattr (e.g., length 0) to
+be viewed as a 4 byte integer.
+
+This patch adds an additional check when the nlattr is getting counted.
+This makes sure the latter nla_get_u32 can access the attributes with
+the correct length.
+
+Fixes: 1ed4d92458a9 ("bpf: INET_DIAG support in bpf_sk_storage")
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Link: https://lore.kernel.org/r/20230725023330.422856-1-linma@zju.edu.cn
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/bpf_sk_storage.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
+index d4172534dfa8d..cca7594be92ec 100644
+--- a/net/core/bpf_sk_storage.c
++++ b/net/core/bpf_sk_storage.c
+@@ -496,8 +496,11 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
+               return ERR_PTR(-EPERM);
+ 
+       nla_for_each_nested(nla, nla_stgs, rem) {
+-              if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
++              if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
++                      if (nla_len(nla) != sizeof(u32))
++                              return ERR_PTR(-EINVAL);
+                       nr_maps++;
++              }
+       }
+ 
+       diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
+-- 
+2.40.1
+
diff --git a/queue-6.4/bpf-centralize-permissions-checks-for-all-bpf-map-ty.patch b/queue-6.4/bpf-centralize-permissions-checks-for-all-bpf-map-ty.patch

new file mode 100644 (file)

index 0000000..8711a0c
--- /dev/null
+++ b/queue-6.4/bpf-centralize-permissions-checks-for-all-bpf-map-ty.patch
@@ -0,0 +1,319 @@
+From 0cc0ef483ca6b89693dd77e7b1b96b8a4f9f44f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Jun 2023 15:35:32 -0700
+Subject: bpf: Centralize permissions checks for all BPF map types
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 6c3eba1c5e283fd2bb1c076dbfcb47f569c3bfde ]
+
+This allows making more centralized decisions later on, and generally
+makes it very explicit which maps are privileged and which are not
+(e.g., LRU_HASH and LRU_PERCPU_HASH, which are privileged HASH variants,
+as opposed to unprivileged HASH and HASH_PERCPU; now this is explicit
+and easy to verify).
+
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Link: https://lore.kernel.org/bpf/20230613223533.3689589-4-andrii@kernel.org
+Stable-dep-of: 640a604585aa ("bpf, cpumap: Make sure kthread is running before map update returns")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/bloom_filter.c                     |  3 --
+ kernel/bpf/bpf_local_storage.c                |  3 --
+ kernel/bpf/bpf_struct_ops.c                   |  3 --
+ kernel/bpf/cpumap.c                           |  4 --
+ kernel/bpf/devmap.c                           |  3 --
+ kernel/bpf/hashtab.c                          |  6 ---
+ kernel/bpf/lpm_trie.c                         |  3 --
+ kernel/bpf/queue_stack_maps.c                 |  4 --
+ kernel/bpf/reuseport_array.c                  |  3 --
+ kernel/bpf/stackmap.c                         |  3 --
+ kernel/bpf/syscall.c                          | 47 +++++++++++++++++++
+ net/core/sock_map.c                           |  4 --
+ net/xdp/xskmap.c                              |  4 --
+ .../bpf/prog_tests/unpriv_bpf_disabled.c      |  6 ++-
+ 14 files changed, 52 insertions(+), 44 deletions(-)
+
+diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
+index 540331b610a97..addf3dd57b59b 100644
+--- a/kernel/bpf/bloom_filter.c
++++ b/kernel/bpf/bloom_filter.c
+@@ -86,9 +86,6 @@ static struct bpf_map *bloom_map_alloc(union bpf_attr *attr)
+       int numa_node = bpf_map_attr_numa_node(attr);
+       struct bpf_bloom_filter *bloom;
+ 
+-      if (!bpf_capable())
+-              return ERR_PTR(-EPERM);
+-
+       if (attr->key_size != 0 || attr->value_size == 0 ||
+           attr->max_entries == 0 ||
+           attr->map_flags & ~BLOOM_CREATE_FLAG_MASK ||
+diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
+index 47d9948d768f0..b5149cfce7d4d 100644
+--- a/kernel/bpf/bpf_local_storage.c
++++ b/kernel/bpf/bpf_local_storage.c
+@@ -723,9 +723,6 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
+           !attr->btf_key_type_id || !attr->btf_value_type_id)
+               return -EINVAL;
+ 
+-      if (!bpf_capable())
+-              return -EPERM;
+-
+       if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
+               return -E2BIG;
+ 
+diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
+index d3f0a4825fa61..116a0ce378ecd 100644
+--- a/kernel/bpf/bpf_struct_ops.c
++++ b/kernel/bpf/bpf_struct_ops.c
+@@ -655,9 +655,6 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
+       const struct btf_type *t, *vt;
+       struct bpf_map *map;
+ 
+-      if (!bpf_capable())
+-              return ERR_PTR(-EPERM);
+-
+       st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
+       if (!st_ops)
+               return ERR_PTR(-ENOTSUPP);
+diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
+index 3da63be602d1c..6ae02be7a48e3 100644
+--- a/kernel/bpf/cpumap.c
++++ b/kernel/bpf/cpumap.c
+@@ -28,7 +28,6 @@
+ #include <linux/sched.h>
+ #include <linux/workqueue.h>
+ #include <linux/kthread.h>
+-#include <linux/capability.h>
+ #include <trace/events/xdp.h>
+ #include <linux/btf_ids.h>
+ 
+@@ -89,9 +88,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
+       u32 value_size = attr->value_size;
+       struct bpf_cpu_map *cmap;
+ 
+-      if (!bpf_capable())
+-              return ERR_PTR(-EPERM);
+-
+       /* check sanity of attributes */
+       if (attr->max_entries == 0 || attr->key_size != 4 ||
+           (value_size != offsetofend(struct bpf_cpumap_val, qsize) &&
+diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
+index 802692fa3905c..49cc0b5671c61 100644
+--- a/kernel/bpf/devmap.c
++++ b/kernel/bpf/devmap.c
+@@ -160,9 +160,6 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr)
+       struct bpf_dtab *dtab;
+       int err;
+ 
+-      if (!capable(CAP_NET_ADMIN))
+-              return ERR_PTR(-EPERM);
+-
+       dtab = bpf_map_area_alloc(sizeof(*dtab), NUMA_NO_NODE);
+       if (!dtab)
+               return ERR_PTR(-ENOMEM);
+diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
+index 9901efee4339d..56d3da7d0bc66 100644
+--- a/kernel/bpf/hashtab.c
++++ b/kernel/bpf/hashtab.c
+@@ -422,12 +422,6 @@ static int htab_map_alloc_check(union bpf_attr *attr)
+       BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
+                    offsetof(struct htab_elem, hash_node.pprev));
+ 
+-      if (lru && !bpf_capable())
+-              /* LRU implementation is much complicated than other
+-               * maps.  Hence, limit to CAP_BPF.
+-               */
+-              return -EPERM;
+-
+       if (zero_seed && !capable(CAP_SYS_ADMIN))
+               /* Guard against local DoS, and discourage production use. */
+               return -EPERM;
+diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
+index e0d3ddf2037ab..17c7e7782a1f7 100644
+--- a/kernel/bpf/lpm_trie.c
++++ b/kernel/bpf/lpm_trie.c
+@@ -544,9 +544,6 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
+ {
+       struct lpm_trie *trie;
+ 
+-      if (!bpf_capable())
+-              return ERR_PTR(-EPERM);
+-
+       /* check sanity of attributes */
+       if (attr->max_entries == 0 ||
+           !(attr->map_flags & BPF_F_NO_PREALLOC) ||
+diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
+index 601609164ef34..8d2ddcb7566b7 100644
+--- a/kernel/bpf/queue_stack_maps.c
++++ b/kernel/bpf/queue_stack_maps.c
+@@ -7,7 +7,6 @@
+ #include <linux/bpf.h>
+ #include <linux/list.h>
+ #include <linux/slab.h>
+-#include <linux/capability.h>
+ #include <linux/btf_ids.h>
+ #include "percpu_freelist.h"
+ 
+@@ -46,9 +45,6 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
+ /* Called from syscall */
+ static int queue_stack_map_alloc_check(union bpf_attr *attr)
+ {
+-      if (!bpf_capable())
+-              return -EPERM;
+-
+       /* check sanity of attributes */
+       if (attr->max_entries == 0 || attr->key_size != 0 ||
+           attr->value_size == 0 ||
+diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
+index cbf2d8d784b89..4b4f9670f1a9a 100644
+--- a/kernel/bpf/reuseport_array.c
++++ b/kernel/bpf/reuseport_array.c
+@@ -151,9 +151,6 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
+       int numa_node = bpf_map_attr_numa_node(attr);
+       struct reuseport_array *array;
+ 
+-      if (!bpf_capable())
+-              return ERR_PTR(-EPERM);
+-
+       /* allocate all map elements and zero-initialize them */
+       array = bpf_map_area_alloc(struct_size(array, ptrs, attr->max_entries), numa_node);
+       if (!array)
+diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
+index b25fce425b2c6..458bb80b14d57 100644
+--- a/kernel/bpf/stackmap.c
++++ b/kernel/bpf/stackmap.c
+@@ -74,9 +74,6 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
+       u64 cost, n_buckets;
+       int err;
+ 
+-      if (!bpf_capable())
+-              return ERR_PTR(-EPERM);
+-
+       if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
+               return ERR_PTR(-EINVAL);
+ 
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index 8fddf0eea9bf2..f715ec5d541ad 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -1156,6 +1156,53 @@ static int map_create(union bpf_attr *attr)
+       if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+               return -EPERM;
+ 
++      /* check privileged map type permissions */
++      switch (map_type) {
++      case BPF_MAP_TYPE_ARRAY:
++      case BPF_MAP_TYPE_PERCPU_ARRAY:
++      case BPF_MAP_TYPE_PROG_ARRAY:
++      case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
++      case BPF_MAP_TYPE_CGROUP_ARRAY:
++      case BPF_MAP_TYPE_ARRAY_OF_MAPS:
++      case BPF_MAP_TYPE_HASH:
++      case BPF_MAP_TYPE_PERCPU_HASH:
++      case BPF_MAP_TYPE_HASH_OF_MAPS:
++      case BPF_MAP_TYPE_RINGBUF:
++      case BPF_MAP_TYPE_USER_RINGBUF:
++      case BPF_MAP_TYPE_CGROUP_STORAGE:
++      case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
++              /* unprivileged */
++              break;
++      case BPF_MAP_TYPE_SK_STORAGE:
++      case BPF_MAP_TYPE_INODE_STORAGE:
++      case BPF_MAP_TYPE_TASK_STORAGE:
++      case BPF_MAP_TYPE_CGRP_STORAGE:
++      case BPF_MAP_TYPE_BLOOM_FILTER:
++      case BPF_MAP_TYPE_LPM_TRIE:
++      case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
++      case BPF_MAP_TYPE_STACK_TRACE:
++      case BPF_MAP_TYPE_QUEUE:
++      case BPF_MAP_TYPE_STACK:
++      case BPF_MAP_TYPE_LRU_HASH:
++      case BPF_MAP_TYPE_LRU_PERCPU_HASH:
++      case BPF_MAP_TYPE_STRUCT_OPS:
++      case BPF_MAP_TYPE_CPUMAP:
++              if (!bpf_capable())
++                      return -EPERM;
++              break;
++      case BPF_MAP_TYPE_SOCKMAP:
++      case BPF_MAP_TYPE_SOCKHASH:
++      case BPF_MAP_TYPE_DEVMAP:
++      case BPF_MAP_TYPE_DEVMAP_HASH:
++      case BPF_MAP_TYPE_XSKMAP:
++              if (!capable(CAP_NET_ADMIN))
++                      return -EPERM;
++              break;
++      default:
++              WARN(1, "unsupported map type %d", map_type);
++              return -EPERM;
++      }
++
+       map = ops->map_alloc(attr);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
+diff --git a/net/core/sock_map.c b/net/core/sock_map.c
+index 00afb66cd0950..19538d6287144 100644
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -32,8 +32,6 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)
+ {
+       struct bpf_stab *stab;
+ 
+-      if (!capable(CAP_NET_ADMIN))
+-              return ERR_PTR(-EPERM);
+       if (attr->max_entries == 0 ||
+           attr->key_size    != 4 ||
+           (attr->value_size != sizeof(u32) &&
+@@ -1085,8 +1083,6 @@ static struct bpf_map *sock_hash_alloc(union bpf_attr *attr)
+       struct bpf_shtab *htab;
+       int i, err;
+ 
+-      if (!capable(CAP_NET_ADMIN))
+-              return ERR_PTR(-EPERM);
+       if (attr->max_entries == 0 ||
+           attr->key_size    == 0 ||
+           (attr->value_size != sizeof(u32) &&
+diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
+index 2c1427074a3bb..e1c526f97ce31 100644
+--- a/net/xdp/xskmap.c
++++ b/net/xdp/xskmap.c
+@@ -5,7 +5,6 @@
+ 
+ #include <linux/bpf.h>
+ #include <linux/filter.h>
+-#include <linux/capability.h>
+ #include <net/xdp_sock.h>
+ #include <linux/slab.h>
+ #include <linux/sched.h>
+@@ -68,9 +67,6 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
+       int numa_node;
+       u64 size;
+ 
+-      if (!capable(CAP_NET_ADMIN))
+-              return ERR_PTR(-EPERM);
+-
+       if (attr->max_entries == 0 || attr->key_size != 4 ||
+           attr->value_size != 4 ||
+           attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
+diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
+index 8383a99f610fd..0adf8d9475cb2 100644
+--- a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
++++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
+@@ -171,7 +171,11 @@ static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *s
+                               prog_insns, prog_insn_cnt, &load_opts),
+                 -EPERM, "prog_load_fails");
+ 
+-      for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++)
++      /* some map types require particular correct parameters which could be
++       * sanity-checked before enforcing -EPERM, so only validate that
++       * the simple ARRAY and HASH maps are failing with -EPERM
++       */
++      for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_ARRAY; i++)
+               ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL),
+                         -EPERM, "map_create_fails");
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/bpf-cpumap-handle-skb-as-well-when-clean-up-ptr_ring.patch b/queue-6.4/bpf-cpumap-handle-skb-as-well-when-clean-up-ptr_ring.patch

new file mode 100644 (file)

index 0000000..f7c7379
--- /dev/null
+++ b/queue-6.4/bpf-cpumap-handle-skb-as-well-when-clean-up-ptr_ring.patch
@@ -0,0 +1,85 @@
+From f3613f120a1f0fb4b2d6ce67c2f6137c4ece42f6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 17:51:07 +0800
+Subject: bpf, cpumap: Handle skb as well when clean up ptr_ring
+
+From: Hou Tao <houtao1@huawei.com>
+
+[ Upstream commit 7c62b75cd1a792e14b037fa4f61f9b18914e7de1 ]
+
+The following warning was reported when running xdp_redirect_cpu with
+both skb-mode and stress-mode enabled:
+
+  ------------[ cut here ]------------
+  Incorrect XDP memory type (-2128176192) usage
+  WARNING: CPU: 7 PID: 1442 at net/core/xdp.c:405
+  Modules linked in:
+  CPU: 7 PID: 1442 Comm: kworker/7:0 Tainted: G  6.5.0-rc2+ #1
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+  Workqueue: events __cpu_map_entry_free
+  RIP: 0010:__xdp_return+0x1e4/0x4a0
+  ......
+  Call Trace:
+   <TASK>
+   ? show_regs+0x65/0x70
+   ? __warn+0xa5/0x240
+   ? __xdp_return+0x1e4/0x4a0
+   ......
+   xdp_return_frame+0x4d/0x150
+   __cpu_map_entry_free+0xf9/0x230
+   process_one_work+0x6b0/0xb80
+   worker_thread+0x96/0x720
+   kthread+0x1a5/0x1f0
+   ret_from_fork+0x3a/0x70
+   ret_from_fork_asm+0x1b/0x30
+   </TASK>
+
+The reason for the warning is twofold. One is that the kthread
+cpu_map_kthread_run() is stopped prematurely. The other is that
+__cpu_map_ring_cleanup() doesn't handle skb mode and treats skbs in
+ptr_ring as XDP frames.
+
+The prematurely-stopped kthread will be fixed by the preceding patch and
+ptr_ring will be empty when __cpu_map_ring_cleanup() is called. But
+as the comment in __cpu_map_ring_cleanup() says, handle and free
+skbs in ptr_ring as well to "catch any broken behaviour gracefully".
+
+Fixes: 11941f8a8536 ("bpf: cpumap: Implement generic cpumap")
+Signed-off-by: Hou Tao <houtao1@huawei.com>
+Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
+Link: https://lore.kernel.org/r/20230729095107.1722450-3-houtao@huaweicloud.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/cpumap.c | 14 ++++++++++----
+ 1 file changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
+index 7eeb200251640..286ab3db0fde8 100644
+--- a/kernel/bpf/cpumap.c
++++ b/kernel/bpf/cpumap.c
+@@ -131,11 +131,17 @@ static void __cpu_map_ring_cleanup(struct ptr_ring *ring)
+        * invoked cpu_map_kthread_stop(). Catch any broken behaviour
+        * gracefully and warn once.
+        */
+-      struct xdp_frame *xdpf;
++      void *ptr;
+ 
+-      while ((xdpf = ptr_ring_consume(ring)))
+-              if (WARN_ON_ONCE(xdpf))
+-                      xdp_return_frame(xdpf);
++      while ((ptr = ptr_ring_consume(ring))) {
++              WARN_ON_ONCE(1);
++              if (unlikely(__ptr_test_bit(0, &ptr))) {
++                      __ptr_clear_bit(0, &ptr);
++                      kfree_skb(ptr);
++                      continue;
++              }
++              xdp_return_frame(ptr);
++      }
+ }
+ 
+ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+-- 
+2.40.1
+
diff --git a/queue-6.4/bpf-cpumap-make-sure-kthread-is-running-before-map-u.patch b/queue-6.4/bpf-cpumap-make-sure-kthread-is-running-before-map-u.patch

new file mode 100644 (file)

index 0000000..0d2a057
--- /dev/null
+++ b/queue-6.4/bpf-cpumap-make-sure-kthread-is-running-before-map-u.patch
@@ -0,0 +1,142 @@
+From 19ec523419b1f9d6a169dadd970e20071b8947f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 17:51:06 +0800
+Subject: bpf, cpumap: Make sure kthread is running before map update returns
+
+From: Hou Tao <houtao1@huawei.com>
+
+[ Upstream commit 640a604585aa30f93e39b17d4d6ba69fcb1e66c9 ]
+
+The following warning was reported when running stress-mode enabled
+xdp_redirect_cpu with some RT threads:
+
+  ------------[ cut here ]------------
+  WARNING: CPU: 4 PID: 65 at kernel/bpf/cpumap.c:135
+  CPU: 4 PID: 65 Comm: kworker/4:1 Not tainted 6.5.0-rc2+ #1
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
+  Workqueue: events cpu_map_kthread_stop
+  RIP: 0010:put_cpu_map_entry+0xda/0x220
+  ......
+  Call Trace:
+   <TASK>
+   ? show_regs+0x65/0x70
+   ? __warn+0xa5/0x240
+   ......
+   ? put_cpu_map_entry+0xda/0x220
+   cpu_map_kthread_stop+0x41/0x60
+   process_one_work+0x6b0/0xb80
+   worker_thread+0x96/0x720
+   kthread+0x1a5/0x1f0
+   ret_from_fork+0x3a/0x70
+   ret_from_fork_asm+0x1b/0x30
+   </TASK>
+
+The root cause is the same as commit 436901649731 ("bpf: cpumap: Fix memory
+leak in cpu_map_update_elem"). The kthread is stopped prematurely by
+kthread_stop() in cpu_map_kthread_stop(), and kthread() doesn't call
+cpu_map_kthread_run() at all but XDP program has already queued some
+frames or skbs into ptr_ring. So when __cpu_map_ring_cleanup() checks
+the ptr_ring, it will find it was not emptied and report a warning.
+
+An alternative fix is to use __cpu_map_ring_cleanup() to drop these
+pending frames or skbs when kthread_stop() returns -EINTR, but it may
+confuse the user, because these frames or skbs have been handled
+correctly by XDP program. So instead of dropping these frames or skbs,
+just make sure the per-cpu kthread is running before
+__cpu_map_entry_alloc() returns.
+
+After applying the fix, the error handling for kthread_stop() will be
+unnecessary because it will always return 0, so just remove it.
+
+Fixes: 6710e1126934 ("bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP")
+Signed-off-by: Hou Tao <houtao1@huawei.com>
+Reviewed-by: Pu Lehui <pulehui@huawei.com>
+Acked-by: Jesper Dangaard Brouer <hawk@kernel.org>
+Link: https://lore.kernel.org/r/20230729095107.1722450-2-houtao@huaweicloud.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/cpumap.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
+index 6ae02be7a48e3..7eeb200251640 100644
+--- a/kernel/bpf/cpumap.c
++++ b/kernel/bpf/cpumap.c
+@@ -28,6 +28,7 @@
+ #include <linux/sched.h>
+ #include <linux/workqueue.h>
+ #include <linux/kthread.h>
++#include <linux/completion.h>
+ #include <trace/events/xdp.h>
+ #include <linux/btf_ids.h>
+ 
+@@ -73,6 +74,7 @@ struct bpf_cpu_map_entry {
+       struct rcu_head rcu;
+ 
+       struct work_struct kthread_stop_wq;
++      struct completion kthread_running;
+ };
+ 
+ struct bpf_cpu_map {
+@@ -153,7 +155,6 @@ static void put_cpu_map_entry(struct bpf_cpu_map_entry *rcpu)
+ static void cpu_map_kthread_stop(struct work_struct *work)
+ {
+       struct bpf_cpu_map_entry *rcpu;
+-      int err;
+ 
+       rcpu = container_of(work, struct bpf_cpu_map_entry, kthread_stop_wq);
+ 
+@@ -163,14 +164,7 @@ static void cpu_map_kthread_stop(struct work_struct *work)
+       rcu_barrier();
+ 
+       /* kthread_stop will wake_up_process and wait for it to complete */
+-      err = kthread_stop(rcpu->kthread);
+-      if (err) {
+-              /* kthread_stop may be called before cpu_map_kthread_run
+-               * is executed, so we need to release the memory related
+-               * to rcpu.
+-               */
+-              put_cpu_map_entry(rcpu);
+-      }
++      kthread_stop(rcpu->kthread);
+ }
+ 
+ static void cpu_map_bpf_prog_run_skb(struct bpf_cpu_map_entry *rcpu,
+@@ -298,11 +292,11 @@ static int cpu_map_bpf_prog_run(struct bpf_cpu_map_entry *rcpu, void **frames,
+       return nframes;
+ }
+ 
+-
+ static int cpu_map_kthread_run(void *data)
+ {
+       struct bpf_cpu_map_entry *rcpu = data;
+ 
++      complete(&rcpu->kthread_running);
+       set_current_state(TASK_INTERRUPTIBLE);
+ 
+       /* When kthread gives stop order, then rcpu have been disconnected
+@@ -467,6 +461,7 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
+               goto free_ptr_ring;
+ 
+       /* Setup kthread */
++      init_completion(&rcpu->kthread_running);
+       rcpu->kthread = kthread_create_on_node(cpu_map_kthread_run, rcpu, numa,
+                                              "cpumap/%d/map:%d", cpu,
+                                              map->id);
+@@ -480,6 +475,12 @@ __cpu_map_entry_alloc(struct bpf_map *map, struct bpf_cpumap_val *value,
+       kthread_bind(rcpu->kthread, cpu);
+       wake_up_process(rcpu->kthread);
+ 
++      /* Make sure kthread has been running, so kthread_stop() will not
++       * stop the kthread prematurely and all pending frames or skbs
++       * will be handled by the kthread before kthread_stop() returns.
++       */
++      wait_for_completion(&rcpu->kthread_running);
++
+       return rcpu;
+ 
+ free_prog:
+-- 
+2.40.1
+
diff --git a/queue-6.4/bpf-inline-map-creation-logic-in-map_create-function.patch b/queue-6.4/bpf-inline-map-creation-logic-in-map_create-function.patch

new file mode 100644 (file)

index 0000000..122f42c
--- /dev/null
+++ b/queue-6.4/bpf-inline-map-creation-logic-in-map_create-function.patch
@@ -0,0 +1,123 @@
+From 93cd0e27b64970fd0cd70ca09f8a90ddc71d77be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Jun 2023 15:35:31 -0700
+Subject: bpf: Inline map creation logic in map_create() function
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 22db41226b679768df8f0a4ff5de8e58f625f45b ]
+
+Currently find_and_alloc_map() performs two separate functions: some
+argument sanity checking and partial map creation workflow handling.
+Neither of those functions are self-sufficient and are augmented by
+further checks and initialization logic in the caller (map_create()
+function). So unify all the sanity checks, permission checks, and
+creation and initialization logic in one linear piece of code in
+map_create() instead. This also makes it easier to further enhance
+permission checks and keep them located in one place.
+
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Link: https://lore.kernel.org/bpf/20230613223533.3689589-3-andrii@kernel.org
+Stable-dep-of: 640a604585aa ("bpf, cpumap: Make sure kthread is running before map update returns")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/syscall.c | 57 +++++++++++++++++++-------------------------
+ 1 file changed, 24 insertions(+), 33 deletions(-)
+
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index 0a7238125e1a4..8fddf0eea9bf2 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -109,37 +109,6 @@ const struct bpf_map_ops bpf_map_offload_ops = {
+       .map_mem_usage = bpf_map_offload_map_mem_usage,
+ };
+ 
+-static struct bpf_map *find_and_alloc_map(union bpf_attr *attr)
+-{
+-      const struct bpf_map_ops *ops;
+-      u32 type = attr->map_type;
+-      struct bpf_map *map;
+-      int err;
+-
+-      if (type >= ARRAY_SIZE(bpf_map_types))
+-              return ERR_PTR(-EINVAL);
+-      type = array_index_nospec(type, ARRAY_SIZE(bpf_map_types));
+-      ops = bpf_map_types[type];
+-      if (!ops)
+-              return ERR_PTR(-EINVAL);
+-
+-      if (ops->map_alloc_check) {
+-              err = ops->map_alloc_check(attr);
+-              if (err)
+-                      return ERR_PTR(err);
+-      }
+-      if (attr->map_ifindex)
+-              ops = &bpf_map_offload_ops;
+-      if (!ops->map_mem_usage)
+-              return ERR_PTR(-EINVAL);
+-      map = ops->map_alloc(attr);
+-      if (IS_ERR(map))
+-              return map;
+-      map->ops = ops;
+-      map->map_type = type;
+-      return map;
+-}
+-
+ static void bpf_map_write_active_inc(struct bpf_map *map)
+ {
+       atomic64_inc(&map->writecnt);
+@@ -1127,7 +1096,9 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
+ /* called via syscall */
+ static int map_create(union bpf_attr *attr)
+ {
++      const struct bpf_map_ops *ops;
+       int numa_node = bpf_map_attr_numa_node(attr);
++      u32 map_type = attr->map_type;
+       struct bpf_map *map;
+       int f_flags;
+       int err;
+@@ -1157,6 +1128,25 @@ static int map_create(union bpf_attr *attr)
+            !node_online(numa_node)))
+               return -EINVAL;
+ 
++      /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
++      map_type = attr->map_type;
++      if (map_type >= ARRAY_SIZE(bpf_map_types))
++              return -EINVAL;
++      map_type = array_index_nospec(map_type, ARRAY_SIZE(bpf_map_types));
++      ops = bpf_map_types[map_type];
++      if (!ops)
++              return -EINVAL;
++
++      if (ops->map_alloc_check) {
++              err = ops->map_alloc_check(attr);
++              if (err)
++                      return err;
++      }
++      if (attr->map_ifindex)
++              ops = &bpf_map_offload_ops;
++      if (!ops->map_mem_usage)
++              return -EINVAL;
++
+       /* Intent here is for unprivileged_bpf_disabled to block BPF map
+        * creation for unprivileged users; other actions depend
+        * on fd availability and access to bpffs, so are dependent on
+@@ -1166,10 +1156,11 @@ static int map_create(union bpf_attr *attr)
+       if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
+               return -EPERM;
+ 
+-      /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
+-      map = find_and_alloc_map(attr);
++      map = ops->map_alloc(attr);
+       if (IS_ERR(map))
+               return PTR_ERR(map);
++      map->ops = ops;
++      map->map_type = map_type;
+ 
+       err = bpf_obj_name_cpy(map->name, attr->map_name,
+                              sizeof(attr->map_name));
+-- 
+2.40.1
+
diff --git a/queue-6.4/bpf-move-unprivileged-checks-into-map_create-and-bpf.patch b/queue-6.4/bpf-move-unprivileged-checks-into-map_create-and-bpf.patch

new file mode 100644 (file)

index 0000000..cd03db7
--- /dev/null
+++ b/queue-6.4/bpf-move-unprivileged-checks-into-map_create-and-bpf.patch
@@ -0,0 +1,92 @@
+From b4d50bd1c0db1d779611229457cd87880a638c9d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Jun 2023 15:35:30 -0700
+Subject: bpf: Move unprivileged checks into map_create() and bpf_prog_load()
+
+From: Andrii Nakryiko <andrii@kernel.org>
+
+[ Upstream commit 1d28635abcf1914425d6516e641978011984c58a ]
+
+Make each bpf() syscall command a bit more self-contained, making it
+easier to further enhance it. We move sysctl_unprivileged_bpf_disabled
+handling down to map_create() and bpf_prog_load(), two special commands
+in this regard.
+
+Also swap the order of checks, calling bpf_capable() only if
+sysctl_unprivileged_bpf_disabled is true, avoiding unnecessary audit
+messages.
+
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Link: https://lore.kernel.org/bpf/20230613223533.3689589-2-andrii@kernel.org
+Stable-dep-of: 640a604585aa ("bpf, cpumap: Make sure kthread is running before map update returns")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/syscall.c | 34 +++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index 5524fcf6fb2a4..0a7238125e1a4 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -1157,6 +1157,15 @@ static int map_create(union bpf_attr *attr)
+            !node_online(numa_node)))
+               return -EINVAL;
+ 
++      /* Intent here is for unprivileged_bpf_disabled to block BPF map
++       * creation for unprivileged users; other actions depend
++       * on fd availability and access to bpffs, so are dependent on
++       * object creation success. Even with unprivileged BPF disabled,
++       * capability checks are still carried out.
++       */
++      if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
++              return -EPERM;
++
+       /* find map type and init map: hashtable vs rbtree vs bloom vs ... */
+       map = find_and_alloc_map(attr);
+       if (IS_ERR(map))
+@@ -2535,6 +2544,16 @@ static int bpf_prog_load(union bpf_attr *attr, bpfptr_t uattr, u32 uattr_size)
+       /* eBPF programs must be GPL compatible to use GPL-ed functions */
+       is_gpl = license_is_gpl_compatible(license);
+ 
++      /* Intent here is for unprivileged_bpf_disabled to block BPF program
++       * creation for unprivileged users; other actions depend
++       * on fd availability and access to bpffs, so are dependent on
++       * object creation success. Even with unprivileged BPF disabled,
++       * capability checks are still carried out for these
++       * and other operations.
++       */
++      if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
++              return -EPERM;
++
+       if (attr->insn_cnt == 0 ||
+           attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
+               return -E2BIG;
+@@ -5018,23 +5037,8 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
+ static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
+ {
+       union bpf_attr attr;
+-      bool capable;
+       int err;
+ 
+-      capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
+-
+-      /* Intent here is for unprivileged_bpf_disabled to block key object
+-       * creation commands for unprivileged users; other actions depend
+-       * of fd availability and access to bpffs, so are dependent on
+-       * object creation success.  Capabilities are later verified for
+-       * operations such as load and map create, so even with unprivileged
+-       * BPF disabled, capability checks are still carried out for these
+-       * and other operations.
+-       */
+-      if (!capable &&
+-          (cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
+-              return -EPERM;
+-
+       err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
+       if (err)
+               return err;
+-- 
+2.40.1
+
diff --git a/queue-6.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch b/queue-6.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch

new file mode 100644 (file)

index 0000000..482d19e
--- /dev/null
+++ b/queue-6.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch
@@ -0,0 +1,59 @@
+From d28402ad5ce87b3d2394781e0aa66df96995bbf3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 08:44:11 +0200
+Subject: bpf: sockmap: Remove preempt_disable in sock_map_sk_acquire
+
+From: Tomas Glozar <tglozar@redhat.com>
+
+[ Upstream commit 13d2618b48f15966d1adfe1ff6a1985f5eef40ba ]
+
+Disabling preemption in sock_map_sk_acquire conflicts with GFP_ATOMIC
+allocation later in sk_psock_init_link on PREEMPT_RT kernels, since
+GFP_ATOMIC might sleep on RT (see bpf: Make BPF and PREEMPT_RT co-exist
+patchset notes for details).
+
+This causes calling bpf_map_update_elem on BPF_MAP_TYPE_SOCKMAP maps to
+BUG (sleeping function called from invalid context) on RT kernels.
+
+preempt_disable was introduced together with lock_sk and rcu_read_lock
+in commit 99ba2b5aba24e ("bpf: sockhash, disallow bpf_tcp_close and update
+in parallel"), probably to match disabled migration of BPF programs, and
+is no longer necessary.
+
+Remove preempt_disable to fix BUG in sock_map_update_common on RT.
+
+Signed-off-by: Tomas Glozar <tglozar@redhat.com>
+Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
+Link: https://lore.kernel.org/all/20200224140131.461979697@linutronix.de/
+Fixes: 99ba2b5aba24 ("bpf: sockhash, disallow bpf_tcp_close and update in parallel")
+Reviewed-by: John Fastabend <john.fastabend@gmail.com>
+Link: https://lore.kernel.org/r/20230728064411.305576-1-tglozar@redhat.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock_map.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/net/core/sock_map.c b/net/core/sock_map.c
+index 19538d6287144..08ab108206bf8 100644
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -115,7 +115,6 @@ static void sock_map_sk_acquire(struct sock *sk)
+       __acquires(&sk->sk_lock.slock)
+ {
+       lock_sock(sk);
+-      preempt_disable();
+       rcu_read_lock();
+ }
+ 
+@@ -123,7 +122,6 @@ static void sock_map_sk_release(struct sock *sk)
+       __releases(&sk->sk_lock.slock)
+ {
+       rcu_read_unlock();
+-      preempt_enable();
+       release_sock(sk);
+ }
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/erofs-fix-wrong-primary-bvec-selection-on-deduplicat.patch b/queue-6.4/erofs-fix-wrong-primary-bvec-selection-on-deduplicat.patch

new file mode 100644 (file)

index 0000000..30c84e7
--- /dev/null
+++ b/queue-6.4/erofs-fix-wrong-primary-bvec-selection-on-deduplicat.patch
@@ -0,0 +1,70 @@
+From 691c1476f05986e9a7949266eb16e593a3792b06 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 14:54:59 +0800
+Subject: erofs: fix wrong primary bvec selection on deduplicated extents
+
+From: Gao Xiang <hsiangkao@linux.alibaba.com>
+
+[ Upstream commit 94c43de73521d8ed7ebcfc6191d9dace1cbf7caa ]
+
+When handling deduplicated compressed data, there can be multiple
+decompressed extents pointing to the same compressed data in one shot.
+
+In such cases, the bvecs which belong to the longest extent will be
+selected as the primary bvecs for real decompressors to decode and the
+other duplicated bvecs will be directly copied from the primary bvecs.
+
+Previously, only relative offsets of the longest extent were checked to
+decompress the primary bvecs.  On rare occasions, it can be incorrect
+if there are several extents with the same start relative offset.
+As a result, some short bvecs could be selected for decompression and
+then cause data corruption.
+
+For example, as Shijie Sun reported off-list, considering the following
+extents of a file:
+ 117:   903345..  915250 |   11905 :     385024..    389120 |    4096
+...
+ 119:   919729..  930323 |   10594 :     385024..    389120 |    4096
+...
+ 124:   968881..  980786 |   11905 :     385024..    389120 |    4096
+
+The start relative offset is the same: 2225, but extent 119 (919729..
+930323) is shorter than the others.
+
+Let's restrict the bvec length in addition to the start offset if bvecs
+are not full.
+
+Reported-by: Shijie Sun <sunshijie@xiaomi.com>
+Fixes: 5c2a64252c5d ("erofs: introduce partial-referenced pclusters")
+Tested-by: Shijie Sun <sunshijie@xiaomi.com>
+Reviewed-by: Yue Hu <huyue2@coolpad.com>
+Reviewed-by: Chao Yu <chao@kernel.org>
+Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
+Link: https://lore.kernel.org/r/20230719065459.60083-1-hsiangkao@linux.alibaba.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/erofs/zdata.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
+index 4a1c238600c52..470988bb7867e 100644
+--- a/fs/erofs/zdata.c
++++ b/fs/erofs/zdata.c
+@@ -1110,10 +1110,11 @@ static void z_erofs_do_decompressed_bvec(struct z_erofs_decompress_backend *be,
+                                        struct z_erofs_bvec *bvec)
+ {
+       struct z_erofs_bvec_item *item;
++      unsigned int pgnr;
+ 
+-      if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK)) {
+-              unsigned int pgnr;
+-
++      if (!((bvec->offset + be->pcl->pageofs_out) & ~PAGE_MASK) &&
++          (bvec->end == PAGE_SIZE ||
++           bvec->offset + bvec->end == be->pcl->length)) {
+               pgnr = (bvec->offset + be->pcl->pageofs_out) >> PAGE_SHIFT;
+               DBG_BUGON(pgnr >= be->nr_pages);
+               if (!be->decompressed_pages[pgnr]) {
+-- 
+2.40.1
+
diff --git a/queue-6.4/firmware-arm_scmi-fix-chan_free-cleanup-on-smc.patch b/queue-6.4/firmware-arm_scmi-fix-chan_free-cleanup-on-smc.patch

new file mode 100644 (file)

index 0000000..01a3a2f
--- /dev/null
+++ b/queue-6.4/firmware-arm_scmi-fix-chan_free-cleanup-on-smc.patch
@@ -0,0 +1,89 @@
+From d3088d07f837a9ea734965f83d39c3422d8ccd0e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 18:35:33 +0100
+Subject: firmware: arm_scmi: Fix chan_free cleanup on SMC
+
+From: Cristian Marussi <cristian.marussi@arm.com>
+
+[ Upstream commit d1ff11d7ad8704f8d615f6446041c221b2d2ec4d ]
+
+SCMI transport based on SMC can optionally use an additional IRQ to
+signal message completion. The associated interrupt handler is currently
+allocated using devres but on shutdown the core SCMI stack will call
+.chan_free() well before any managed cleanup is invoked by devres.
+As a consequence, the arrival of a late reply to an in-flight pending
+transaction could still trigger the interrupt handler well after the
+SCMI core has cleaned up the channels, with unpleasant results.
+
+Inhibit further message processing on the IRQ path by explicitly freeing
+the IRQ inside .chan_free() callback itself.
+
+Fixes: dd820ee21d5e ("firmware: arm_scmi: Augment SMC/HVC to allow optional interrupt")
+Reported-by: Bjorn Andersson <andersson@kernel.org>
+Signed-off-by: Cristian Marussi <cristian.marussi@arm.com>
+Link: https://lore.kernel.org/r/20230719173533.2739319-1-cristian.marussi@arm.com
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_scmi/smc.c | 17 +++++++++++------
+ 1 file changed, 11 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/firmware/arm_scmi/smc.c b/drivers/firmware/arm_scmi/smc.c
+index 93272e4bbd12b..d0c9fce44d322 100644
+--- a/drivers/firmware/arm_scmi/smc.c
++++ b/drivers/firmware/arm_scmi/smc.c
+@@ -23,6 +23,7 @@
+ /**
+  * struct scmi_smc - Structure representing a SCMI smc transport
+  *
++ * @irq: An optional IRQ for completion
+  * @cinfo: SCMI channel info
+  * @shmem: Transmit/Receive shared memory area
+  * @shmem_lock: Lock to protect access to Tx/Rx shared memory area.
+@@ -33,6 +34,7 @@
+  */
+ 
+ struct scmi_smc {
++      int irq;
+       struct scmi_chan_info *cinfo;
+       struct scmi_shared_mem __iomem *shmem;
+       /* Protect access to shmem area */
+@@ -106,7 +108,7 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
+       struct resource res;
+       struct device_node *np;
+       u32 func_id;
+-      int ret, irq;
++      int ret;
+ 
+       if (!tx)
+               return -ENODEV;
+@@ -142,11 +144,10 @@ static int smc_chan_setup(struct scmi_chan_info *cinfo, struct device *dev,
+        * completion of a message is signaled by an interrupt rather than by
+        * the return of the SMC call.
+        */
+-      irq = of_irq_get_byname(cdev->of_node, "a2p");
+-      if (irq > 0) {
+-              ret = devm_request_irq(dev, irq, smc_msg_done_isr,
+-                                     IRQF_NO_SUSPEND,
+-                                     dev_name(dev), scmi_info);
++      scmi_info->irq = of_irq_get_byname(cdev->of_node, "a2p");
++      if (scmi_info->irq > 0) {
++              ret = request_irq(scmi_info->irq, smc_msg_done_isr,
++                                IRQF_NO_SUSPEND, dev_name(dev), scmi_info);
+               if (ret) {
+                       dev_err(dev, "failed to setup SCMI smc irq\n");
+                       return ret;
+@@ -168,6 +169,10 @@ static int smc_chan_free(int id, void *p, void *data)
+       struct scmi_chan_info *cinfo = p;
+       struct scmi_smc *scmi_info = cinfo->transport_info;
+ 
++      /* Ignore any possible further reception on the IRQ path */
++      if (scmi_info->irq > 0)
++              free_irq(scmi_info->irq, scmi_info);
++
+       cinfo->transport_info = NULL;
+       scmi_info->cinfo = NULL;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/firmware-arm_scmi-fix-signed-error-return-values-han.patch b/queue-6.4/firmware-arm_scmi-fix-signed-error-return-values-han.patch

new file mode 100644 (file)

index 0000000..1cc5944
--- /dev/null
+++ b/queue-6.4/firmware-arm_scmi-fix-signed-error-return-values-han.patch
@@ -0,0 +1,47 @@
+From 13177df03b164bb5d565e4d0fe3c6cfeea331481 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jul 2023 01:55:29 -0700
+Subject: firmware: arm_scmi: Fix signed error return values handling
+
+From: Sukrut Bellary <sukrut.bellary@linux.com>
+
+[ Upstream commit 81b233b8dd72f2d1df3da8bd4bd4f8c5e84937b9 ]
+
+Handle signed error return values returned by simple_write_to_buffer().
+In case of an error, return the error code.
+
+Fixes: 3c3d818a9317 ("firmware: arm_scmi: Add core raw transmission support")
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
+Signed-off-by: Sukrut Bellary <sukrut.bellary@linux.com>
+Reviewed-by: Cristian Marussi <cristian.marussi@arm.com>
+Tested-by: Cristian Marussi <cristian.marussi@arm.com>
+Reviewed-by: Dan Carpenter <dan.carpenter@linaro.org>
+Link: https://lore.kernel.org/r/20230718085529.258899-1-sukrut.bellary@linux.com
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_scmi/raw_mode.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/firmware/arm_scmi/raw_mode.c b/drivers/firmware/arm_scmi/raw_mode.c
+index 6971dcf72fb99..0493aa3c12bf5 100644
+--- a/drivers/firmware/arm_scmi/raw_mode.c
++++ b/drivers/firmware/arm_scmi/raw_mode.c
+@@ -818,10 +818,13 @@ static ssize_t scmi_dbg_raw_mode_common_write(struct file *filp,
+        * before sending it with a single RAW xfer.
+        */
+       if (rd->tx_size < rd->tx_req_size) {
+-              size_t cnt;
++              ssize_t cnt;
+ 
+               cnt = simple_write_to_buffer(rd->tx.buf, rd->tx.len, ppos,
+                                            buf, count);
++              if (cnt < 0)
++                      return cnt;
++
+               rd->tx_size += cnt;
+               if (cnt < count)
+                       return cnt;
+-- 
+2.40.1
+
diff --git a/queue-6.4/firmware-smccc-fix-use-of-uninitialised-results-stru.patch b/queue-6.4/firmware-smccc-fix-use-of-uninitialised-results-stru.patch

new file mode 100644 (file)

index 0000000..3faae29
--- /dev/null
+++ b/queue-6.4/firmware-smccc-fix-use-of-uninitialised-results-stru.patch
@@ -0,0 +1,62 @@
+From cac2f24dd6bc4038c005841556e00c75e14d30f9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 18:17:02 +0100
+Subject: firmware: smccc: Fix use of uninitialised results structure
+
+From: Punit Agrawal <punit.agrawal@bytedance.com>
+
+[ Upstream commit d05799d7b4a39fa71c65aa277128ac7c843ffcdc ]
+
+Commit 35727af2b15d ("irqchip/gicv3: Workaround for NVIDIA erratum
+T241-FABRIC-4") moved the initialisation of the SoC version to
+arm_smccc_version_init() but forgot to update the results structure
+and its usage.
+
+Fix the use of the uninitialised results structure and update the
+error strings.
+
+Fixes: 35727af2b15d ("irqchip/gicv3: Workaround for NVIDIA erratum T241-FABRIC-4")
+Signed-off-by: Punit Agrawal <punit.agrawal@bytedance.com>
+Cc: Sudeep Holla <sudeep.holla@arm.com>
+Cc: Marc Zyngier <maz@kernel.org>
+Cc: Vikram Sethi <vsethi@nvidia.com>
+Cc: Shanker Donthineni <sdonthineni@nvidia.com>
+Acked-by: Marc Zyngier <maz@kernel.org>
+Link: https://lore.kernel.org/r/20230717171702.424253-1-punit.agrawal@bytedance.com
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/smccc/soc_id.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/firmware/smccc/soc_id.c b/drivers/firmware/smccc/soc_id.c
+index 890eb454599a3..1990263fbba0e 100644
+--- a/drivers/firmware/smccc/soc_id.c
++++ b/drivers/firmware/smccc/soc_id.c
+@@ -34,7 +34,6 @@ static struct soc_device_attribute *soc_dev_attr;
+ 
+ static int __init smccc_soc_init(void)
+ {
+-      struct arm_smccc_res res;
+       int soc_id_rev, soc_id_version;
+       static char soc_id_str[20], soc_id_rev_str[12];
+       static char soc_id_jep106_id_str[12];
+@@ -49,13 +48,13 @@ static int __init smccc_soc_init(void)
+       }
+ 
+       if (soc_id_version < 0) {
+-              pr_err("ARCH_SOC_ID(0) returned error: %lx\n", res.a0);
++              pr_err("Invalid SoC Version: %x\n", soc_id_version);
+               return -EINVAL;
+       }
+ 
+       soc_id_rev = arm_smccc_get_soc_id_revision();
+       if (soc_id_rev < 0) {
+-              pr_err("ARCH_SOC_ID(1) returned error: %lx\n", res.a0);
++              pr_err("Invalid SoC Revision: %x\n", soc_id_rev);
+               return -EINVAL;
+       }
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/ice-fix-rdma-vsi-removal-during-queue-rebuild.patch b/queue-6.4/ice-fix-rdma-vsi-removal-during-queue-rebuild.patch

new file mode 100644 (file)

index 0000000..e9adfc8
--- /dev/null
+++ b/queue-6.4/ice-fix-rdma-vsi-removal-during-queue-rebuild.patch
@@ -0,0 +1,73 @@
+From 6fb0117058c5cdf781258de11c0fc00c341754fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 10:12:43 -0700
+Subject: ice: Fix RDMA VSI removal during queue rebuild
+
+From: Rafal Rogalski <rafalx.rogalski@intel.com>
+
+[ Upstream commit 4b31fd4d77ffa430d0b74ba1885ea0a41594f202 ]
+
+During qdisc create/delete, it is necessary to rebuild the queue
+of VSIs. An error occurred because the VSIs created by RDMA were
+still active.
+
+Added check if RDMA is active. If yes, it disallows qdisc changes
+and writes a message in the system logs.
+
+Fixes: 348048e724a0 ("ice: Implement iidc operations")
+Signed-off-by: Rafal Rogalski <rafalx.rogalski@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Signed-off-by: Kamil Maziarz <kamil.maziarz@intel.com>
+Tested-by: Bharathi Sreenivas <bharathi.sreenivas@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230728171243.2446101-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/ice/ice_main.c | 18 ++++++++++++++++++
+ 1 file changed, 18 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
+index fbe70458fda27..34e8e7cb1bc54 100644
+--- a/drivers/net/ethernet/intel/ice/ice_main.c
++++ b/drivers/net/ethernet/intel/ice/ice_main.c
+@@ -9055,6 +9055,7 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+ {
+       struct ice_netdev_priv *np = netdev_priv(netdev);
+       struct ice_pf *pf = np->vsi->back;
++      bool locked = false;
+       int err;
+ 
+       switch (type) {
+@@ -9064,10 +9065,27 @@ ice_setup_tc(struct net_device *netdev, enum tc_setup_type type,
+                                                 ice_setup_tc_block_cb,
+                                                 np, np, true);
+       case TC_SETUP_QDISC_MQPRIO:
++              if (pf->adev) {
++                      mutex_lock(&pf->adev_mutex);
++                      device_lock(&pf->adev->dev);
++                      locked = true;
++                      if (pf->adev->dev.driver) {
++                              netdev_err(netdev, "Cannot change qdisc when RDMA is active\n");
++                              err = -EBUSY;
++                              goto adev_unlock;
++                      }
++              }
++
+               /* setup traffic classifier for receive side */
+               mutex_lock(&pf->tc_mutex);
+               err = ice_setup_tc_mqprio_qdisc(netdev, type_data);
+               mutex_unlock(&pf->tc_mutex);
++
++adev_unlock:
++              if (locked) {
++                      device_unlock(&pf->adev->dev);
++                      mutex_unlock(&pf->adev_mutex);
++              }
+               return err;
+       default:
+               return -EOPNOTSUPP;
+-- 
+2.40.1
+
diff --git a/queue-6.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch b/queue-6.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch

new file mode 100644 (file)

index 0000000..4ab9b81
--- /dev/null
+++ b/queue-6.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch
@@ -0,0 +1,77 @@
+From d7d627193cb9e148de8f8f522a1a21053fdb32c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 14:43:18 +0800
+Subject: ip6mr: Fix skb_under_panic in ip6mr_cache_report()
+
+From: Yue Haibing <yuehaibing@huawei.com>
+
+[ Upstream commit 30e0191b16e8a58e4620fa3e2839ddc7b9d4281c ]
+
+skbuff: skb_under_panic: text:ffffffff88771f69 len:56 put:-4
+ head:ffff88805f86a800 data:ffff887f5f86a850 tail:0x88 end:0x2c0 dev:pim6reg
+ ------------[ cut here ]------------
+ kernel BUG at net/core/skbuff.c:192!
+ invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+ CPU: 2 PID: 22968 Comm: kworker/2:11 Not tainted 6.5.0-rc3-00044-g0a8db05b571a #236
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
+ Workqueue: ipv6_addrconf addrconf_dad_work
+ RIP: 0010:skb_panic+0x152/0x1d0
+ Call Trace:
+  <TASK>
+  skb_push+0xc4/0xe0
+  ip6mr_cache_report+0xd69/0x19b0
+  reg_vif_xmit+0x406/0x690
+  dev_hard_start_xmit+0x17e/0x6e0
+  __dev_queue_xmit+0x2d6a/0x3d20
+  vlan_dev_hard_start_xmit+0x3ab/0x5c0
+  dev_hard_start_xmit+0x17e/0x6e0
+  __dev_queue_xmit+0x2d6a/0x3d20
+  neigh_connected_output+0x3ed/0x570
+  ip6_finish_output2+0x5b5/0x1950
+  ip6_finish_output+0x693/0x11c0
+  ip6_output+0x24b/0x880
+  NF_HOOK.constprop.0+0xfd/0x530
+  ndisc_send_skb+0x9db/0x1400
+  ndisc_send_rs+0x12a/0x6c0
+  addrconf_dad_completed+0x3c9/0xea0
+  addrconf_dad_work+0x849/0x1420
+  process_one_work+0xa22/0x16e0
+  worker_thread+0x679/0x10c0
+  ret_from_fork+0x28/0x60
+  ret_from_fork_asm+0x11/0x20
+
+When a vlan device is set up on dev pim6reg, a DAD NS packet may be sent via reg_vif_xmit().
+reg_vif_xmit()
+    ip6mr_cache_report()
+        skb_push(skb, -skb_network_offset(pkt));//skb_network_offset(pkt) is 4
+And skb_push declared as:
+       void *skb_push(struct sk_buff *skb, unsigned int len);
+               skb->data -= len;
+               //0xffff88805f86a84c - 0xfffffffc = 0xffff887f5f86a850
+skb->data is set to 0xffff887f5f86a850, which is an invalid memory address, causing skb_push() to fail.
+
+Fixes: 14fb64e1f449 ("[IPV6] MROUTE: Support PIM-SM (SSM).")
+Signed-off-by: Yue Haibing <yuehaibing@huawei.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6mr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
+index 51cf37abd142d..b4152b5d68ffb 100644
+--- a/net/ipv6/ip6mr.c
++++ b/net/ipv6/ip6mr.c
+@@ -1073,7 +1073,7 @@ static int ip6mr_cache_report(const struct mr_table *mrt, struct sk_buff *pkt,
+                  And all this only to mangle msg->im6_msgtype and
+                  to set msg->im6_mbz to "mbz" :-)
+                */
+-              skb_push(skb, -skb_network_offset(pkt));
++              __skb_pull(skb, skb_network_offset(pkt));
+ 
+               skb_push(skb, sizeof(*msg));
+               skb_reset_transport_header(skb);
+-- 
+2.40.1
+
diff --git a/queue-6.4/kvm-s390-fix-sthyi-error-handling.patch b/queue-6.4/kvm-s390-fix-sthyi-error-handling.patch

new file mode 100644 (file)

index 0000000..65220df
--- /dev/null
+++ b/queue-6.4/kvm-s390-fix-sthyi-error-handling.patch
@@ -0,0 +1,78 @@
+From 77fd7bec31845ffc8032b5507a28ea7897cf74f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 20:29:39 +0200
+Subject: KVM: s390: fix sthyi error handling
+
+From: Heiko Carstens <hca@linux.ibm.com>
+
+[ Upstream commit 0c02cc576eac161601927b41634f80bfd55bfa9e ]
+
+Commit 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info")
+added cache handling for store hypervisor info. This also changed the
+possible return code for sthyi_fill().
+
+Instead of only returning a condition code like the sthyi instruction would
+do, it can now also return a negative error value (-ENOMEM). handle_sthyi()
+was not changed accordingly. In case of an error, the negative error value
+would be incorrectly injected into the guest PSW.
+
+Add proper error handling to prevent this, and update the comment which
+describes the possible return values of sthyi_fill().
+
+Fixes: 9fb6c9b3fea1 ("s390/sthyi: add cache to store hypervisor info")
+Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Link: https://lore.kernel.org/r/20230727182939.2050744-1-hca@linux.ibm.com
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/sthyi.c  | 6 +++---
+ arch/s390/kvm/intercept.c | 9 ++++++---
+ 2 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c
+index 4d141e2c132e5..2ea7f208f0e73 100644
+--- a/arch/s390/kernel/sthyi.c
++++ b/arch/s390/kernel/sthyi.c
+@@ -459,9 +459,9 @@ static int sthyi_update_cache(u64 *rc)
+  *
+  * Fills the destination with system information returned by the STHYI
+  * instruction. The data is generated by emulation or execution of STHYI,
+- * if available. The return value is the condition code that would be
+- * returned, the rc parameter is the return code which is passed in
+- * register R2 + 1.
++ * if available. The return value is either a negative error value or
++ * the condition code that would be returned, the rc parameter is the
++ * return code which is passed in register R2 + 1.
+  */
+ int sthyi_fill(void *dst, u64 *rc)
+ {
+diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
+index 2cda8d9d7c6ef..f817006f9f936 100644
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -389,8 +389,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu)
+  */
+ int handle_sthyi(struct kvm_vcpu *vcpu)
+ {
+-      int reg1, reg2, r = 0;
+-      u64 code, addr, cc = 0, rc = 0;
++      int reg1, reg2, cc = 0, r = 0;
++      u64 code, addr, rc = 0;
+       struct sthyi_sctns *sctns = NULL;
+ 
+       if (!test_kvm_facility(vcpu->kvm, 74))
+@@ -421,7 +421,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
+               return -ENOMEM;
+ 
+       cc = sthyi_fill(sctns, &rc);
+-
++      if (cc < 0) {
++              free_page((unsigned long)sctns);
++              return cc;
++      }
+ out:
+       if (!cc) {
+               if (kvm_s390_pv_cpu_is_protected(vcpu)) {
+-- 
+2.40.1
+
diff --git a/queue-6.4/lib-bitmap-workaround-const_eval-test-build-failure.patch b/queue-6.4/lib-bitmap-workaround-const_eval-test-build-failure.patch

new file mode 100644 (file)

index 0000000..27249c2
--- /dev/null
+++ b/queue-6.4/lib-bitmap-workaround-const_eval-test-build-failure.patch
@@ -0,0 +1,105 @@
+From 3df40406b4e7755a1e7c8c1fe53b058c49a02bc7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 12:17:03 -0700
+Subject: lib/bitmap: workaround const_eval test build failure
+
+From: Yury Norov <yury.norov@gmail.com>
+
+[ Upstream commit 2356d198d2b4ddec24efea98271cb3be230bc787 ]
+
+When building with Clang, and when KASAN and GCOV_PROFILE_ALL are both
+enabled, the test fails to build [1]:
+
+>> lib/test_bitmap.c:920:2: error: call to '__compiletime_assert_239' declared with 'error' attribute: BUILD_BUG_ON failed: !__builtin_constant_p(res)
+           BUILD_BUG_ON(!__builtin_constant_p(res));
+           ^
+   include/linux/build_bug.h:50:2: note: expanded from macro 'BUILD_BUG_ON'
+           BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
+           ^
+   include/linux/build_bug.h:39:37: note: expanded from macro 'BUILD_BUG_ON_MSG'
+   #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
+                                       ^
+   include/linux/compiler_types.h:352:2: note: expanded from macro 'compiletime_assert'
+           _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
+           ^
+   include/linux/compiler_types.h:340:2: note: expanded from macro '_compiletime_assert'
+           __compiletime_assert(condition, msg, prefix, suffix)
+           ^
+   include/linux/compiler_types.h:333:4: note: expanded from macro '__compiletime_assert'
+                           prefix ## suffix();                             \
+                           ^
+   <scratch space>:185:1: note: expanded from here
+   __compiletime_assert_239
+
+Originally it was attributed to s390, which now looks seemingly wrong. The
+issue is not related to bitmap code itself, but it breaks build for a given
+configuration.
+
+Disabling the const_eval test under that config may potentially hide other
+bugs. Instead, work around it by disabling GCOV for test_bitmap until
+the compiler is fixed.
+
+[1] https://github.com/ClangBuiltLinux/linux/issues/1874
+
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202307171254.yFcH97ej-lkp@intel.com/
+Fixes: dc34d5036692 ("lib: test_bitmap: add compile-time optimization/evaluations assertions")
+Co-developed-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Yury Norov <yury.norov@gmail.com>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Reviewed-by: Alexander Lobakin <aleksander.lobakin@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/Makefile      | 6 ++++++
+ lib/test_bitmap.c | 8 ++++----
+ 2 files changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/lib/Makefile b/lib/Makefile
+index 876fcdeae34ec..05d8ec332baac 100644
+--- a/lib/Makefile
++++ b/lib/Makefile
+@@ -82,7 +82,13 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
+ obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
+ obj-$(CONFIG_TEST_PRINTF) += test_printf.o
+ obj-$(CONFIG_TEST_SCANF) += test_scanf.o
++
+ obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
++ifeq ($(CONFIG_CC_IS_CLANG)$(CONFIG_KASAN),yy)
++# FIXME: Clang breaks test_bitmap_const_eval when KASAN and GCOV are enabled
++GCOV_PROFILE_test_bitmap.o := n
++endif
++
+ obj-$(CONFIG_TEST_UUID) += test_uuid.o
+ obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
+ obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o
+diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
+index a8005ad3bd589..37a9108c4f588 100644
+--- a/lib/test_bitmap.c
++++ b/lib/test_bitmap.c
+@@ -1149,6 +1149,10 @@ static void __init test_bitmap_print_buf(void)
+       }
+ }
+ 
++/*
++ * FIXME: Clang breaks compile-time evaluations when KASAN and GCOV are enabled.
++ * To workaround it, GCOV is force-disabled in Makefile for this configuration.
++ */
+ static void __init test_bitmap_const_eval(void)
+ {
+       DECLARE_BITMAP(bitmap, BITS_PER_LONG);
+@@ -1174,11 +1178,7 @@ static void __init test_bitmap_const_eval(void)
+        * the compiler is fixed.
+        */
+       bitmap_clear(bitmap, 0, BITS_PER_LONG);
+-#if defined(__s390__) && defined(__clang__)
+-      if (!const_test_bit(7, bitmap))
+-#else
+       if (!test_bit(7, bitmap))
+-#endif
+               bitmap_set(bitmap, 5, 2);
+ 
+       /* Equals to `unsigned long bitopvar = BIT(20)` */
+-- 
+2.40.1
+
diff --git a/queue-6.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch b/queue-6.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch

new file mode 100644 (file)

index 0000000..cf3bad9
--- /dev/null
+++ b/queue-6.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch
@@ -0,0 +1,88 @@
+From 817526c3b4bcb3489d097635781f450bc77b51f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 08:56:19 +0000
+Subject: mISDN: hfcpci: Fix potential deadlock on &hc->lock
+
+From: Chengfeng Ye <dg573847474@gmail.com>
+
+[ Upstream commit 56c6be35fcbed54279df0a2c9e60480a61841d6f ]
+
+As &hc->lock is acquired by both the timer _hfcpci_softirq() and the hardirq
+hfcpci_int(), the timer should disable irqs before acquiring the lock;
+otherwise a deadlock could happen if the timer is preempted by the hard irq.
+
+Possible deadlock scenario:
+hfcpci_softirq() (timer)
+    -> _hfcpci_softirq()
+    -> spin_lock(&hc->lock);
+        <irq interruption>
+        -> hfcpci_int()
+        -> spin_lock(&hc->lock); (deadlock here)
+
+This flaw was found by an experimental static analysis tool I am developing
+for irq-related deadlock.
+
+The tentative patch fixes the potential deadlock by spin_lock_irq()
+in timer.
+
+Fixes: b36b654a7e82 ("mISDN: Create /sys/class/mISDN")
+Signed-off-by: Chengfeng Ye <dg573847474@gmail.com>
+Link: https://lore.kernel.org/r/20230727085619.7419-1-dg573847474@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/isdn/hardware/mISDN/hfcpci.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
+index c0331b2680108..fe391de1aba32 100644
+--- a/drivers/isdn/hardware/mISDN/hfcpci.c
++++ b/drivers/isdn/hardware/mISDN/hfcpci.c
+@@ -839,7 +839,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
+               *z1t = cpu_to_le16(new_z1);     /* now send data */
+               if (bch->tx_idx < bch->tx_skb->len)
+                       return;
+-              dev_kfree_skb(bch->tx_skb);
++              dev_kfree_skb_any(bch->tx_skb);
+               if (get_next_bframe(bch))
+                       goto next_t_frame;
+               return;
+@@ -895,7 +895,7 @@ hfcpci_fill_fifo(struct bchannel *bch)
+       }
+       bz->za[new_f1].z1 = cpu_to_le16(new_z1);        /* for next buffer */
+       bz->f1 = new_f1;        /* next frame */
+-      dev_kfree_skb(bch->tx_skb);
++      dev_kfree_skb_any(bch->tx_skb);
+       get_next_bframe(bch);
+ }
+ 
+@@ -1119,7 +1119,7 @@ tx_birq(struct bchannel *bch)
+       if (bch->tx_skb && bch->tx_idx < bch->tx_skb->len)
+               hfcpci_fill_fifo(bch);
+       else {
+-              dev_kfree_skb(bch->tx_skb);
++              dev_kfree_skb_any(bch->tx_skb);
+               if (get_next_bframe(bch))
+                       hfcpci_fill_fifo(bch);
+       }
+@@ -2277,7 +2277,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
+               return 0;
+ 
+       if (hc->hw.int_m2 & HFCPCI_IRQ_ENABLE) {
+-              spin_lock(&hc->lock);
++              spin_lock_irq(&hc->lock);
+               bch = Sel_BCS(hc, hc->hw.bswapped ? 2 : 1);
+               if (bch && bch->state == ISDN_P_B_RAW) { /* B1 rx&tx */
+                       main_rec_hfcpci(bch);
+@@ -2288,7 +2288,7 @@ _hfcpci_softirq(struct device *dev, void *unused)
+                       main_rec_hfcpci(bch);
+                       tx_birq(bch);
+               }
+-              spin_unlock(&hc->lock);
++              spin_unlock_irq(&hc->lock);
+       }
+       return 0;
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch b/queue-6.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch

new file mode 100644 (file)

index 0000000..6a3bc10
--- /dev/null
+++ b/queue-6.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch
@@ -0,0 +1,36 @@
+From 5d3301e485c0e71525b412b217a7225db7d04111 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:17 +0000
+Subject: net: add missing data-race annotation for sk_ll_usec
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e5f0d2dd3c2faa671711dac6d3ff3cef307bcfe3 ]
+
+In a prior commit I forgot that sk_getsockopt() reads
+sk->sk_ll_usec without holding a lock.
+
+Fixes: 0dbffbb5335a ("net: annotate data race around sk_ll_usec")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index f9fc2a0130a9f..c0c495e0a474e 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1848,7 +1848,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+ 
+ #ifdef CONFIG_NET_RX_BUSY_POLL
+       case SO_BUSY_POLL:
+-              v.val = sk->sk_ll_usec;
++              v.val = READ_ONCE(sk->sk_ll_usec);
+               break;
+       case SO_PREFER_BUSY_POLL:
+               v.val = READ_ONCE(sk->sk_prefer_busy_poll);
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch b/queue-6.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch

new file mode 100644 (file)

index 0000000..bce254a
--- /dev/null
+++ b/queue-6.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch
@@ -0,0 +1,63 @@
+From 8182c2a5f00a197c4225310adc3d11f8676575fe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:16 +0000
+Subject: net: add missing data-race annotations around sk->sk_peek_off
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 11695c6e966b0ec7ed1d16777d294cef865a5c91 ]
+
+sk_getsockopt() runs locklessly, thus we need to annotate the read
+of sk->sk_peek_off.
+
+While we are at it, add corresponding annotations to sk_set_peek_off()
+and unix_set_peek_off().
+
+Fixes: b9bb53f3836f ("sock: convert sk_peek_offset functions to WRITE_ONCE")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c    | 4 ++--
+ net/unix/af_unix.c | 2 +-
+ 2 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 9298dffbe46b8..f9fc2a0130a9f 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1818,7 +1818,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+               if (!sock->ops->set_peek_off)
+                       return -EOPNOTSUPP;
+ 
+-              v.val = sk->sk_peek_off;
++              v.val = READ_ONCE(sk->sk_peek_off);
+               break;
+       case SO_NOFCS:
+               v.val = sock_flag(sk, SOCK_NOFCS);
+@@ -3127,7 +3127,7 @@ EXPORT_SYMBOL(__sk_mem_reclaim);
+ 
+ int sk_set_peek_off(struct sock *sk, int val)
+ {
+-      sk->sk_peek_off = val;
++      WRITE_ONCE(sk->sk_peek_off, val);
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(sk_set_peek_off);
+diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
+index e7728b57a8c70..10615878e3961 100644
+--- a/net/unix/af_unix.c
++++ b/net/unix/af_unix.c
+@@ -780,7 +780,7 @@ static int unix_set_peek_off(struct sock *sk, int val)
+       if (mutex_lock_interruptible(&u->iolock))
+               return -EINTR;
+ 
+-      sk->sk_peek_off = val;
++      WRITE_ONCE(sk->sk_peek_off, val);
+       mutex_unlock(&u->iolock);
+ 
+       return 0;
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch b/queue-6.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch

new file mode 100644 (file)

index 0000000..7ab9ea6
--- /dev/null
+++ b/queue-6.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch
@@ -0,0 +1,36 @@
+From 7d433cd7874cc7f9c48a2a48a333907af9644217 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:14 +0000
+Subject: net: add missing READ_ONCE(sk->sk_rcvbuf) annotation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit b4b553253091cafe9ec38994acf42795e073bef5 ]
+
+In a prior commit, I forgot to change sk_getsockopt()
+when reading sk->sk_rcvbuf locklessly.
+
+Fixes: ebb3b78db7bf ("tcp: annotate sk->sk_rcvbuf lockless reads")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index c5dfeb6d4fec6..8c69610753ec2 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1630,7 +1630,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+               break;
+ 
+       case SO_RCVBUF:
+-              v.val = sk->sk_rcvbuf;
++              v.val = READ_ONCE(sk->sk_rcvbuf);
+               break;
+ 
+       case SO_REUSEADDR:
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch b/queue-6.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch

new file mode 100644 (file)

index 0000000..f7a4669
--- /dev/null
+++ b/queue-6.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch
@@ -0,0 +1,36 @@
+From 94a59b0713f748d393746015711c2136ab6f7157 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:11 +0000
+Subject: net: add missing READ_ONCE(sk->sk_rcvlowat) annotation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e6d12bdb435d23ff6c1890c852d85408a2f496ee ]
+
+In a prior commit, I forgot to change sk_getsockopt()
+when reading sk->sk_rcvlowat locklessly.
+
+Fixes: eac66402d1c3 ("net: annotate sk->sk_rcvlowat lockless reads")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index b87f498072251..aed5d09a41c4b 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1719,7 +1719,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+               break;
+ 
+       case SO_RCVLOWAT:
+-              v.val = sk->sk_rcvlowat;
++              v.val = READ_ONCE(sk->sk_rcvlowat);
+               break;
+ 
+       case SO_SNDLOWAT:
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch b/queue-6.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch

new file mode 100644 (file)

index 0000000..8aad428
--- /dev/null
+++ b/queue-6.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch
@@ -0,0 +1,36 @@
+From 7438e4ce0380b8f2b1ecb156b3aa84ebb0a77429 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:13 +0000
+Subject: net: add missing READ_ONCE(sk->sk_sndbuf) annotation
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 74bc084327c643499474ba75df485607da37dd6e ]
+
+In a prior commit, I forgot to change sk_getsockopt()
+when reading sk->sk_sndbuf locklessly.
+
+Fixes: e292f05e0df7 ("tcp: annotate sk->sk_sndbuf lockless reads")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index aed5d09a41c4b..c5dfeb6d4fec6 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1626,7 +1626,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+               break;
+ 
+       case SO_SNDBUF:
+-              v.val = sk->sk_sndbuf;
++              v.val = READ_ONCE(sk->sk_sndbuf);
+               break;
+ 
+       case SO_RCVBUF:
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-annotate-data-race-around-sk-sk_txrehash.patch b/queue-6.4/net-annotate-data-race-around-sk-sk_txrehash.patch

new file mode 100644 (file)

index 0000000..748ffe6
--- /dev/null
+++ b/queue-6.4/net-annotate-data-race-around-sk-sk_txrehash.patch
@@ -0,0 +1,52 @@
+From 61540bad61147bf9a5bf03c521e702f2bbbec2ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:09 +0000
+Subject: net: annotate data-race around sk->sk_txrehash
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit c76a0328899bbe226f8adeb88b8da9e4167bd316 ]
+
+sk_getsockopt() runs locklessly. This means sk->sk_txrehash
+can be read while other threads are changing its value.
+
+Other locations were handled in commit cb6cd2cec799
+("tcp: Change SYN ACK retransmit behaviour to account for rehash")
+
+Fixes: 26859240e4ee ("txhash: Add socket option to control TX hash rethink behavior")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Akhmat Karakotov <hmukos@yandex-team.ru>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 7b88290ddc6e7..b25511e7e8103 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1523,7 +1523,9 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
+               }
+               if ((u8)val == SOCK_TXREHASH_DEFAULT)
+                       val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
+-              /* Paired with READ_ONCE() in tcp_rtx_synack() */
++              /* Paired with READ_ONCE() in tcp_rtx_synack()
++               * and sk_getsockopt().
++               */
+               WRITE_ONCE(sk->sk_txrehash, (u8)val);
+               break;
+ 
+@@ -1930,7 +1932,8 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+               break;
+ 
+       case SO_TXREHASH:
+-              v.val = sk->sk_txrehash;
++              /* Paired with WRITE_ONCE() in sk_setsockopt() */
++              v.val = READ_ONCE(sk->sk_txrehash);
+               break;
+ 
+       default:
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-annotate-data-races-around-sk-sk_mark.patch b/queue-6.4/net-annotate-data-races-around-sk-sk_mark.patch

new file mode 100644 (file)

index 0000000..c48defd
--- /dev/null
+++ b/queue-6.4/net-annotate-data-races-around-sk-sk_mark.patch
@@ -0,0 +1,462 @@
+From 9bbaddf213a4254d3de17e9b78f2d66006c52be8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:15 +0000
+Subject: net: annotate data-races around sk->sk_mark
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3c5b4d69c358a9275a8de98f87caf6eda644b086 ]
+
+sk->sk_mark is often read while another thread could change the value.
+
+Fixes: 4a19ec5800fc ("[NET]: Introducing socket mark socket option.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/inet_sock.h    | 7 ++++---
+ include/net/ip.h           | 2 +-
+ include/net/route.h        | 4 ++--
+ net/can/raw.c              | 2 +-
+ net/core/sock.c            | 4 ++--
+ net/dccp/ipv6.c            | 4 ++--
+ net/ipv4/inet_diag.c       | 4 ++--
+ net/ipv4/ip_output.c       | 4 ++--
+ net/ipv4/route.c           | 4 ++--
+ net/ipv4/tcp_ipv4.c        | 2 +-
+ net/ipv6/ping.c            | 2 +-
+ net/ipv6/raw.c             | 4 ++--
+ net/ipv6/route.c           | 7 ++++---
+ net/ipv6/tcp_ipv6.c        | 6 +++---
+ net/ipv6/udp.c             | 4 ++--
+ net/l2tp/l2tp_ip6.c        | 2 +-
+ net/mptcp/sockopt.c        | 2 +-
+ net/netfilter/nft_socket.c | 2 +-
+ net/netfilter/xt_socket.c  | 4 ++--
+ net/packet/af_packet.c     | 6 +++---
+ net/smc/af_smc.c           | 2 +-
+ net/xdp/xsk.c              | 2 +-
+ net/xfrm/xfrm_policy.c     | 2 +-
+ 23 files changed, 42 insertions(+), 40 deletions(-)
+
+diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
+index caa20a9055310..0bb32bfc61832 100644
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -107,11 +107,12 @@ static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
+ 
+ static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
+ {
+-      if (!sk->sk_mark &&
+-          READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
++      u32 mark = READ_ONCE(sk->sk_mark);
++
++      if (!mark && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
+               return skb->mark;
+ 
+-      return sk->sk_mark;
++      return mark;
+ }
+ 
+ static inline int inet_request_bound_dev_if(const struct sock *sk,
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 83a1a9bc3ceb1..530e7257e4389 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -93,7 +93,7 @@ static inline void ipcm_init_sk(struct ipcm_cookie *ipcm,
+ {
+       ipcm_init(ipcm);
+ 
+-      ipcm->sockc.mark = inet->sk.sk_mark;
++      ipcm->sockc.mark = READ_ONCE(inet->sk.sk_mark);
+       ipcm->sockc.tsflags = inet->sk.sk_tsflags;
+       ipcm->oif = READ_ONCE(inet->sk.sk_bound_dev_if);
+       ipcm->addr = inet->inet_saddr;
+diff --git a/include/net/route.h b/include/net/route.h
+index bcc367cf3aa2d..9ca0f72868b76 100644
+--- a/include/net/route.h
++++ b/include/net/route.h
+@@ -168,7 +168,7 @@ static inline struct rtable *ip_route_output_ports(struct net *net, struct flowi
+                                                  __be16 dport, __be16 sport,
+                                                  __u8 proto, __u8 tos, int oif)
+ {
+-      flowi4_init_output(fl4, oif, sk ? sk->sk_mark : 0, tos,
++      flowi4_init_output(fl4, oif, sk ? READ_ONCE(sk->sk_mark) : 0, tos,
+                          RT_SCOPE_UNIVERSE, proto,
+                          sk ? inet_sk_flowi_flags(sk) : 0,
+                          daddr, saddr, dport, sport, sock_net_uid(net, sk));
+@@ -301,7 +301,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst,
+       if (inet_sk(sk)->transparent)
+               flow_flags |= FLOWI_FLAG_ANYSRC;
+ 
+-      flowi4_init_output(fl4, oif, sk->sk_mark, ip_sock_rt_tos(sk),
++      flowi4_init_output(fl4, oif, READ_ONCE(sk->sk_mark), ip_sock_rt_tos(sk),
+                          ip_sock_rt_scope(sk), protocol, flow_flags, dst,
+                          src, dport, sport, sk->sk_uid);
+ }
+diff --git a/net/can/raw.c b/net/can/raw.c
+index f64469b98260f..f8e3866157a33 100644
+--- a/net/can/raw.c
++++ b/net/can/raw.c
+@@ -873,7 +873,7 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+ 
+       skb->dev = dev;
+       skb->priority = sk->sk_priority;
+-      skb->mark = sk->sk_mark;
++      skb->mark = READ_ONCE(sk->sk_mark);
+       skb->tstamp = sockc.transmit_time;
+ 
+       skb_setup_tx_timestamp(skb, sockc.tsflags);
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 8c69610753ec2..9298dffbe46b8 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -984,7 +984,7 @@ EXPORT_SYMBOL(sock_set_rcvbuf);
+ static void __sock_set_mark(struct sock *sk, u32 val)
+ {
+       if (val != sk->sk_mark) {
+-              sk->sk_mark = val;
++              WRITE_ONCE(sk->sk_mark, val);
+               sk_dst_reset(sk);
+       }
+ }
+@@ -1799,7 +1799,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+                                                        optval, optlen, len);
+ 
+       case SO_MARK:
+-              v.val = sk->sk_mark;
++              v.val = READ_ONCE(sk->sk_mark);
+               break;
+ 
+       case SO_RCVMARK:
+diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
+index 93c98990d7263..94b69a50c8b50 100644
+--- a/net/dccp/ipv6.c
++++ b/net/dccp/ipv6.c
+@@ -238,8 +238,8 @@ static int dccp_v6_send_response(const struct sock *sk, struct request_sock *req
+               opt = ireq->ipv6_opt;
+               if (!opt)
+                       opt = rcu_dereference(np->opt);
+-              err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass,
+-                             sk->sk_priority);
++              err = ip6_xmit(sk, skb, &fl6, READ_ONCE(sk->sk_mark), opt,
++                             np->tclass, sk->sk_priority);
+               rcu_read_unlock();
+               err = net_xmit_eval(err);
+       }
+diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
+index b812eb36f0e36..f7426926a1041 100644
+--- a/net/ipv4/inet_diag.c
++++ b/net/ipv4/inet_diag.c
+@@ -150,7 +150,7 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
+       }
+ #endif
+ 
+-      if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark))
++      if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, READ_ONCE(sk->sk_mark)))
+               goto errout;
+ 
+       if (ext & (1 << (INET_DIAG_CLASS_ID - 1)) ||
+@@ -799,7 +799,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
+       entry.ifindex = sk->sk_bound_dev_if;
+       entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
+       if (sk_fullsock(sk))
+-              entry.mark = sk->sk_mark;
++              entry.mark = READ_ONCE(sk->sk_mark);
+       else if (sk->sk_state == TCP_NEW_SYN_RECV)
+               entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
+       else if (sk->sk_state == TCP_TIME_WAIT)
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index d95e40a47098a..80c94749eafe2 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -186,7 +186,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
+ 
+       skb->priority = sk->sk_priority;
+       if (!skb->mark)
+-              skb->mark = sk->sk_mark;
++              skb->mark = READ_ONCE(sk->sk_mark);
+ 
+       /* Send it out. */
+       return ip_local_out(net, skb->sk, skb);
+@@ -529,7 +529,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+ 
+       /* TODO : should we use skb->sk here instead of sk ? */
+       skb->priority = sk->sk_priority;
+-      skb->mark = sk->sk_mark;
++      skb->mark = READ_ONCE(sk->sk_mark);
+ 
+       res = ip_local_out(net, sk, skb);
+       rcu_read_unlock();
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 98d7e6ba7493b..92fede388d520 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -518,7 +518,7 @@ static void __build_flow_key(const struct net *net, struct flowi4 *fl4,
+               const struct inet_sock *inet = inet_sk(sk);
+ 
+               oif = sk->sk_bound_dev_if;
+-              mark = sk->sk_mark;
++              mark = READ_ONCE(sk->sk_mark);
+               tos = ip_sock_rt_tos(sk);
+               scope = ip_sock_rt_scope(sk);
+               prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
+@@ -552,7 +552,7 @@ static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
+       inet_opt = rcu_dereference(inet->inet_opt);
+       if (inet_opt && inet_opt->opt.srr)
+               daddr = inet_opt->opt.faddr;
+-      flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
++      flowi4_init_output(fl4, sk->sk_bound_dev_if, READ_ONCE(sk->sk_mark),
+                          ip_sock_rt_tos(sk) & IPTOS_RT_MASK,
+                          ip_sock_rt_scope(sk),
+                          inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index f37d13ee7b4cc..48429f0ee23b0 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -931,7 +931,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
+       ctl_sk = this_cpu_read(ipv4_tcp_sk);
+       sock_net_set(ctl_sk, net);
+       ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+-                         inet_twsk(sk)->tw_mark : sk->sk_mark;
++                         inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
+       ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+                          inet_twsk(sk)->tw_priority : sk->sk_priority;
+       transmit_time = tcp_transmit_time(sk);
+diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
+index f804c11e2146c..c2c291827a2ce 100644
+--- a/net/ipv6/ping.c
++++ b/net/ipv6/ping.c
+@@ -120,7 +120,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+ 
+       ipcm6_init_sk(&ipc6, np);
+       ipc6.sockc.tsflags = sk->sk_tsflags;
+-      ipc6.sockc.mark = sk->sk_mark;
++      ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
+ 
+       fl6.flowi6_oif = oif;
+ 
+diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
+index 44ee7a2e72ac2..a90a09658a71a 100644
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -774,12 +774,12 @@ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+        */
+       memset(&fl6, 0, sizeof(fl6));
+ 
+-      fl6.flowi6_mark = sk->sk_mark;
++      fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
+       fl6.flowi6_uid = sk->sk_uid;
+ 
+       ipcm6_init(&ipc6);
+       ipc6.sockc.tsflags = sk->sk_tsflags;
+-      ipc6.sockc.mark = sk->sk_mark;
++      ipc6.sockc.mark = fl6.flowi6_mark;
+ 
+       if (sin6) {
+               if (addr_len < SIN6_LEN_RFC2133)
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index 392aaa373b667..d5c6be77ec1ea 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -2951,7 +2951,8 @@ void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
+       if (!oif && skb->dev)
+               oif = l3mdev_master_ifindex(skb->dev);
+ 
+-      ip6_update_pmtu(skb, sock_net(sk), mtu, oif, sk->sk_mark, sk->sk_uid);
++      ip6_update_pmtu(skb, sock_net(sk), mtu, oif, READ_ONCE(sk->sk_mark),
++                      sk->sk_uid);
+ 
+       dst = __sk_dst_get(sk);
+       if (!dst || !dst->obsolete ||
+@@ -3172,8 +3173,8 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif)
+ 
+ void ip6_sk_redirect(struct sk_buff *skb, struct sock *sk)
+ {
+-      ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if, sk->sk_mark,
+-                   sk->sk_uid);
++      ip6_redirect(skb, sock_net(sk), sk->sk_bound_dev_if,
++                   READ_ONCE(sk->sk_mark), sk->sk_uid);
+ }
+ EXPORT_SYMBOL_GPL(ip6_sk_redirect);
+ 
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index f7c248a7f8d1d..346c9bcd5849d 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -568,8 +568,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
+               opt = ireq->ipv6_opt;
+               if (!opt)
+                       opt = rcu_dereference(np->opt);
+-              err = ip6_xmit(sk, skb, fl6, skb->mark ? : sk->sk_mark, opt,
+-                             tclass, sk->sk_priority);
++              err = ip6_xmit(sk, skb, fl6, skb->mark ? : READ_ONCE(sk->sk_mark),
++                             opt, tclass, sk->sk_priority);
+               rcu_read_unlock();
+               err = net_xmit_eval(err);
+       }
+@@ -943,7 +943,7 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32
+               if (sk->sk_state == TCP_TIME_WAIT)
+                       mark = inet_twsk(sk)->tw_mark;
+               else
+-                      mark = sk->sk_mark;
++                      mark = READ_ONCE(sk->sk_mark);
+               skb_set_delivery_time(buff, tcp_transmit_time(sk), true);
+       }
+       if (txhash) {
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index 27292d44df654..8521729fb2375 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -628,7 +628,7 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+       if (type == NDISC_REDIRECT) {
+               if (tunnel) {
+                       ip6_redirect(skb, sock_net(sk), inet6_iif(skb),
+-                                   sk->sk_mark, sk->sk_uid);
++                                   READ_ONCE(sk->sk_mark), sk->sk_uid);
+               } else {
+                       ip6_sk_redirect(skb, sk);
+               }
+@@ -1360,7 +1360,7 @@ int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       ipcm6_init(&ipc6);
+       ipc6.gso_size = READ_ONCE(up->gso_size);
+       ipc6.sockc.tsflags = sk->sk_tsflags;
+-      ipc6.sockc.mark = sk->sk_mark;
++      ipc6.sockc.mark = READ_ONCE(sk->sk_mark);
+ 
+       /* destination address check */
+       if (sin6) {
+diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
+index 5137ea1861ce2..bce4132b0a5c8 100644
+--- a/net/l2tp/l2tp_ip6.c
++++ b/net/l2tp/l2tp_ip6.c
+@@ -519,7 +519,7 @@ static int l2tp_ip6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
+       /* Get and verify the address */
+       memset(&fl6, 0, sizeof(fl6));
+ 
+-      fl6.flowi6_mark = sk->sk_mark;
++      fl6.flowi6_mark = READ_ONCE(sk->sk_mark);
+       fl6.flowi6_uid = sk->sk_uid;
+ 
+       ipcm6_init(&ipc6);
+diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
+index d4258869ac48f..64fcfc3d5270f 100644
+--- a/net/mptcp/sockopt.c
++++ b/net/mptcp/sockopt.c
+@@ -102,7 +102,7 @@ static void mptcp_sol_socket_sync_intval(struct mptcp_sock *msk, int optname, in
+                       break;
+               case SO_MARK:
+                       if (READ_ONCE(ssk->sk_mark) != sk->sk_mark) {
+-                              ssk->sk_mark = sk->sk_mark;
++                              WRITE_ONCE(ssk->sk_mark, sk->sk_mark);
+                               sk_dst_reset(ssk);
+                       }
+                       break;
+diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
+index 85f8df87efdaa..1dd336a3ce786 100644
+--- a/net/netfilter/nft_socket.c
++++ b/net/netfilter/nft_socket.c
+@@ -107,7 +107,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
+               break;
+       case NFT_SOCKET_MARK:
+               if (sk_fullsock(sk)) {
+-                      *dest = sk->sk_mark;
++                      *dest = READ_ONCE(sk->sk_mark);
+               } else {
+                       regs->verdict.code = NFT_BREAK;
+                       return;
+diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
+index 7013f55f05d1e..76e01f292aaff 100644
+--- a/net/netfilter/xt_socket.c
++++ b/net/netfilter/xt_socket.c
+@@ -77,7 +77,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
+ 
+               if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
+                   transparent && sk_fullsock(sk))
+-                      pskb->mark = sk->sk_mark;
++                      pskb->mark = READ_ONCE(sk->sk_mark);
+ 
+               if (sk != skb->sk)
+                       sock_gen_put(sk);
+@@ -138,7 +138,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
+ 
+               if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
+                   transparent && sk_fullsock(sk))
+-                      pskb->mark = sk->sk_mark;
++                      pskb->mark = READ_ONCE(sk->sk_mark);
+ 
+               if (sk != skb->sk)
+                       sock_gen_put(sk);
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+index a2dbeb264f260..6f033c334c7b4 100644
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2051,7 +2051,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
+       skb->protocol = proto;
+       skb->dev = dev;
+       skb->priority = sk->sk_priority;
+-      skb->mark = sk->sk_mark;
++      skb->mark = READ_ONCE(sk->sk_mark);
+       skb->tstamp = sockc.transmit_time;
+ 
+       skb_setup_tx_timestamp(skb, sockc.tsflags);
+@@ -2586,7 +2586,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
+       skb->protocol = proto;
+       skb->dev = dev;
+       skb->priority = po->sk.sk_priority;
+-      skb->mark = po->sk.sk_mark;
++      skb->mark = READ_ONCE(po->sk.sk_mark);
+       skb->tstamp = sockc->transmit_time;
+       skb_setup_tx_timestamp(skb, sockc->tsflags);
+       skb_zcopy_set_nouarg(skb, ph.raw);
+@@ -2988,7 +2988,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
+               goto out_unlock;
+ 
+       sockcm_init(&sockc, sk);
+-      sockc.mark = sk->sk_mark;
++      sockc.mark = READ_ONCE(sk->sk_mark);
+       if (msg->msg_controllen) {
+               err = sock_cmsg_send(sk, msg, &sockc);
+               if (unlikely(err))
+diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
+index 538e9c6ec8c98..fa6b54c1411cb 100644
+--- a/net/smc/af_smc.c
++++ b/net/smc/af_smc.c
+@@ -445,7 +445,7 @@ static void smc_copy_sock_settings(struct sock *nsk, struct sock *osk,
+       nsk->sk_rcvbuf = osk->sk_rcvbuf;
+       nsk->sk_sndtimeo = osk->sk_sndtimeo;
+       nsk->sk_rcvtimeo = osk->sk_rcvtimeo;
+-      nsk->sk_mark = osk->sk_mark;
++      nsk->sk_mark = READ_ONCE(osk->sk_mark);
+       nsk->sk_priority = osk->sk_priority;
+       nsk->sk_rcvlowat = osk->sk_rcvlowat;
+       nsk->sk_bound_dev_if = osk->sk_bound_dev_if;
+diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
+index 32dd55b9ce8a8..35e518eaaebae 100644
+--- a/net/xdp/xsk.c
++++ b/net/xdp/xsk.c
+@@ -505,7 +505,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
+ 
+       skb->dev = dev;
+       skb->priority = xs->sk.sk_priority;
+-      skb->mark = xs->sk.sk_mark;
++      skb->mark = READ_ONCE(xs->sk.sk_mark);
+       skb_shinfo(skb)->destructor_arg = (void *)(long)desc->addr;
+       skb->destructor = xsk_destruct_skb;
+ 
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index e7617c9959c31..d6b405782b636 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -2250,7 +2250,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
+ 
+               match = xfrm_selector_match(&pol->selector, fl, family);
+               if (match) {
+-                      if ((sk->sk_mark & pol->mark.m) != pol->mark.v ||
++                      if ((READ_ONCE(sk->sk_mark) & pol->mark.m) != pol->mark.v ||
+                           pol->if_id != if_id) {
+                               pol = NULL;
+                               goto out;
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch b/queue-6.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch

new file mode 100644 (file)

index 0000000..542cfd9
--- /dev/null
+++ b/queue-6.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch
@@ -0,0 +1,54 @@
+From bd2bafe1bb3c0e40a4b54da0864fcb4e2129366b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:10 +0000
+Subject: net: annotate data-races around sk->sk_max_pacing_rate
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ea7f45ef77b39e72244d282e47f6cb1ef4135cd2 ]
+
+sk_getsockopt() runs locklessly. This means sk->sk_max_pacing_rate
+can be read while other threads are changing its value.
+
+Fixes: 62748f32d501 ("net: introduce SO_MAX_PACING_RATE")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 9 ++++++---
+ 1 file changed, 6 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index b25511e7e8103..b87f498072251 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1428,7 +1428,8 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
+                       cmpxchg(&sk->sk_pacing_status,
+                               SK_PACING_NONE,
+                               SK_PACING_NEEDED);
+-              sk->sk_max_pacing_rate = ulval;
++              /* Pairs with READ_ONCE() from sk_getsockopt() */
++              WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
+               sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
+               break;
+               }
+@@ -1855,12 +1856,14 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+ #endif
+ 
+       case SO_MAX_PACING_RATE:
++              /* The READ_ONCE() pair with the WRITE_ONCE() in sk_setsockopt() */
+               if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
+                       lv = sizeof(v.ulval);
+-                      v.ulval = sk->sk_max_pacing_rate;
++                      v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
+               } else {
+                       /* 32bit version */
+-                      v.val = min_t(unsigned long, sk->sk_max_pacing_rate, ~0U);
++                      v.val = min_t(unsigned long, ~0U,
++                                    READ_ONCE(sk->sk_max_pacing_rate));
+               }
+               break;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-annotate-data-races-around-sk-sk_priority.patch b/queue-6.4/net-annotate-data-races-around-sk-sk_priority.patch

new file mode 100644 (file)

index 0000000..731d8f0
--- /dev/null
+++ b/queue-6.4/net-annotate-data-races-around-sk-sk_priority.patch
@@ -0,0 +1,184 @@
+From 46296cb1fa493da904268b0e636b9d68fcbeed95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:18 +0000
+Subject: net: annotate data-races around sk->sk_priority
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 8bf43be799d4b242ea552a14db10456446be843e ]
+
+sk_getsockopt() runs locklessly. This means sk->sk_priority
+can be read while other threads are changing its value.
+
+Other reads also happen without socket lock being held.
+
+Add missing annotations where needed.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c        | 6 +++---
+ net/ipv4/ip_output.c   | 4 ++--
+ net/ipv4/ip_sockglue.c | 2 +-
+ net/ipv4/raw.c         | 2 +-
+ net/ipv4/tcp_ipv4.c    | 2 +-
+ net/ipv6/raw.c         | 2 +-
+ net/ipv6/tcp_ipv6.c    | 3 ++-
+ net/packet/af_packet.c | 6 +++---
+ 8 files changed, 14 insertions(+), 13 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index c0c495e0a474e..1f31a97100d4f 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -800,7 +800,7 @@ EXPORT_SYMBOL(sock_no_linger);
+ void sock_set_priority(struct sock *sk, u32 priority)
+ {
+       lock_sock(sk);
+-      sk->sk_priority = priority;
++      WRITE_ONCE(sk->sk_priority, priority);
+       release_sock(sk);
+ }
+ EXPORT_SYMBOL(sock_set_priority);
+@@ -1210,7 +1210,7 @@ int sk_setsockopt(struct sock *sk, int level, int optname,
+               if ((val >= 0 && val <= 6) ||
+                   sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) ||
+                   sockopt_ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+-                      sk->sk_priority = val;
++                      WRITE_ONCE(sk->sk_priority, val);
+               else
+                       ret = -EPERM;
+               break;
+@@ -1672,7 +1672,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+               break;
+ 
+       case SO_PRIORITY:
+-              v.val = sk->sk_priority;
++              v.val = READ_ONCE(sk->sk_priority);
+               break;
+ 
+       case SO_LINGER:
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index 80c94749eafe2..6f6f63cf9224f 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -184,7 +184,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
+               ip_options_build(skb, &opt->opt, daddr, rt);
+       }
+ 
+-      skb->priority = sk->sk_priority;
++      skb->priority = READ_ONCE(sk->sk_priority);
+       if (!skb->mark)
+               skb->mark = READ_ONCE(sk->sk_mark);
+ 
+@@ -528,7 +528,7 @@ int __ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
+                            skb_shinfo(skb)->gso_segs ?: 1);
+ 
+       /* TODO : should we use skb->sk here instead of sk ? */
+-      skb->priority = sk->sk_priority;
++      skb->priority = READ_ONCE(sk->sk_priority);
+       skb->mark = READ_ONCE(sk->sk_mark);
+ 
+       res = ip_local_out(net, sk, skb);
+diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
+index 8e97d8d4cc9d9..d41bce8927b2c 100644
+--- a/net/ipv4/ip_sockglue.c
++++ b/net/ipv4/ip_sockglue.c
+@@ -592,7 +592,7 @@ void __ip_sock_set_tos(struct sock *sk, int val)
+       }
+       if (inet_sk(sk)->tos != val) {
+               inet_sk(sk)->tos = val;
+-              sk->sk_priority = rt_tos2priority(val);
++              WRITE_ONCE(sk->sk_priority, rt_tos2priority(val));
+               sk_dst_reset(sk);
+       }
+ }
+diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
+index eadf1c9ef7e49..fb31624019435 100644
+--- a/net/ipv4/raw.c
++++ b/net/ipv4/raw.c
+@@ -348,7 +348,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
+               goto error;
+       skb_reserve(skb, hlen);
+ 
+-      skb->priority = sk->sk_priority;
++      skb->priority = READ_ONCE(sk->sk_priority);
+       skb->mark = sockc->mark;
+       skb->tstamp = sockc->transmit_time;
+       skb_dst_set(skb, &rt->dst);
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index 48429f0ee23b0..498dd4acdeec8 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -933,7 +933,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
+       ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ?
+                          inet_twsk(sk)->tw_mark : READ_ONCE(sk->sk_mark);
+       ctl_sk->sk_priority = (sk->sk_state == TCP_TIME_WAIT) ?
+-                         inet_twsk(sk)->tw_priority : sk->sk_priority;
++                         inet_twsk(sk)->tw_priority : READ_ONCE(sk->sk_priority);
+       transmit_time = tcp_transmit_time(sk);
+       ip_send_unicast_reply(ctl_sk,
+                             skb, &TCP_SKB_CB(skb)->header.h4.opt,
+diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
+index a90a09658a71a..d85d2082aeb77 100644
+--- a/net/ipv6/raw.c
++++ b/net/ipv6/raw.c
+@@ -614,7 +614,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
+       skb_reserve(skb, hlen);
+ 
+       skb->protocol = htons(ETH_P_IPV6);
+-      skb->priority = sk->sk_priority;
++      skb->priority = READ_ONCE(sk->sk_priority);
+       skb->mark = sockc->mark;
+       skb->tstamp = sockc->transmit_time;
+ 
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 346c9bcd5849d..3155692a0e06b 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1132,7 +1132,8 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+                       tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+                       READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
+                       tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
+-                      ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
++                      ipv6_get_dsfield(ipv6_hdr(skb)), 0,
++                      READ_ONCE(sk->sk_priority),
+                       READ_ONCE(tcp_rsk(req)->txhash));
+ }
+ 
+diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
+index 6f033c334c7b4..a753246ef1657 100644
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -2050,7 +2050,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
+ 
+       skb->protocol = proto;
+       skb->dev = dev;
+-      skb->priority = sk->sk_priority;
++      skb->priority = READ_ONCE(sk->sk_priority);
+       skb->mark = READ_ONCE(sk->sk_mark);
+       skb->tstamp = sockc.transmit_time;
+ 
+@@ -2585,7 +2585,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
+ 
+       skb->protocol = proto;
+       skb->dev = dev;
+-      skb->priority = po->sk.sk_priority;
++      skb->priority = READ_ONCE(po->sk.sk_priority);
+       skb->mark = READ_ONCE(po->sk.sk_mark);
+       skb->tstamp = sockc->transmit_time;
+       skb_setup_tx_timestamp(skb, sockc->tsflags);
+@@ -3061,7 +3061,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
+ 
+       skb->protocol = proto;
+       skb->dev = dev;
+-      skb->priority = sk->sk_priority;
++      skb->priority = READ_ONCE(sk->sk_priority);
+       skb->mark = sockc.mark;
+       skb->tstamp = sockc.transmit_time;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-annotate-data-races-around-sk-sk_reserved_mem.patch b/queue-6.4/net-annotate-data-races-around-sk-sk_reserved_mem.patch

new file mode 100644 (file)

index 0000000..4a51365
--- /dev/null
+++ b/queue-6.4/net-annotate-data-races-around-sk-sk_reserved_mem.patch
@@ -0,0 +1,58 @@
+From 3635dfb8ba8ad25c5851c768b581f30e3e060aa3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 15:03:08 +0000
+Subject: net: annotate data-races around sk->sk_reserved_mem
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit fe11fdcb4207907d80cda2e73777465d68131e66 ]
+
+sk_getsockopt() runs locklessly. This means sk->sk_reserved_mem
+can be read while other threads are changing its value.
+
+Add missing annotations where they are needed.
+
+Fixes: 2bb2f5fb21b0 ("net: add new socket option SO_RESERVE_MEM")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Wei Wang <weiwan@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/net/core/sock.c b/net/core/sock.c
+index 4a0edccf86066..7b88290ddc6e7 100644
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1003,7 +1003,7 @@ static void sock_release_reserved_memory(struct sock *sk, int bytes)
+       bytes = round_down(bytes, PAGE_SIZE);
+ 
+       WARN_ON(bytes > sk->sk_reserved_mem);
+-      sk->sk_reserved_mem -= bytes;
++      WRITE_ONCE(sk->sk_reserved_mem, sk->sk_reserved_mem - bytes);
+       sk_mem_reclaim(sk);
+ }
+ 
+@@ -1040,7 +1040,8 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
+       }
+       sk->sk_forward_alloc += pages << PAGE_SHIFT;
+ 
+-      sk->sk_reserved_mem += pages << PAGE_SHIFT;
++      WRITE_ONCE(sk->sk_reserved_mem,
++                 sk->sk_reserved_mem + (pages << PAGE_SHIFT));
+ 
+       return 0;
+ }
+@@ -1925,7 +1926,7 @@ int sk_getsockopt(struct sock *sk, int level, int optname,
+               break;
+ 
+       case SO_RESERVE_MEM:
+-              v.val = sk->sk_reserved_mem;
++              v.val = READ_ONCE(sk->sk_reserved_mem);
+               break;
+ 
+       case SO_TXREHASH:
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch b/queue-6.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch

new file mode 100644 (file)

index 0000000..5dcbebf
--- /dev/null
+++ b/queue-6.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch
@@ -0,0 +1,103 @@
+From 9f8a65be2c428eac14abb0d7a26bf0aee883d757 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 09:32:48 +0800
+Subject: net: dcb: choose correct policy to parse DCB_ATTR_BCN
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit 31d49ba033095f6e8158c60f69714a500922e0c3 ]
+
+dcbnl_bcn_setcfg() uses an erroneous policy to parse tb[DCB_ATTR_BCN],
+which was introduced in commit 859ee3c43812 ("DCB: Add support for DCB
+BCN"). Please see the comments in the code below:
+
+static int dcbnl_bcn_setcfg(...)
+{
+  ...
+  ret = nla_parse_nested_deprecated(..., dcbnl_pfc_up_nest, .. )
+  // !!! dcbnl_pfc_up_nest for attributes
+  //  DCB_PFC_UP_ATTR_0 to DCB_PFC_UP_ATTR_ALL in enum dcbnl_pfc_up_attrs
+  ...
+  for (i = DCB_BCN_ATTR_RP_0; i <= DCB_BCN_ATTR_RP_7; i++) {
+  // !!! DCB_BCN_ATTR_RP_0 to DCB_BCN_ATTR_RP_7 in enum dcbnl_bcn_attrs
+    ...
+    value_byte = nla_get_u8(data[i]);
+    ...
+  }
+  ...
+  for (i = DCB_BCN_ATTR_BCNA_0; i <= DCB_BCN_ATTR_RI; i++) {
+  // !!! DCB_BCN_ATTR_BCNA_0 to DCB_BCN_ATTR_RI in enum dcbnl_bcn_attrs
+  ...
+    value_int = nla_get_u32(data[i]);
+  ...
+  }
+  ...
+}
+
+That is, the nla_parse_nested_deprecated uses dcbnl_pfc_up_nest
+attributes to parse nlattr defined in dcbnl_pfc_up_attrs. But the
+following access code fetches each nlattr as dcbnl_bcn_attrs attributes.
+By looking up the associated nla_policy for dcbnl_bcn_attrs, we can find
+that the beginning parts of these two policies are the "same".
+
+static const struct nla_policy dcbnl_pfc_up_nest[...] = {
+        [DCB_PFC_UP_ATTR_0]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_1]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_2]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_3]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_4]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_5]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_6]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_7]   = {.type = NLA_U8},
+        [DCB_PFC_UP_ATTR_ALL] = {.type = NLA_FLAG},
+};
+
+static const struct nla_policy dcbnl_bcn_nest[...] = {
+        [DCB_BCN_ATTR_RP_0]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_1]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_2]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_3]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_4]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_5]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_6]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_7]         = {.type = NLA_U8},
+        [DCB_BCN_ATTR_RP_ALL]       = {.type = NLA_FLAG},
+        // from here is somewhat different
+        [DCB_BCN_ATTR_BCNA_0]       = {.type = NLA_U32},
+        ...
+        [DCB_BCN_ATTR_ALL]          = {.type = NLA_FLAG},
+};
+
+Therefore, the current code is buggy and this
+nla_parse_nested_deprecated could overflow the dcbnl_pfc_up_nest and use
+the adjacent nla_policy to parse attributes from DCB_BCN_ATTR_BCNA_0.
+
+Hence use the correct policy dcbnl_bcn_nest to parse the nested
+tb[DCB_ATTR_BCN] TLV.
+
+Fixes: 859ee3c43812 ("DCB: Add support for DCB BCN")
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://lore.kernel.org/r/20230801013248.87240-1-linma@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/dcb/dcbnl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
+index c0c4381285759..2e6b8c8fd2ded 100644
+--- a/net/dcb/dcbnl.c
++++ b/net/dcb/dcbnl.c
+@@ -980,7 +980,7 @@ static int dcbnl_bcn_setcfg(struct net_device *netdev, struct nlmsghdr *nlh,
+               return -EOPNOTSUPP;
+ 
+       ret = nla_parse_nested_deprecated(data, DCB_BCN_ATTR_MAX,
+-                                        tb[DCB_ATTR_BCN], dcbnl_pfc_up_nest,
++                                        tb[DCB_ATTR_BCN], dcbnl_bcn_nest,
+                                         NULL);
+       if (ret)
+               return ret;
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-dsa-fix-value-check-in-bcm_sf2_sw_probe.patch b/queue-6.4/net-dsa-fix-value-check-in-bcm_sf2_sw_probe.patch

new file mode 100644 (file)

index 0000000..3f03e9e
--- /dev/null
+++ b/queue-6.4/net-dsa-fix-value-check-in-bcm_sf2_sw_probe.patch
@@ -0,0 +1,52 @@
+From ab1e0a1f3b045d8c00c4ba94475780cdedc2491f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 01:05:06 +0800
+Subject: net: dsa: fix value check in bcm_sf2_sw_probe()
+
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+
+[ Upstream commit dadc5b86cc9459581f37fe755b431adc399ea393 ]
+
+In bcm_sf2_sw_probe(), check the return value of clk_prepare_enable()
+and return the error code if clk_prepare_enable() returns an
+unexpected value.
+
+Fixes: e9ec5c3bd238 ("net: dsa: bcm_sf2: request and handle clocks")
+Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Reviewed-by: Florian Fainelli <florian.fainelli@broadcom.com>
+Link: https://lore.kernel.org/r/20230726170506.16547-1-ruc_gongyuanjun@163.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/bcm_sf2.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c
+index cde253d27bd08..72374b066f64a 100644
+--- a/drivers/net/dsa/bcm_sf2.c
++++ b/drivers/net/dsa/bcm_sf2.c
+@@ -1436,7 +1436,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
+       if (IS_ERR(priv->clk))
+               return PTR_ERR(priv->clk);
+ 
+-      clk_prepare_enable(priv->clk);
++      ret = clk_prepare_enable(priv->clk);
++      if (ret)
++              return ret;
+ 
+       priv->clk_mdiv = devm_clk_get_optional(&pdev->dev, "sw_switch_mdiv");
+       if (IS_ERR(priv->clk_mdiv)) {
+@@ -1444,7 +1446,9 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
+               goto out_clk;
+       }
+ 
+-      clk_prepare_enable(priv->clk_mdiv);
++      ret = clk_prepare_enable(priv->clk_mdiv);
++      if (ret)
++              goto out_clk;
+ 
+       ret = bcm_sf2_sw_rst(priv);
+       if (ret) {
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-gro-fix-misuse-of-cb-in-udp-socket-lookup.patch b/queue-6.4/net-gro-fix-misuse-of-cb-in-udp-socket-lookup.patch

new file mode 100644 (file)

index 0000000..8367078
--- /dev/null
+++ b/queue-6.4/net-gro-fix-misuse-of-cb-in-udp-socket-lookup.patch
@@ -0,0 +1,182 @@
+From d027ebcd275c600b7a1f0a858b81b9f659e48930 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 17:33:56 +0200
+Subject: net: gro: fix misuse of CB in udp socket lookup
+
+From: Richard Gobert <richardbgobert@gmail.com>
+
+[ Upstream commit 7938cd15436873f649f31cb867bac2d88ca564d0 ]
+
+This patch fixes a misuse of IP{6}CB(skb) in GRO when calling
+`udp6_lib_lookup2` while handling udp tunnels. `udp6_lib_lookup2` fetches the
+device from CB. The fix changes it to fetch the device from `skb->dev`.
+l3mdev case requires special attention since it has a master and a slave
+device.
+
+Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket")
+Reported-by: Gal Pressman <gal@nvidia.com>
+Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/gro.h      | 43 ++++++++++++++++++++++++++++++++++++++++++
+ net/ipv4/udp.c         |  8 ++++++--
+ net/ipv4/udp_offload.c |  7 +++++--
+ net/ipv6/udp.c         |  8 ++++++--
+ net/ipv6/udp_offload.c |  7 +++++--
+ 5 files changed, 65 insertions(+), 8 deletions(-)
+
+diff --git a/include/net/gro.h b/include/net/gro.h
+index 972ff42d3a829..d3d318e7d917b 100644
+--- a/include/net/gro.h
++++ b/include/net/gro.h
+@@ -446,6 +446,49 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
+               gro_normal_list(napi);
+ }
+ 
++/* This function is the alternative of 'inet_iif' and 'inet_sdif'
++ * functions in case we can not rely on fields of IPCB.
++ *
++ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
++ * The caller must hold the RCU read lock.
++ */
++static inline void inet_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
++{
++      *iif = inet_iif(skb) ?: skb->dev->ifindex;
++      *sdif = 0;
++
++#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
++      if (netif_is_l3_slave(skb->dev)) {
++              struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
++
++              *sdif = *iif;
++              *iif = master ? master->ifindex : 0;
++      }
++#endif
++}
++
++/* This function is the alternative of 'inet6_iif' and 'inet6_sdif'
++ * functions in case we can not rely on fields of IP6CB.
++ *
++ * The caller must verify skb_valid_dst(skb) is false and skb->dev is initialized.
++ * The caller must hold the RCU read lock.
++ */
++static inline void inet6_get_iif_sdif(const struct sk_buff *skb, int *iif, int *sdif)
++{
++      /* using skb->dev->ifindex because skb_dst(skb) is not initialized */
++      *iif = skb->dev->ifindex;
++      *sdif = 0;
++
++#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
++      if (netif_is_l3_slave(skb->dev)) {
++              struct net_device *master = netdev_master_upper_dev_get_rcu(skb->dev);
++
++              *sdif = *iif;
++              *iif = master ? master->ifindex : 0;
++      }
++#endif
++}
++
+ extern struct list_head offload_base;
+ 
+ #endif /* _NET_IPV6_GRO_H */
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index c6b790001aa77..6d327d6d978c5 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -114,6 +114,7 @@
+ #include <net/sock_reuseport.h>
+ #include <net/addrconf.h>
+ #include <net/udp_tunnel.h>
++#include <net/gro.h>
+ #if IS_ENABLED(CONFIG_IPV6)
+ #include <net/ipv6_stubs.h>
+ #endif
+@@ -555,10 +556,13 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
+ {
+       const struct iphdr *iph = ip_hdr(skb);
+       struct net *net = dev_net(skb->dev);
++      int iif, sdif;
++
++      inet_get_iif_sdif(skb, &iif, &sdif);
+ 
+       return __udp4_lib_lookup(net, iph->saddr, sport,
+-                               iph->daddr, dport, inet_iif(skb),
+-                               inet_sdif(skb), net->ipv4.udp_table, NULL);
++                               iph->daddr, dport, iif,
++                               sdif, net->ipv4.udp_table, NULL);
+ }
+ 
+ /* Must be called under rcu_read_lock().
+diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
+index f402946da344b..0f46b3c2e4ac5 100644
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -609,10 +609,13 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
+ {
+       const struct iphdr *iph = skb_gro_network_header(skb);
+       struct net *net = dev_net(skb->dev);
++      int iif, sdif;
++
++      inet_get_iif_sdif(skb, &iif, &sdif);
+ 
+       return __udp4_lib_lookup(net, iph->saddr, sport,
+-                               iph->daddr, dport, inet_iif(skb),
+-                               inet_sdif(skb), net->ipv4.udp_table, NULL);
++                               iph->daddr, dport, iif,
++                               sdif, net->ipv4.udp_table, NULL);
+ }
+ 
+ INDIRECT_CALLABLE_SCOPE
+diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
+index d594a0425749b..27292d44df654 100644
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -51,6 +51,7 @@
+ #include <net/inet6_hashtables.h>
+ #include <net/busy_poll.h>
+ #include <net/sock_reuseport.h>
++#include <net/gro.h>
+ 
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
+@@ -300,10 +301,13 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
+ {
+       const struct ipv6hdr *iph = ipv6_hdr(skb);
+       struct net *net = dev_net(skb->dev);
++      int iif, sdif;
++
++      inet6_get_iif_sdif(skb, &iif, &sdif);
+ 
+       return __udp6_lib_lookup(net, &iph->saddr, sport,
+-                               &iph->daddr, dport, inet6_iif(skb),
+-                               inet6_sdif(skb), net->ipv4.udp_table, NULL);
++                               &iph->daddr, dport, iif,
++                               sdif, net->ipv4.udp_table, NULL);
+ }
+ 
+ /* Must be called under rcu_read_lock().
+diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
+index 09fa7a42cb937..6b95ba241ebe2 100644
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -118,10 +118,13 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport,
+ {
+       const struct ipv6hdr *iph = skb_gro_network_header(skb);
+       struct net *net = dev_net(skb->dev);
++      int iif, sdif;
++
++      inet6_get_iif_sdif(skb, &iif, &sdif);
+ 
+       return __udp6_lib_lookup(net, &iph->saddr, sport,
+-                               &iph->daddr, dport, inet6_iif(skb),
+-                               inet6_sdif(skb), net->ipv4.udp_table, NULL);
++                               &iph->daddr, dport, iif,
++                               sdif, net->ipv4.udp_table, NULL);
+ }
+ 
+ INDIRECT_CALLABLE_SCOPE
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-korina-handle-clk-prepare-error-in-korina_probe.patch b/queue-6.4/net-korina-handle-clk-prepare-error-in-korina_probe.patch

new file mode 100644 (file)

index 0000000..da3a956
--- /dev/null
+++ b/queue-6.4/net-korina-handle-clk-prepare-error-in-korina_probe.patch
@@ -0,0 +1,43 @@
+From ccfaf36946d8711b29c50ece3c3540f7a3ada79b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 17:05:35 +0800
+Subject: net: korina: handle clk prepare error in korina_probe()
+
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+
+[ Upstream commit 0b6291ad1940c403734312d0e453e8dac9148f69 ]
+
+In korina_probe(), the return value of clk_prepare_enable()
+should be checked since it might fail. We can use
+devm_clk_get_optional_enabled() instead of devm_clk_get_optional()
+and clk_prepare_enable() to automatically handle the error.
+
+Fixes: e4cd854ec487 ("net: korina: Get mdio input clock via common clock framework")
+Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Link: https://lore.kernel.org/r/20230731090535.21416-1-ruc_gongyuanjun@163.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/korina.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
+index 2b9335cb4bb3a..8537578e1cf1d 100644
+--- a/drivers/net/ethernet/korina.c
++++ b/drivers/net/ethernet/korina.c
+@@ -1302,11 +1302,10 @@ static int korina_probe(struct platform_device *pdev)
+       else if (of_get_ethdev_address(pdev->dev.of_node, dev) < 0)
+               eth_hw_addr_random(dev);
+ 
+-      clk = devm_clk_get_optional(&pdev->dev, "mdioclk");
++      clk = devm_clk_get_optional_enabled(&pdev->dev, "mdioclk");
+       if (IS_ERR(clk))
+               return PTR_ERR(clk);
+       if (clk) {
+-              clk_prepare_enable(clk);
+               lp->mii_clock_freq = clk_get_rate(clk);
+       } else {
+               lp->mii_clock_freq = 200000000; /* max possible input clk */
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch b/queue-6.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch

new file mode 100644 (file)

index 0000000..7e71971
--- /dev/null
+++ b/queue-6.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch
@@ -0,0 +1,54 @@
+From 35e127ec86e8074fc5636dbb475b061e38c1feca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 10:42:32 +0300
+Subject: net: ll_temac: fix error checking of irq_of_parse_and_map()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit ef45e8400f5bb66b03cc949f76c80e2a118447de ]
+
+Most kernel functions return negative error codes but some irq functions
+return zero on error.  In this code irq_of_parse_and_map(), returns zero
+and platform_get_irq() returns negative error codes.  We need to handle
+both cases appropriately.
+
+Fixes: 8425c41d1ef7 ("net: ll_temac: Extend support to non-device-tree platforms")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Acked-by: Esben Haabendal <esben@geanix.com>
+Reviewed-by: Yang Yingliang <yangyingliang@huawei.com>
+Reviewed-by: Harini Katakam <harini.katakam@amd.com>
+Link: https://lore.kernel.org/r/3d0aef75-06e0-45a5-a2a6-2cc4738d4143@moroto.mountain
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/xilinx/ll_temac_main.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
+index e0ac1bcd9925c..49f303353ecb0 100644
+--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
++++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
+@@ -1567,12 +1567,16 @@ static int temac_probe(struct platform_device *pdev)
+       }
+ 
+       /* Error handle returned DMA RX and TX interrupts */
+-      if (lp->rx_irq < 0)
+-              return dev_err_probe(&pdev->dev, lp->rx_irq,
++      if (lp->rx_irq <= 0) {
++              rc = lp->rx_irq ?: -EINVAL;
++              return dev_err_probe(&pdev->dev, rc,
+                                    "could not get DMA RX irq\n");
+-      if (lp->tx_irq < 0)
+-              return dev_err_probe(&pdev->dev, lp->tx_irq,
++      }
++      if (lp->tx_irq <= 0) {
++              rc = lp->tx_irq ?: -EINVAL;
++              return dev_err_probe(&pdev->dev, rc,
+                                    "could not get DMA TX irq\n");
++      }
+ 
+       if (temac_np) {
+               /* Retrieve the MAC address */
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch b/queue-6.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch

new file mode 100644 (file)

index 0000000..3692880
--- /dev/null
+++ b/queue-6.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch
@@ -0,0 +1,44 @@
+From ac4ef6dbc3da4b52ed20b04562cdd0d52150cfee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 20:15:27 +0800
+Subject: net/mlx5: DR, fix memory leak in mlx5dr_cmd_create_reformat_ctx
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit 5dd77585dd9d0e03dd1bceb95f0269a7eaf6b936 ]
+
+When mlx5_cmd_exec fails in mlx5dr_cmd_create_reformat_ctx, the memory
+pointed to by 'in' is not released, which causes a memory leak. Move the
+memory release after mlx5_cmd_exec.
+
+Fixes: 1d9186476e12 ("net/mlx5: DR, Add direct rule command utilities")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+index 1aa525e509f10..293d2edd03d59 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+@@ -562,11 +562,12 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
+ 
+       err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+       if (err)
+-              return err;
++              goto err_free_in;
+ 
+       *reformat_id = MLX5_GET(alloc_packet_reformat_context_out, out, packet_reformat_id);
+-      kvfree(in);
+ 
++err_free_in:
++      kvfree(in);
+       return err;
+ }
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5-fix-potential-memory-leak-in-mlx5e_init_rep.patch b/queue-6.4/net-mlx5-fix-potential-memory-leak-in-mlx5e_init_rep.patch

new file mode 100644 (file)

index 0000000..07c5eb6
--- /dev/null
+++ b/queue-6.4/net-mlx5-fix-potential-memory-leak-in-mlx5e_init_rep.patch
@@ -0,0 +1,48 @@
+From 1f4d3594841a7d57108db72864e68b31effabecc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 8 Jul 2023 15:13:07 +0800
+Subject: net/mlx5: fix potential memory leak in mlx5e_init_rep_rx
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit c6cf0b6097bf1bf1b2a89b521e9ecd26b581a93a ]
+
+The memory pointed to by the priv->rx_res pointer is not freed in the error
+path of mlx5e_init_rep_rx, which can lead to a memory leak. Fix by freeing
+the memory in the error path, thereby making the error path identical to
+mlx5e_cleanup_rep_rx().
+
+Fixes: af8bbf730068 ("net/mlx5e: Convert mlx5e_flow_steering member of mlx5e_priv to pointer")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+index 3e7041bd5705e..95d8714765f70 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+@@ -964,7 +964,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+       err = mlx5e_open_drop_rq(priv, &priv->drop_rq);
+       if (err) {
+               mlx5_core_err(mdev, "open drop rq failed, %d\n", err);
+-              return err;
++              goto err_rx_res_free;
+       }
+ 
+       err = mlx5e_rx_res_init(priv->rx_res, priv->mdev, 0,
+@@ -998,6 +998,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
+       mlx5e_rx_res_destroy(priv->rx_res);
+ err_close_drop_rq:
+       mlx5e_close_drop_rq(&priv->drop_rq);
++err_rx_res_free:
+       mlx5e_rx_res_free(priv->rx_res);
+       priv->rx_res = NULL;
+ err_free_fs:
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5-fs_chains-fix-ft-prio-if-ignore_flow_level-.patch b/queue-6.4/net-mlx5-fs_chains-fix-ft-prio-if-ignore_flow_level-.patch

new file mode 100644 (file)

index 0000000..201a0a6
--- /dev/null
+++ b/queue-6.4/net-mlx5-fs_chains-fix-ft-prio-if-ignore_flow_level-.patch
@@ -0,0 +1,57 @@
+From 656e68b7632bfe3bcfdff8d085a5a669d25ad5ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 08:32:51 +0300
+Subject: net/mlx5: fs_chains: Fix ft prio if ignore_flow_level is not
+ supported
+
+From: Chris Mi <cmi@nvidia.com>
+
+[ Upstream commit 61eab651f6e96791cfad6db45f1107c398699b2d ]
+
+The cited commit sets ft prio to fs_base_prio. But if
+ignore_flow_level is not supported, ft prio must be set based on
+tc filter prio. Otherwise, all the ft prio are the same on the same
+chain. It is invalid if ignore_flow_level is not supported.
+
+Fix it by setting ft prio based on tc filter prio and setting
+fs_base_prio to 0 for fdb.
+
+Fixes: 8e80e5648092 ("net/mlx5: fs_chains: Refactor to detach chains from tc usage")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Reviewed-by: Paul Blakey <paulb@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 1 -
+ drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c    | 2 +-
+ 2 files changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index 178880ba7c7b3..c1f419b36289c 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -1376,7 +1376,6 @@ esw_chains_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *miss_fdb)
+ 
+       esw_init_chains_offload_flags(esw, &attr.flags);
+       attr.ns = MLX5_FLOW_NAMESPACE_FDB;
+-      attr.fs_base_prio = FDB_TC_OFFLOAD;
+       attr.max_grp_num = esw->params.large_group_num;
+       attr.default_ft = miss_fdb;
+       attr.mapping = esw->offloads.reg_c0_obj_pool;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+index db9df9798ffac..a80ecb672f33d 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_chains.c
+@@ -178,7 +178,7 @@ mlx5_chains_create_table(struct mlx5_fs_chains *chains,
+       if (!mlx5_chains_ignore_flow_level_supported(chains) ||
+           (chain == 0 && prio == 1 && level == 0)) {
+               ft_attr.level = chains->fs_base_level;
+-              ft_attr.prio = chains->fs_base_prio;
++              ft_attr.prio = chains->fs_base_prio + prio - 1;
+               ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ?
+                       mlx5_get_fdb_sub_ns(chains->dev, chain) :
+                       mlx5_get_flow_namespace(chains->dev, chains->ns);
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5-fs_core-make-find_closest_ft-more-generic.patch b/queue-6.4/net-mlx5-fs_core-make-find_closest_ft-more-generic.patch

new file mode 100644 (file)

index 0000000..1df717e
--- /dev/null
+++ b/queue-6.4/net-mlx5-fs_core-make-find_closest_ft-more-generic.patch
@@ -0,0 +1,120 @@
+From 260ae3f149d4144e98e5fc45bdb0b947c113b651 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 14:58:40 +0300
+Subject: net/mlx5: fs_core: Make find_closest_ft more generic
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit 618d28a535a0582617465d14e05f3881736a2962 ]
+
+As find_closest_ft_recursive is called to find the closest FT, the
+first parameter of find_closest_ft can be changed from fs_prio to
+fs_node. Thus this function is extended to find the closest FT for the
+nodes of any type, not only prios, but also the sub namespaces.
+
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/d3962c2b443ec8dde7a740dc742a1f052d5e256c.1690803944.git.leonro@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: c635ca45a7a2 ("net/mlx5: fs_core: Skip the FTs in the same FS_TYPE_PRIO_CHAINS fs_prio")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/fs_core.c | 29 +++++++++----------
+ 1 file changed, 14 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+index 19da02c416161..852e265541d19 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+@@ -905,18 +905,17 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
+       return ft;
+ }
+ 
+-/* If reverse is false then return the first flow table in next priority of
+- * prio in the tree, else return the last flow table in the previous priority
+- * of prio in the tree.
++/* If reverse is false then return the first flow table next to the passed node
++ * in the tree, else return the last flow table before the node in the tree.
+  */
+-static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool reverse)
++static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse)
+ {
+       struct mlx5_flow_table *ft = NULL;
+       struct fs_node *curr_node;
+       struct fs_node *parent;
+ 
+-      parent = prio->node.parent;
+-      curr_node = &prio->node;
++      parent = node->parent;
++      curr_node = node;
+       while (!ft && parent) {
+               ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
+               curr_node = parent;
+@@ -926,15 +925,15 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_prio *prio, bool revers
+ }
+ 
+ /* Assuming all the tree is locked by mutex chain lock */
+-static struct mlx5_flow_table *find_next_chained_ft(struct fs_prio *prio)
++static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node)
+ {
+-      return find_closest_ft(prio, false);
++      return find_closest_ft(node, false);
+ }
+ 
+ /* Assuming all the tree is locked by mutex chain lock */
+-static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
++static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node)
+ {
+-      return find_closest_ft(prio, true);
++      return find_closest_ft(node, true);
+ }
+ 
+ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
+@@ -946,7 +945,7 @@ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
+       next_ns = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS;
+       fs_get_obj(prio, next_ns ? ft->ns->node.parent : ft->node.parent);
+ 
+-      return find_next_chained_ft(prio);
++      return find_next_chained_ft(&prio->node);
+ }
+ 
+ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
+@@ -977,7 +976,7 @@ static int connect_prev_fts(struct mlx5_core_dev *dev,
+ {
+       struct mlx5_flow_table *prev_ft;
+ 
+-      prev_ft = find_prev_chained_ft(prio);
++      prev_ft = find_prev_chained_ft(&prio->node);
+       if (prev_ft) {
+               struct fs_prio *prev_prio;
+ 
+@@ -1123,7 +1122,7 @@ static int connect_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table
+               if (err)
+                       return err;
+ 
+-              next_ft = first_ft ? first_ft : find_next_chained_ft(prio);
++              next_ft = first_ft ? first_ft : find_next_chained_ft(&prio->node);
+               err = connect_fwd_rules(dev, ft, next_ft);
+               if (err)
+                       return err;
+@@ -1198,7 +1197,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
+ 
+       tree_init_node(&ft->node, del_hw_flow_table, del_sw_flow_table);
+       next_ft = unmanaged ? ft_attr->next_ft :
+-                            find_next_chained_ft(fs_prio);
++                            find_next_chained_ft(&fs_prio->node);
+       ft->def_miss_action = ns->def_miss_action;
+       ft->ns = ns;
+       err = root->cmds->create_flow_table(root, ft, ft_attr, next_ft);
+@@ -2201,7 +2200,7 @@ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
+ 
+       if (!list_is_last(&ft->node.list, &prio->node.children))
+               return list_next_entry(ft, node.list);
+-      return find_next_chained_ft(prio);
++      return find_next_chained_ft(&prio->node);
+ }
+ 
+ static int update_root_ft_destroy(struct mlx5_flow_table *ft)
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5-fs_core-skip-the-fts-in-the-same-fs_type_pr.patch b/queue-6.4/net-mlx5-fs_core-skip-the-fts-in-the-same-fs_type_pr.patch

new file mode 100644 (file)

index 0000000..f166c71
--- /dev/null
+++ b/queue-6.4/net-mlx5-fs_core-skip-the-fts-in-the-same-fs_type_pr.patch
@@ -0,0 +1,196 @@
+From 07fa6bd662b6d71316738f0482f2e83dc76262b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 14:58:41 +0300
+Subject: net/mlx5: fs_core: Skip the FTs in the same FS_TYPE_PRIO_CHAINS
+ fs_prio
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit c635ca45a7a2023904a1f851e99319af7b87017d ]
+
+In the cited commit, new type of FS_TYPE_PRIO_CHAINS fs_prio was added
+to support multiple parallel namespaces for multi-chains. And we skip
+all the flow tables under the fs_node of this type unconditionally,
+when searching for the next or previous flow table to connect for a
+new table.
+
+As this search function is also used to find a new root table when the
+old one is being deleted, it will skip the entire FS_TYPE_PRIO_CHAINS
+fs_node next to the old root. However, new root table should be chosen
+from it if there is any table in it. Fix it by skipping only the flow
+tables in the same FS_TYPE_PRIO_CHAINS fs_node when finding the
+closest FT for a fs_node.
+
+Besides, complete the connecting from FTs of previous priority of prio
+because there should be multiple prevs after this fs_prio type is
+introduced. And also the next FT should be chosen from the first flow
+table next to the prio in the same FS_TYPE_PRIO_CHAINS fs_prio, if
+this prio is the first child.
+
+Fixes: 328edb499f99 ("net/mlx5: Split FDB fast path prio to multiple namespaces")
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Reviewed-by: Paul Blakey <paulb@nvidia.com>
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/7a95754df479e722038996c97c97b062b372591f.1690803944.git.leonro@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/fs_core.c | 80 +++++++++++++++++--
+ 1 file changed, 72 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+index 852e265541d19..5f87c446d3d97 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+@@ -889,7 +889,7 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
+       struct fs_node *iter = list_entry(start, struct fs_node, list);
+       struct mlx5_flow_table *ft = NULL;
+ 
+-      if (!root || root->type == FS_TYPE_PRIO_CHAINS)
++      if (!root)
+               return NULL;
+ 
+       list_for_each_advance_continue(iter, &root->children, reverse) {
+@@ -905,19 +905,42 @@ static struct mlx5_flow_table *find_closest_ft_recursive(struct fs_node  *root,
+       return ft;
+ }
+ 
++static struct fs_node *find_prio_chains_parent(struct fs_node *parent,
++                                             struct fs_node **child)
++{
++      struct fs_node *node = NULL;
++
++      while (parent && parent->type != FS_TYPE_PRIO_CHAINS) {
++              node = parent;
++              parent = parent->parent;
++      }
++
++      if (child)
++              *child = node;
++
++      return parent;
++}
++
+ /* If reverse is false then return the first flow table next to the passed node
+  * in the tree, else return the last flow table before the node in the tree.
++ * If skip is true, skip the flow tables in the same prio_chains prio.
+  */
+-static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse)
++static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool reverse,
++                                             bool skip)
+ {
++      struct fs_node *prio_chains_parent = NULL;
+       struct mlx5_flow_table *ft = NULL;
+       struct fs_node *curr_node;
+       struct fs_node *parent;
+ 
++      if (skip)
++              prio_chains_parent = find_prio_chains_parent(node, NULL);
+       parent = node->parent;
+       curr_node = node;
+       while (!ft && parent) {
+-              ft = find_closest_ft_recursive(parent, &curr_node->list, reverse);
++              if (parent != prio_chains_parent)
++                      ft = find_closest_ft_recursive(parent, &curr_node->list,
++                                                     reverse);
+               curr_node = parent;
+               parent = curr_node->parent;
+       }
+@@ -927,13 +950,13 @@ static struct mlx5_flow_table *find_closest_ft(struct fs_node *node, bool revers
+ /* Assuming all the tree is locked by mutex chain lock */
+ static struct mlx5_flow_table *find_next_chained_ft(struct fs_node *node)
+ {
+-      return find_closest_ft(node, false);
++      return find_closest_ft(node, false, true);
+ }
+ 
+ /* Assuming all the tree is locked by mutex chain lock */
+ static struct mlx5_flow_table *find_prev_chained_ft(struct fs_node *node)
+ {
+-      return find_closest_ft(node, true);
++      return find_closest_ft(node, true, true);
+ }
+ 
+ static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
+@@ -969,21 +992,55 @@ static int connect_fts_in_prio(struct mlx5_core_dev *dev,
+       return 0;
+ }
+ 
++static struct mlx5_flow_table *find_closet_ft_prio_chains(struct fs_node *node,
++                                                        struct fs_node *parent,
++                                                        struct fs_node **child,
++                                                        bool reverse)
++{
++      struct mlx5_flow_table *ft;
++
++      ft = find_closest_ft(node, reverse, false);
++
++      if (ft && parent == find_prio_chains_parent(&ft->node, child))
++              return ft;
++
++      return NULL;
++}
++
+ /* Connect flow tables from previous priority of prio to ft */
+ static int connect_prev_fts(struct mlx5_core_dev *dev,
+                           struct mlx5_flow_table *ft,
+                           struct fs_prio *prio)
+ {
++      struct fs_node *prio_parent, *parent = NULL, *child, *node;
+       struct mlx5_flow_table *prev_ft;
++      int err = 0;
++
++      prio_parent = find_prio_chains_parent(&prio->node, &child);
++
++      /* return directly if not under the first sub ns of prio_chains prio */
++      if (prio_parent && !list_is_first(&child->list, &prio_parent->children))
++              return 0;
+ 
+       prev_ft = find_prev_chained_ft(&prio->node);
+-      if (prev_ft) {
++      while (prev_ft) {
+               struct fs_prio *prev_prio;
+ 
+               fs_get_obj(prev_prio, prev_ft->node.parent);
+-              return connect_fts_in_prio(dev, prev_prio, ft);
++              err = connect_fts_in_prio(dev, prev_prio, ft);
++              if (err)
++                      break;
++
++              if (!parent) {
++                      parent = find_prio_chains_parent(&prev_prio->node, &child);
++                      if (!parent)
++                              break;
++              }
++
++              node = child;
++              prev_ft = find_closet_ft_prio_chains(node, parent, &child, true);
+       }
+-      return 0;
++      return err;
+ }
+ 
+ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
+@@ -2194,12 +2251,19 @@ EXPORT_SYMBOL(mlx5_del_flow_rules);
+ /* Assuming prio->node.children(flow tables) is sorted by level */
+ static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
+ {
++      struct fs_node *prio_parent, *child;
+       struct fs_prio *prio;
+ 
+       fs_get_obj(prio, ft->node.parent);
+ 
+       if (!list_is_last(&ft->node.list, &prio->node.children))
+               return list_next_entry(ft, node.list);
++
++      prio_parent = find_prio_chains_parent(&prio->node, &child);
++
++      if (prio_parent && list_is_first(&child->list, &prio_parent->children))
++              return find_closest_ft(&prio->node, false, false);
++
+       return find_next_chained_ft(&prio->node);
+ }
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5-honor-user-input-for-migratable-port-fn-att.patch b/queue-6.4/net-mlx5-honor-user-input-for-migratable-port-fn-att.patch

new file mode 100644 (file)

index 0000000..215dfa0
--- /dev/null
+++ b/queue-6.4/net-mlx5-honor-user-input-for-migratable-port-fn-att.patch
@@ -0,0 +1,38 @@
+From 7300abc90223a347c9d63cdb1a1ca32259831aaa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jul 2023 17:34:44 +0300
+Subject: net/mlx5: Honor user input for migratable port fn attr
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 0507f2c8be0d345fe7014147c027cea6dc1c00a4 ]
+
+Currently, whenever a user sets the migratable port fn attr, the
+driver always turns the migratable capability on.
+Fix it by honoring the user input.
+
+Fixes: e5b9642a33be ("net/mlx5: E-Switch, Implement devlink port function cmds to control migratable")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Roi Dayan <roid@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+index 8d19c20d3447e..178880ba7c7b3 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+@@ -4073,7 +4073,7 @@ int mlx5_devlink_port_fn_migratable_set(struct devlink_port *port, bool enable,
+       }
+ 
+       hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability);
+-      MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, 1);
++      MLX5_SET(cmd_hca_cap_2, hca_caps, migratable, enable);
+ 
+       err = mlx5_vport_set_other_func_cap(esw->dev, hca_caps, vport->vport,
+                                           MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE2);
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5-unregister-devlink-params-in-case-interface.patch b/queue-6.4/net-mlx5-unregister-devlink-params-in-case-interface.patch

new file mode 100644 (file)

index 0000000..266747e
--- /dev/null
+++ b/queue-6.4/net-mlx5-unregister-devlink-params-in-case-interface.patch
@@ -0,0 +1,70 @@
+From 85a2046bc3375bf7265c09ccd2865eeded873d3f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 25 Jun 2023 11:07:38 +0300
+Subject: net/mlx5: Unregister devlink params in case interface is down
+
+From: Shay Drory <shayd@nvidia.com>
+
+[ Upstream commit 53d737dfd3d7b023fa9fa445ea3f3db0ac9da402 ]
+
+Currently, in case an interface is down, mlx5 driver doesn't
+unregister its devlink params, which leads to this WARN[1].
+Fix it by unregistering devlink params in that case as well.
+
+[1]
+[  295.244769 ] WARNING: CPU: 15 PID: 1 at net/core/devlink.c:9042 devlink_free+0x174/0x1fc
+[  295.488379 ] CPU: 15 PID: 1 Comm: shutdown Tainted: G S         OE 5.15.0-1017.19.3.g0677e61-bluefield #g0677e61
+[  295.509330 ] Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS 4.2.0.12761 Jun  6 2023
+[  295.543096 ] pc : devlink_free+0x174/0x1fc
+[  295.551104 ] lr : mlx5_devlink_free+0x18/0x2c [mlx5_core]
+[  295.561816 ] sp : ffff80000809b850
+[  295.711155 ] Call trace:
+[  295.716030 ]  devlink_free+0x174/0x1fc
+[  295.723346 ]  mlx5_devlink_free+0x18/0x2c [mlx5_core]
+[  295.733351 ]  mlx5_sf_dev_remove+0x98/0xb0 [mlx5_core]
+[  295.743534 ]  auxiliary_bus_remove+0x2c/0x50
+[  295.751893 ]  __device_release_driver+0x19c/0x280
+[  295.761120 ]  device_release_driver+0x34/0x50
+[  295.769649 ]  bus_remove_device+0xdc/0x170
+[  295.777656 ]  device_del+0x17c/0x3a4
+[  295.784620 ]  mlx5_sf_dev_remove+0x28/0xf0 [mlx5_core]
+[  295.794800 ]  mlx5_sf_dev_table_destroy+0x98/0x110 [mlx5_core]
+[  295.806375 ]  mlx5_unload+0x34/0xd0 [mlx5_core]
+[  295.815339 ]  mlx5_unload_one+0x70/0xe4 [mlx5_core]
+[  295.824998 ]  shutdown+0xb0/0xd8 [mlx5_core]
+[  295.833439 ]  pci_device_shutdown+0x3c/0xa0
+[  295.841651 ]  device_shutdown+0x170/0x340
+[  295.849486 ]  __do_sys_reboot+0x1f4/0x2a0
+[  295.857322 ]  __arm64_sys_reboot+0x2c/0x40
+[  295.865329 ]  invoke_syscall+0x78/0x100
+[  295.872817 ]  el0_svc_common.constprop.0+0x54/0x184
+[  295.882392 ]  do_el0_svc+0x30/0xac
+[  295.889008 ]  el0_svc+0x48/0x160
+[  295.895278 ]  el0t_64_sync_handler+0xa4/0x130
+[  295.903807 ]  el0t_64_sync+0x1a4/0x1a8
+[  295.911120 ] ---[ end trace 4f1d2381d00d9dce  ]---
+
+Fixes: fe578cbb2f05 ("net/mlx5: Move devlink registration before mlx5_load")
+Signed-off-by: Shay Drory <shayd@nvidia.com>
+Reviewed-by: Maher Sanalla <msanalla@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+index d6ee016deae17..c7a06c8bbb7a3 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
+@@ -1456,6 +1456,7 @@ void mlx5_uninit_one(struct mlx5_core_dev *dev)
+       if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
+               mlx5_core_warn(dev, "%s: interface is down, NOP\n",
+                              __func__);
++              mlx5_devlink_params_unregister(priv_to_devlink(dev));
+               mlx5_cleanup_once(dev);
+               goto out;
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5e-don-t-hold-encap-tbl-lock-if-there-is-no-e.patch b/queue-6.4/net-mlx5e-don-t-hold-encap-tbl-lock-if-there-is-no-e.patch

new file mode 100644 (file)

index 0000000..5346b5d
--- /dev/null
+++ b/queue-6.4/net-mlx5e-don-t-hold-encap-tbl-lock-if-there-is-no-e.patch
@@ -0,0 +1,106 @@
+From 6d1ce639479b13a9ff5e58f2c199016a66881bc5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Jun 2023 11:32:03 +0300
+Subject: net/mlx5e: Don't hold encap tbl lock if there is no encap action
+
+From: Chris Mi <cmi@nvidia.com>
+
+[ Upstream commit 93a331939d1d1c6c3422bc09ec43cac658594b34 ]
+
+The cited commit holds encap tbl lock unconditionally when setting
+up dests. But it may cause the following deadlock:
+
+ PID: 1063722  TASK: ffffa062ca5d0000  CPU: 13   COMMAND: "handler8"
+  #0 [ffffb14de05b7368] __schedule at ffffffffa1d5aa91
+  #1 [ffffb14de05b7410] schedule at ffffffffa1d5afdb
+  #2 [ffffb14de05b7430] schedule_preempt_disabled at ffffffffa1d5b528
+  #3 [ffffb14de05b7440] __mutex_lock at ffffffffa1d5d6cb
+  #4 [ffffb14de05b74e8] mutex_lock_nested at ffffffffa1d5ddeb
+  #5 [ffffb14de05b74f8] mlx5e_tc_tun_encap_dests_set at ffffffffc12f2096 [mlx5_core]
+  #6 [ffffb14de05b7568] post_process_attr at ffffffffc12d9fc5 [mlx5_core]
+  #7 [ffffb14de05b75a0] mlx5e_tc_add_fdb_flow at ffffffffc12de877 [mlx5_core]
+  #8 [ffffb14de05b75f0] __mlx5e_add_fdb_flow at ffffffffc12e0eef [mlx5_core]
+  #9 [ffffb14de05b7660] mlx5e_tc_add_flow at ffffffffc12e12f7 [mlx5_core]
+ #10 [ffffb14de05b76b8] mlx5e_configure_flower at ffffffffc12e1686 [mlx5_core]
+ #11 [ffffb14de05b7720] mlx5e_rep_indr_offload at ffffffffc12e3817 [mlx5_core]
+ #12 [ffffb14de05b7730] mlx5e_rep_indr_setup_tc_cb at ffffffffc12e388a [mlx5_core]
+ #13 [ffffb14de05b7740] tc_setup_cb_add at ffffffffa1ab2ba8
+ #14 [ffffb14de05b77a0] fl_hw_replace_filter at ffffffffc0bdec2f [cls_flower]
+ #15 [ffffb14de05b7868] fl_change at ffffffffc0be6caa [cls_flower]
+ #16 [ffffb14de05b7908] tc_new_tfilter at ffffffffa1ab71f0
+
+[1031218.028143]  wait_for_completion+0x24/0x30
+[1031218.028589]  mlx5e_update_route_decap_flows+0x9a/0x1e0 [mlx5_core]
+[1031218.029256]  mlx5e_tc_fib_event_work+0x1ad/0x300 [mlx5_core]
+[1031218.029885]  process_one_work+0x24e/0x510
+
+Actually no need to hold encap tbl lock if there is no encap action.
+Fix it by checking if encap action exists or not before holding
+encap tbl lock.
+
+Fixes: 37c3b9fa7ccf ("net/mlx5e: Prevent encap offload when neigh update is running")
+Signed-off-by: Chris Mi <cmi@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en/tc_tun_encap.c      |  3 ---
+ .../net/ethernet/mellanox/mlx5/core/en_tc.c   | 21 ++++++++++++++++---
+ 2 files changed, 18 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+index f0c3464f037f4..0c88cf47af01b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
+@@ -1030,9 +1030,6 @@ int mlx5e_tc_tun_encap_dests_set(struct mlx5e_priv *priv,
+       int out_index;
+       int err = 0;
+ 
+-      if (!mlx5e_is_eswitch_flow(flow))
+-              return 0;
+-
+       parse_attr = attr->parse_attr;
+       esw_attr = attr->esw_attr;
+       *vf_tun = false;
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+index ed05ac8ae1de5..e002f013fa015 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -1725,6 +1725,19 @@ verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
+       return 0;
+ }
+ 
++static bool
++has_encap_dests(struct mlx5_flow_attr *attr)
++{
++      struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
++      int out_index;
++
++      for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
++              if (esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP)
++                      return true;
++
++      return false;
++}
++
+ static int
+ post_process_attr(struct mlx5e_tc_flow *flow,
+                 struct mlx5_flow_attr *attr,
+@@ -1737,9 +1750,11 @@ post_process_attr(struct mlx5e_tc_flow *flow,
+       if (err)
+               goto err_out;
+ 
+-      err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
+-      if (err)
+-              goto err_out;
++      if (mlx5e_is_eswitch_flow(flow) && has_encap_dests(attr)) {
++              err = mlx5e_tc_tun_encap_dests_set(flow->priv, flow, attr, extack, &vf_tun);
++              if (err)
++                      goto err_out;
++      }
+ 
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
+               err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5e-fix-crash-moving-to-switchdev-mode-when-nt.patch b/queue-6.4/net-mlx5e-fix-crash-moving-to-switchdev-mode-when-nt.patch

new file mode 100644 (file)

index 0000000..2792051
--- /dev/null
+++ b/queue-6.4/net-mlx5e-fix-crash-moving-to-switchdev-mode-when-nt.patch
@@ -0,0 +1,82 @@
+From 3e4bc90736aae3afb4f8bcb372ed915c36327df2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 May 2023 20:11:14 +0300
+Subject: net/mlx5e: Fix crash moving to switchdev mode when ntuple offload is
+ set
+
+From: Amir Tzin <amirtz@nvidia.com>
+
+[ Upstream commit 3ec43c1b082a8804472430e1253544d75f4b540e ]
+
+Moving to switchdev mode with ntuple offload on causes the kernel to
+crash since fs->arfs is freed during nic profile cleanup flow.
+
+Ntuple offload is not supported in switchdev mode and it is already
+unset by mlx5 fix feature ndo in switchdev mode. Verify fs->arfs is
+valid before disabling it.
+
+trace:
+[] RIP: 0010:_raw_spin_lock_bh+0x17/0x30
+[] arfs_del_rules+0x44/0x1a0 [mlx5_core]
+[] mlx5e_arfs_disable+0xe/0x20 [mlx5_core]
+[] mlx5e_handle_feature+0x3d/0xb0 [mlx5_core]
+[] ? __rtnl_unlock+0x25/0x50
+[] mlx5e_set_features+0xfe/0x160 [mlx5_core]
+[] __netdev_update_features+0x278/0xa50
+[] ? netdev_run_todo+0x5e/0x2a0
+[] netdev_update_features+0x22/0x70
+[] ? _cond_resched+0x15/0x30
+[] mlx5e_attach_netdev+0x12a/0x1e0 [mlx5_core]
+[] mlx5e_netdev_attach_profile+0xa1/0xc0 [mlx5_core]
+[] mlx5e_netdev_change_profile+0x77/0xe0 [mlx5_core]
+[] mlx5e_vport_rep_load+0x1ed/0x290 [mlx5_core]
+[] mlx5_esw_offloads_rep_load+0x88/0xd0 [mlx5_core]
+[] esw_offloads_load_rep.part.38+0x31/0x50 [mlx5_core]
+[] esw_offloads_enable+0x6c5/0x710 [mlx5_core]
+[] mlx5_eswitch_enable_locked+0x1bb/0x290 [mlx5_core]
+[] mlx5_devlink_eswitch_mode_set+0x14f/0x320 [mlx5_core]
+[] devlink_nl_cmd_eswitch_set_doit+0x94/0x120
+[] genl_family_rcv_msg_doit.isra.17+0x113/0x150
+[] genl_family_rcv_msg+0xb7/0x170
+[] ? devlink_nl_cmd_port_split_doit+0x100/0x100
+[] genl_rcv_msg+0x47/0xa0
+[] ? genl_family_rcv_msg+0x170/0x170
+[] netlink_rcv_skb+0x4c/0x130
+[] genl_rcv+0x24/0x40
+[] netlink_unicast+0x19a/0x230
+[] netlink_sendmsg+0x204/0x3d0
+[] sock_sendmsg+0x50/0x60
+
+Fixes: 90b22b9bcd24 ("net/mlx5e: Disable Rx ntuple offload for uplink representor")
+Signed-off-by: Amir Tzin <amirtz@nvidia.com>
+Reviewed-by: Aya Levin <ayal@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+index bed0c2d043e70..329d8c90facdd 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+@@ -135,6 +135,16 @@ static void arfs_del_rules(struct mlx5e_flow_steering *fs);
+ 
+ int mlx5e_arfs_disable(struct mlx5e_flow_steering *fs)
+ {
++      /* Moving to switchdev mode, fs->arfs is freed by mlx5e_nic_profile
++       * cleanup_rx callback and it is not recreated when
++       * mlx5e_uplink_rep_profile is loaded as mlx5e_create_flow_steering()
++       * is not called by the uplink_rep profile init_rx callback. Thus, if
++       * ntuple is set, moving to switchdev flow will enter this function
++       * with fs->arfs nullified.
++       */
++      if (!mlx5e_fs_get_arfs(fs))
++              return 0;
++
+       arfs_del_rules(fs);
+ 
+       return arfs_disable(fs);
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5e-fix-double-free-in-macsec_fs_tx_create_cry.patch b/queue-6.4/net-mlx5e-fix-double-free-in-macsec_fs_tx_create_cry.patch

new file mode 100644 (file)

index 0000000..b33d887
--- /dev/null
+++ b/queue-6.4/net-mlx5e-fix-double-free-in-macsec_fs_tx_create_cry.patch
@@ -0,0 +1,40 @@
+From 960f9c48f1657ecaf54ebde6292f5226a8bcb732 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jul 2023 15:06:40 +0800
+Subject: net/mlx5e: fix double free in macsec_fs_tx_create_crypto_table_groups
+
+From: Zhengchao Shao <shaozhengchao@huawei.com>
+
+[ Upstream commit aeb660171b0663847fa04806a96302ac6112ad26 ]
+
+In function macsec_fs_tx_create_crypto_table_groups(), when the ft->g
+memory is successfully allocated but the 'in' memory fails to be
+allocated, the memory pointed to by ft->g is released once. And in function
+macsec_fs_tx_create(), macsec_fs_tx_destroy() is called to release the
+memory pointed to by ft->g again. This will cause double free problem.
+
+Fixes: e467b283ffd5 ("net/mlx5e: Add MACsec TX steering rules")
+Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
+index 7fc901a6ec5fc..414e285848813 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec_fs.c
+@@ -161,6 +161,7 @@ static int macsec_fs_tx_create_crypto_table_groups(struct mlx5e_flow_table *ft)
+ 
+       if (!in) {
+               kfree(ft->g);
++              ft->g = NULL;
+               return -ENOMEM;
+       }
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch b/queue-6.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch

new file mode 100644 (file)

index 0000000..97615a8
--- /dev/null
+++ b/queue-6.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch
@@ -0,0 +1,39 @@
+From 8548d539f6a605fb94d5b3f218dc56665ad051cc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Jul 2023 14:56:55 +0800
+Subject: net/mlx5e: fix return value check in mlx5e_ipsec_remove_trailer()
+
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+
+[ Upstream commit e5bcb7564d3bd0c88613c76963c5349be9c511c5 ]
+
+mlx5e_ipsec_remove_trailer() should return an error code if function
+pskb_trim() returns an unexpected value.
+
+Fixes: 2ac9cfe78223 ("net/mlx5e: IPSec, Add Innova IPSec offload TX data path")
+Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+index eab5bc718771f..8d995e3048692 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+@@ -58,7 +58,9 @@ static int mlx5e_ipsec_remove_trailer(struct sk_buff *skb, struct xfrm_state *x)
+ 
+       trailer_len = alen + plen + 2;
+ 
+-      pskb_trim(skb, skb->len - trailer_len);
++      ret = pskb_trim(skb, skb->len - trailer_len);
++      if (unlikely(ret))
++              return ret;
+       if (skb->protocol == htons(ETH_P_IP)) {
+               ipv4hdr->tot_len = htons(ntohs(ipv4hdr->tot_len) - trailer_len);
+               ip_send_check(ipv4hdr);
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5e-ktls-fix-protection-domain-in-use-syndrome.patch b/queue-6.4/net-mlx5e-ktls-fix-protection-domain-in-use-syndrome.patch

new file mode 100644 (file)

index 0000000..7836995
--- /dev/null
+++ b/queue-6.4/net-mlx5e-ktls-fix-protection-domain-in-use-syndrome.patch
@@ -0,0 +1,132 @@
+From 2bc86e8aaee38506003f1e8b628f4e5ff9207a90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 May 2023 03:36:10 +0000
+Subject: net/mlx5e: kTLS, Fix protection domain in use syndrome when devlink
+ reload
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit 3e4cf1dd2ce413f4be3e2c9062fb470e2ad2be88 ]
+
+There are DEK objects cached in DEK pool after kTLS is used, and they
+are freed only in mlx5e_ktls_cleanup().
+
+mlx5e_destroy_mdev_resources() is called in mlx5e_suspend() to
+free mdev resources, including protection domain (PD). However, PD is
+still referenced by the cached DEK objects in this case, because
+profile->cleanup() (and therefore mlx5e_ktls_cleanup()) is called
+after mlx5e_suspend() during devlink reload. So the following FW
+syndrome is generated:
+
+ mlx5_cmd_out_err:803:(pid 12948): DEALLOC_PD(0x801) op_mod(0x0) failed,
+    status bad resource state(0x9), syndrome (0xef0c8a), err(-22)
+
+To avoid this syndrome, move DEK pool destruction to
+mlx5e_ktls_cleanup_tx(), which is called by profile->cleanup_tx(). And
+move pool creation to mlx5e_ktls_init_tx() for symmetry.
+
+Fixes: f741db1a5171 ("net/mlx5e: kTLS, Improve connection rate by using fast update encryption key")
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../mellanox/mlx5/core/en_accel/ktls.c        |  8 -----
+ .../mellanox/mlx5/core/en_accel/ktls_tx.c     | 29 +++++++++++++++++--
+ 2 files changed, 26 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+index cf704f106b7c2..984fa04bd331b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+@@ -188,7 +188,6 @@ static void mlx5e_tls_debugfs_init(struct mlx5e_tls *tls,
+ 
+ int mlx5e_ktls_init(struct mlx5e_priv *priv)
+ {
+-      struct mlx5_crypto_dek_pool *dek_pool;
+       struct mlx5e_tls *tls;
+ 
+       if (!mlx5e_is_ktls_device(priv->mdev))
+@@ -199,12 +198,6 @@ int mlx5e_ktls_init(struct mlx5e_priv *priv)
+               return -ENOMEM;
+       tls->mdev = priv->mdev;
+ 
+-      dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY);
+-      if (IS_ERR(dek_pool)) {
+-              kfree(tls);
+-              return PTR_ERR(dek_pool);
+-      }
+-      tls->dek_pool = dek_pool;
+       priv->tls = tls;
+ 
+       mlx5e_tls_debugfs_init(tls, priv->dfs_root);
+@@ -222,7 +215,6 @@ void mlx5e_ktls_cleanup(struct mlx5e_priv *priv)
+       debugfs_remove_recursive(tls->debugfs.dfs);
+       tls->debugfs.dfs = NULL;
+ 
+-      mlx5_crypto_dek_pool_destroy(tls->dek_pool);
+       kfree(priv->tls);
+       priv->tls = NULL;
+ }
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+index 0e4c0a093293a..c49363dd6bf9a 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+@@ -908,28 +908,51 @@ static void mlx5e_tls_tx_debugfs_init(struct mlx5e_tls *tls,
+ 
+ int mlx5e_ktls_init_tx(struct mlx5e_priv *priv)
+ {
++      struct mlx5_crypto_dek_pool *dek_pool;
+       struct mlx5e_tls *tls = priv->tls;
++      int err;
++
++      if (!mlx5e_is_ktls_device(priv->mdev))
++              return 0;
++
++      /* DEK pool could be used by either or both of TX and RX. But we have to
++       * put the creation here to avoid syndrome when doing devlink reload.
++       */
++      dek_pool = mlx5_crypto_dek_pool_create(priv->mdev, MLX5_ACCEL_OBJ_TLS_KEY);
++      if (IS_ERR(dek_pool))
++              return PTR_ERR(dek_pool);
++      tls->dek_pool = dek_pool;
+ 
+       if (!mlx5e_is_ktls_tx(priv->mdev))
+               return 0;
+ 
+       priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats);
+-      if (!priv->tls->tx_pool)
+-              return -ENOMEM;
++      if (!priv->tls->tx_pool) {
++              err = -ENOMEM;
++              goto err_tx_pool_init;
++      }
+ 
+       mlx5e_tls_tx_debugfs_init(tls, tls->debugfs.dfs);
+ 
+       return 0;
++
++err_tx_pool_init:
++      mlx5_crypto_dek_pool_destroy(dek_pool);
++      return err;
+ }
+ 
+ void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv)
+ {
+       if (!mlx5e_is_ktls_tx(priv->mdev))
+-              return;
++              goto dek_pool_destroy;
+ 
+       debugfs_remove_recursive(priv->tls->debugfs.dfs_tx);
+       priv->tls->debugfs.dfs_tx = NULL;
+ 
+       mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool);
+       priv->tls->tx_pool = NULL;
++
++dek_pool_destroy:
++      if (mlx5e_is_ktls_device(priv->mdev))
++              mlx5_crypto_dek_pool_destroy(priv->tls->dek_pool);
+ }
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5e-move-representor-neigh-cleanup-to-profile-.patch b/queue-6.4/net-mlx5e-move-representor-neigh-cleanup-to-profile-.patch

new file mode 100644 (file)

index 0000000..2882a8e
--- /dev/null
+++ b/queue-6.4/net-mlx5e-move-representor-neigh-cleanup-to-profile-.patch
@@ -0,0 +1,176 @@
+From ad0ed808db46583f399d542c9e342bff8c7f8332 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jul 2023 08:28:16 +0000
+Subject: net/mlx5e: Move representor neigh cleanup to profile cleanup_tx
+
+From: Jianbo Liu <jianbol@nvidia.com>
+
+[ Upstream commit d03b6e6f31820b84f7449cca022047f36c42bc3f ]
+
+For IP tunnel encapsulation in ECMP (Equal-Cost Multipath) mode, as
+the flow is duplicated to the peer eswitch, the related neighbour
+information on the peer uplink representor is created as well.
+
+In the cited commit, eswitch devcom unpair is moved to uplink unload
+API, specifically the profile->cleanup_tx. If there is an encap rule
+offloaded in ECMP mode, when one eswitch does unpair (because of
+unloading the driver, for instance), and the peer rule from the peer
+eswitch is going to be deleted, the use-after-free error is triggered
+while accessing neigh info, as it is already cleaned up in uplink's
+profile->disable, which is before its profile->cleanup_tx.
+
+To fix this issue, move the neigh cleanup to profile's cleanup_tx
+callback, and after mlx5e_cleanup_uplink_rep_tx is called. The neigh
+init is moved to init_tx for symmetry.
+
+[ 2453.376299] BUG: KASAN: slab-use-after-free in mlx5e_rep_neigh_entry_release+0x109/0x3a0 [mlx5_core]
+[ 2453.379125] Read of size 4 at addr ffff888127af9008 by task modprobe/2496
+
+[ 2453.381542] CPU: 7 PID: 2496 Comm: modprobe Tainted: G    B              6.4.0-rc7+ #15
+[ 2453.383386] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+[ 2453.384335] Call Trace:
+[ 2453.384625]  <TASK>
+[ 2453.384891]  dump_stack_lvl+0x33/0x50
+[ 2453.385285]  print_report+0xc2/0x610
+[ 2453.385667]  ? __virt_addr_valid+0xb1/0x130
+[ 2453.386091]  ? mlx5e_rep_neigh_entry_release+0x109/0x3a0 [mlx5_core]
+[ 2453.386757]  kasan_report+0xae/0xe0
+[ 2453.387123]  ? mlx5e_rep_neigh_entry_release+0x109/0x3a0 [mlx5_core]
+[ 2453.387798]  mlx5e_rep_neigh_entry_release+0x109/0x3a0 [mlx5_core]
+[ 2453.388465]  mlx5e_rep_encap_entry_detach+0xa6/0xe0 [mlx5_core]
+[ 2453.389111]  mlx5e_encap_dealloc+0xa7/0x100 [mlx5_core]
+[ 2453.389706]  mlx5e_tc_tun_encap_dests_unset+0x61/0xb0 [mlx5_core]
+[ 2453.390361]  mlx5_free_flow_attr_actions+0x11e/0x340 [mlx5_core]
+[ 2453.391015]  ? complete_all+0x43/0xd0
+[ 2453.391398]  ? free_flow_post_acts+0x38/0x120 [mlx5_core]
+[ 2453.392004]  mlx5e_tc_del_fdb_flow+0x4ae/0x690 [mlx5_core]
+[ 2453.392618]  mlx5e_tc_del_fdb_peers_flow+0x308/0x370 [mlx5_core]
+[ 2453.393276]  mlx5e_tc_clean_fdb_peer_flows+0xf5/0x140 [mlx5_core]
+[ 2453.393925]  mlx5_esw_offloads_unpair+0x86/0x540 [mlx5_core]
+[ 2453.394546]  ? mlx5_esw_offloads_set_ns_peer.isra.0+0x180/0x180 [mlx5_core]
+[ 2453.395268]  ? down_write+0xaa/0x100
+[ 2453.395652]  mlx5_esw_offloads_devcom_event+0x203/0x530 [mlx5_core]
+[ 2453.396317]  mlx5_devcom_send_event+0xbb/0x190 [mlx5_core]
+[ 2453.396917]  mlx5_esw_offloads_devcom_cleanup+0xb0/0xd0 [mlx5_core]
+[ 2453.397582]  mlx5e_tc_esw_cleanup+0x42/0x120 [mlx5_core]
+[ 2453.398182]  mlx5e_rep_tc_cleanup+0x15/0x30 [mlx5_core]
+[ 2453.398768]  mlx5e_cleanup_rep_tx+0x6c/0x80 [mlx5_core]
+[ 2453.399367]  mlx5e_detach_netdev+0xee/0x120 [mlx5_core]
+[ 2453.399957]  mlx5e_netdev_change_profile+0x84/0x170 [mlx5_core]
+[ 2453.400598]  mlx5e_vport_rep_unload+0xe0/0xf0 [mlx5_core]
+[ 2453.403781]  mlx5_eswitch_unregister_vport_reps+0x15e/0x190 [mlx5_core]
+[ 2453.404479]  ? mlx5_eswitch_register_vport_reps+0x200/0x200 [mlx5_core]
+[ 2453.405170]  ? up_write+0x39/0x60
+[ 2453.405529]  ? kernfs_remove_by_name_ns+0xb7/0xe0
+[ 2453.405985]  auxiliary_bus_remove+0x2e/0x40
+[ 2453.406405]  device_release_driver_internal+0x243/0x2d0
+[ 2453.406900]  ? kobject_put+0x42/0x2d0
+[ 2453.407284]  bus_remove_device+0x128/0x1d0
+[ 2453.407687]  device_del+0x240/0x550
+[ 2453.408053]  ? waiting_for_supplier_show+0xe0/0xe0
+[ 2453.408511]  ? kobject_put+0xfa/0x2d0
+[ 2453.408889]  ? __kmem_cache_free+0x14d/0x280
+[ 2453.409310]  mlx5_rescan_drivers_locked.part.0+0xcd/0x2b0 [mlx5_core]
+[ 2453.409973]  mlx5_unregister_device+0x40/0x50 [mlx5_core]
+[ 2453.410561]  mlx5_uninit_one+0x3d/0x110 [mlx5_core]
+[ 2453.411111]  remove_one+0x89/0x130 [mlx5_core]
+[ 2453.411628]  pci_device_remove+0x59/0xf0
+[ 2453.412026]  device_release_driver_internal+0x243/0x2d0
+[ 2453.412511]  ? parse_option_str+0x14/0x90
+[ 2453.412915]  driver_detach+0x7b/0xf0
+[ 2453.413289]  bus_remove_driver+0xb5/0x160
+[ 2453.413685]  pci_unregister_driver+0x3f/0xf0
+[ 2453.414104]  mlx5_cleanup+0xc/0x20 [mlx5_core]
+
+Fixes: 2be5bd42a5bb ("net/mlx5: Handle pairing of E-switch via uplink un/load APIs")
+Signed-off-by: Jianbo Liu <jianbol@nvidia.com>
+Reviewed-by: Vlad Buslov <vladbu@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en_rep.c    | 17 +++++++----------
+ 1 file changed, 7 insertions(+), 10 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+index 95d8714765f70..ad63d1f9a611f 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+@@ -1112,6 +1112,10 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
+               return err;
+       }
+ 
++      err = mlx5e_rep_neigh_init(rpriv);
++      if (err)
++              goto err_neigh_init;
++
+       if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
+               err = mlx5e_init_uplink_rep_tx(rpriv);
+               if (err)
+@@ -1128,6 +1132,8 @@ static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
+       if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
+               mlx5e_cleanup_uplink_rep_tx(rpriv);
+ err_init_tx:
++      mlx5e_rep_neigh_cleanup(rpriv);
++err_neigh_init:
+       mlx5e_destroy_tises(priv);
+       return err;
+ }
+@@ -1141,22 +1147,17 @@ static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
+       if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
+               mlx5e_cleanup_uplink_rep_tx(rpriv);
+ 
++      mlx5e_rep_neigh_cleanup(rpriv);
+       mlx5e_destroy_tises(priv);
+ }
+ 
+ static void mlx5e_rep_enable(struct mlx5e_priv *priv)
+ {
+-      struct mlx5e_rep_priv *rpriv = priv->ppriv;
+-
+       mlx5e_set_netdev_mtu_boundaries(priv);
+-      mlx5e_rep_neigh_init(rpriv);
+ }
+ 
+ static void mlx5e_rep_disable(struct mlx5e_priv *priv)
+ {
+-      struct mlx5e_rep_priv *rpriv = priv->ppriv;
+-
+-      mlx5e_rep_neigh_cleanup(rpriv);
+ }
+ 
+ static int mlx5e_update_rep_rx(struct mlx5e_priv *priv)
+@@ -1206,7 +1207,6 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event
+ 
+ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
+ {
+-      struct mlx5e_rep_priv *rpriv = priv->ppriv;
+       struct net_device *netdev = priv->netdev;
+       struct mlx5_core_dev *mdev = priv->mdev;
+       u16 max_mtu;
+@@ -1228,7 +1228,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
+       mlx5_notifier_register(mdev, &priv->events_nb);
+       mlx5e_dcbnl_initialize(priv);
+       mlx5e_dcbnl_init_app(priv);
+-      mlx5e_rep_neigh_init(rpriv);
+       mlx5e_rep_bridge_init(priv);
+ 
+       netdev->wanted_features |= NETIF_F_HW_TC;
+@@ -1243,7 +1242,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
+ 
+ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
+ {
+-      struct mlx5e_rep_priv *rpriv = priv->ppriv;
+       struct mlx5_core_dev *mdev = priv->mdev;
+ 
+       rtnl_lock();
+@@ -1253,7 +1251,6 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
+       rtnl_unlock();
+ 
+       mlx5e_rep_bridge_cleanup(priv);
+-      mlx5e_rep_neigh_cleanup(rpriv);
+       mlx5e_dcbnl_delete_app(priv);
+       mlx5_notifier_unregister(mdev, &priv->events_nb);
+       mlx5e_rep_tc_disable(priv);
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5e-set-proper-ipsec-source-port-in-l4-selecto.patch b/queue-6.4/net-mlx5e-set-proper-ipsec-source-port-in-l4-selecto.patch

new file mode 100644 (file)

index 0000000..f19802c
--- /dev/null
+++ b/queue-6.4/net-mlx5e-set-proper-ipsec-source-port-in-l4-selecto.patch
@@ -0,0 +1,40 @@
+From f3cc58db7c7af2a2f5198736fba7a505b1abbb30 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 14:58:42 +0300
+Subject: net/mlx5e: Set proper IPsec source port in L4 selector
+
+From: Leon Romanovsky <leonro@nvidia.com>
+
+[ Upstream commit 62da08331f1a2bef9d0148613133ce8e640a2f8d ]
+
+Fix typo in setup_fte_upper_proto_match() where destination UDP port
+was used instead of source port.
+
+Fixes: a7385187a386 ("net/mlx5e: IPsec, support upper protocol selector field offload")
+Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/ffc024a4d192113103f392b0502688366ca88c1f.1690803944.git.leonro@nvidia.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+index dbe87bf89c0dd..832d36be4a17b 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c
+@@ -808,9 +808,9 @@ static void setup_fte_upper_proto_match(struct mlx5_flow_spec *spec, struct upsp
+       }
+ 
+       if (upspec->sport) {
+-              MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport,
++              MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_sport,
+                        upspec->sport_mask);
+-              MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, upspec->sport);
++              MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_sport, upspec->sport);
+       }
+ }
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5e-xsk-fix-crash-on-regular-rq-reactivation.patch b/queue-6.4/net-mlx5e-xsk-fix-crash-on-regular-rq-reactivation.patch

new file mode 100644 (file)

index 0000000..c272e14
--- /dev/null
+++ b/queue-6.4/net-mlx5e-xsk-fix-crash-on-regular-rq-reactivation.patch
@@ -0,0 +1,90 @@
+From 75ec6a970c92a7f6c9d650f3af1d7ffb1187909d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Apr 2023 18:19:00 +0300
+Subject: net/mlx5e: xsk: Fix crash on regular rq reactivation
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+[ Upstream commit 39646d9bcd1a65d2396328026626859a1dab59d7 ]
+
+When the regular rq is reactivated after the XSK socket is closed
+it could be reading stale cqes which eventually corrupts the rq.
+This leads to no more traffic being received on the regular rq and a
+crash on the next close or deactivation of the rq.
+
+Kal Cutter Conley reported this issue as a crash on the release
+path when the xdpsock sample program is stopped (killed) and restarted
+in sequence while traffic is running.
+
+This patch flushes all cqes during the rq flush. The cqe flushing
+is done in the reset state of the rq. mlx5e_rq_to_ready code is moved
+into the flush function to allow for this.
+
+Fixes: 082a9edf12fe ("net/mlx5e: xsk: Flush RQ on XSK activation to save memory")
+Reported-by: Kal Cutter Conley <kal.conley@dectris.com>
+Closes: https://lore.kernel.org/xdp-newbies/CAHApi-nUAs4TeFWUDV915CZJo07XVg2Vp63-no7UDfj6wur9nQ@mail.gmail.com
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../net/ethernet/mellanox/mlx5/core/en_main.c | 29 ++++++++++++++-----
+ 1 file changed, 21 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+index a5bdf78955d76..f084513fbead4 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+@@ -1036,7 +1036,23 @@ static int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_s
+       return err;
+ }
+ 
+-static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
++static void mlx5e_flush_rq_cq(struct mlx5e_rq *rq)
++{
++      struct mlx5_cqwq *cqwq = &rq->cq.wq;
++      struct mlx5_cqe64 *cqe;
++
++      if (test_bit(MLX5E_RQ_STATE_MINI_CQE_ENHANCED, &rq->state)) {
++              while ((cqe = mlx5_cqwq_get_cqe_enahnced_comp(cqwq)))
++                      mlx5_cqwq_pop(cqwq);
++      } else {
++              while ((cqe = mlx5_cqwq_get_cqe(cqwq)))
++                      mlx5_cqwq_pop(cqwq);
++      }
++
++      mlx5_cqwq_update_db_record(cqwq);
++}
++
++int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
+ {
+       struct net_device *dev = rq->netdev;
+       int err;
+@@ -1046,6 +1062,10 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+               netdev_err(dev, "Failed to move rq 0x%x to reset\n", rq->rqn);
+               return err;
+       }
++
++      mlx5e_free_rx_descs(rq);
++      mlx5e_flush_rq_cq(rq);
++
+       err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
+       if (err) {
+               netdev_err(dev, "Failed to move rq 0x%x to ready\n", rq->rqn);
+@@ -1055,13 +1075,6 @@ static int mlx5e_rq_to_ready(struct mlx5e_rq *rq, int curr_state)
+       return 0;
+ }
+ 
+-int mlx5e_flush_rq(struct mlx5e_rq *rq, int curr_state)
+-{
+-      mlx5e_free_rx_descs(rq);
+-
+-      return mlx5e_rq_to_ready(rq, curr_state);
+-}
+-
+ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
+ {
+       struct mlx5_core_dev *mdev = rq->mdev;
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-mlx5e-xsk-fix-invalid-buffer-access-for-legacy-r.patch b/queue-6.4/net-mlx5e-xsk-fix-invalid-buffer-access-for-legacy-r.patch

new file mode 100644 (file)

index 0000000..acaa399
--- /dev/null
+++ b/queue-6.4/net-mlx5e-xsk-fix-invalid-buffer-access-for-legacy-r.patch
@@ -0,0 +1,90 @@
+From 783e21913110f53f7eed284129fc176b05e7bc88 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jul 2023 11:13:33 +0300
+Subject: net/mlx5e: xsk: Fix invalid buffer access for legacy rq
+
+From: Dragos Tatulea <dtatulea@nvidia.com>
+
+[ Upstream commit e0f52298fee449fec37e3e3c32df60008b509b16 ]
+
+The below crash can be encountered when using xdpsock in rx mode for
+legacy rq: the buffer gets released in the XDP_REDIRECT path, and then
+once again in the driver. This fix sets the flag to avoid releasing on
+the driver side.
+
+XSK handling of buffers for legacy rq was relying on the caller to set
+the skip release flag. But the referenced fix started using fragment
+counts for pages instead of the skip flag.
+
+Crash log:
+ general protection fault, probably for non-canonical address 0xffff8881217e3a: 0000 [#1] SMP
+ CPU: 0 PID: 14 Comm: ksoftirqd/0 Not tainted 6.5.0-rc1+ #31
+ Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
+ RIP: 0010:bpf_prog_03b13f331978c78c+0xf/0x28
+ Code:  ...
+ RSP: 0018:ffff88810082fc98 EFLAGS: 00010246
+ RAX: 0000000000000000 RBX: ffff888138404901 RCX: c0ffffc900027cbc
+ RDX: ffffffffa000b514 RSI: 00ffff8881217e32 RDI: ffff888138404901
+ RBP: ffff88810082fc98 R08: 0000000000091100 R09: 0000000000000006
+ R10: 0000000000000800 R11: 0000000000000800 R12: ffffc9000027a000
+ R13: ffff8881217e2dc0 R14: ffff8881217e2910 R15: ffff8881217e2f00
+ FS:  0000000000000000(0000) GS:ffff88852c800000(0000) knlGS:0000000000000000
+ CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: 0000564cb2e2cde0 CR3: 000000010e603004 CR4: 0000000000370eb0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+  <TASK>
+  ? die_addr+0x32/0x80
+  ? exc_general_protection+0x192/0x390
+  ? asm_exc_general_protection+0x22/0x30
+  ? 0xffffffffa000b514
+  ? bpf_prog_03b13f331978c78c+0xf/0x28
+  mlx5e_xdp_handle+0x48/0x670 [mlx5_core]
+  ? dev_gro_receive+0x3b5/0x6e0
+  mlx5e_xsk_skb_from_cqe_linear+0x6e/0x90 [mlx5_core]
+  mlx5e_handle_rx_cqe+0x55/0x100 [mlx5_core]
+  mlx5e_poll_rx_cq+0x87/0x6e0 [mlx5_core]
+  mlx5e_napi_poll+0x45e/0x6b0 [mlx5_core]
+  __napi_poll+0x25/0x1a0
+  net_rx_action+0x28a/0x300
+  __do_softirq+0xcd/0x279
+  ? sort_range+0x20/0x20
+  run_ksoftirqd+0x1a/0x20
+  smpboot_thread_fn+0xa2/0x130
+  kthread+0xc9/0xf0
+  ? kthread_complete_and_exit+0x20/0x20
+  ret_from_fork+0x1f/0x30
+  </TASK>
+ Modules linked in: mlx5_ib mlx5_core rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter overlay zram zsmalloc fuse [last unloaded: mlx5_core]
+ ---[ end trace 0000000000000000 ]---
+
+Fixes: 7abd955a58fb ("net/mlx5e: RX, Fix page_pool page fragment tracking for XDP")
+Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+index d97e6df66f454..b8dd744536553 100644
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+@@ -323,8 +323,11 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
+       net_prefetch(mxbuf->xdp.data);
+ 
+       prog = rcu_dereference(rq->xdp_prog);
+-      if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf)))
++      if (likely(prog && mlx5e_xdp_handle(rq, prog, mxbuf))) {
++              if (likely(__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags)))
++                      wi->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE);
+               return NULL; /* page/packet was consumed by XDP */
++      }
+ 
+       /* XDP_PASS: copy the data from the UMEM to a new SKB. The frame reuse
+        * will be handled by mlx5e_free_rx_wqe.
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-move-gso-declarations-and-functions-to-their-own.patch b/queue-6.4/net-move-gso-declarations-and-functions-to-their-own.patch

new file mode 100644 (file)

index 0000000..ef0ea15
--- /dev/null
+++ b/queue-6.4/net-move-gso-declarations-and-functions-to-their-own.patch
@@ -0,0 +1,1407 @@
+From bcd8cfc5cb09aff8ac1709f4751071d12dd00ddd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 8 Jun 2023 19:17:37 +0000
+Subject: net: move gso declarations and functions to their own files
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit d457a0e329b0bfd3a1450e0b1a18cd2b47a25a08 ]
+
+Move declarations into include/net/gso.h and code into net/core/gso.c
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Stanislav Fomichev <sdf@google.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Link: https://lore.kernel.org/r/20230608191738.3947077-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 7938cd154368 ("net: gro: fix misuse of CB in udp socket lookup")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/broadcom/tg3.c           |   1 +
+ .../net/ethernet/myricom/myri10ge/myri10ge.c  |   1 +
+ drivers/net/ethernet/sfc/siena/tx_common.c    |   1 +
+ drivers/net/ethernet/sfc/tx_common.c          |   1 +
+ drivers/net/tap.c                             |   1 +
+ drivers/net/usb/r8152.c                       |   1 +
+ drivers/net/wireguard/device.c                |   1 +
+ drivers/net/wireless/intel/iwlwifi/mvm/tx.c   |   1 +
+ include/linux/netdevice.h                     |  26 +-
+ include/linux/skbuff.h                        |  71 -----
+ include/net/gro.h                             |   1 +
+ include/net/gso.h                             | 109 +++++++
+ include/net/udp.h                             |   1 +
+ net/core/Makefile                             |   2 +-
+ net/core/dev.c                                |  70 +----
+ net/core/gro.c                                |  59 +---
+ net/core/gso.c                                | 273 ++++++++++++++++++
+ net/core/skbuff.c                             | 142 +--------
+ net/ipv4/af_inet.c                            |   1 +
+ net/ipv4/esp4_offload.c                       |   1 +
+ net/ipv4/gre_offload.c                        |   1 +
+ net/ipv4/ip_output.c                          |   1 +
+ net/ipv4/tcp_offload.c                        |   1 +
+ net/ipv4/udp.c                                |   1 +
+ net/ipv4/udp_offload.c                        |   1 +
+ net/ipv6/esp6_offload.c                       |   1 +
+ net/ipv6/ip6_offload.c                        |   1 +
+ net/ipv6/ip6_output.c                         |   1 +
+ net/ipv6/udp_offload.c                        |   1 +
+ net/mac80211/tx.c                             |   1 +
+ net/mpls/af_mpls.c                            |   1 +
+ net/mpls/mpls_gso.c                           |   1 +
+ net/netfilter/nf_flow_table_ip.c              |   1 +
+ net/netfilter/nfnetlink_queue.c               |   1 +
+ net/nsh/nsh.c                                 |   1 +
+ net/openvswitch/actions.c                     |   1 +
+ net/openvswitch/datapath.c                    |   1 +
+ net/sched/act_police.c                        |   1 +
+ net/sched/sch_cake.c                          |   1 +
+ net/sched/sch_netem.c                         |   1 +
+ net/sched/sch_taprio.c                        |   1 +
+ net/sched/sch_tbf.c                           |   1 +
+ net/sctp/offload.c                            |   1 +
+ net/xfrm/xfrm_device.c                        |   1 +
+ net/xfrm/xfrm_interface_core.c                |   1 +
+ net/xfrm/xfrm_output.c                        |   1 +
+ 46 files changed, 425 insertions(+), 365 deletions(-)
+ create mode 100644 include/net/gso.h
+ create mode 100644 net/core/gso.c
+
+diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
+index a52cf9aae4988..5ef073a79ce94 100644
+--- a/drivers/net/ethernet/broadcom/tg3.c
++++ b/drivers/net/ethernet/broadcom/tg3.c
+@@ -57,6 +57,7 @@
+ #include <linux/crc32poly.h>
+ 
+ #include <net/checksum.h>
++#include <net/gso.h>
+ #include <net/ip.h>
+ 
+ #include <linux/io.h>
+diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+index c5687d94ea885..7b7e1c5b00f47 100644
+--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
++++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+@@ -66,6 +66,7 @@
+ #include <linux/slab.h>
+ #include <linux/prefetch.h>
+ #include <net/checksum.h>
++#include <net/gso.h>
+ #include <net/ip.h>
+ #include <net/tcp.h>
+ #include <asm/byteorder.h>
+diff --git a/drivers/net/ethernet/sfc/siena/tx_common.c b/drivers/net/ethernet/sfc/siena/tx_common.c
+index 93a32d61944f0..a7a9ab304e136 100644
+--- a/drivers/net/ethernet/sfc/siena/tx_common.c
++++ b/drivers/net/ethernet/sfc/siena/tx_common.c
+@@ -12,6 +12,7 @@
+ #include "efx.h"
+ #include "nic_common.h"
+ #include "tx_common.h"
++#include <net/gso.h>
+ 
+ static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
+ {
+diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
+index 755aa92bf8236..9f2393d343715 100644
+--- a/drivers/net/ethernet/sfc/tx_common.c
++++ b/drivers/net/ethernet/sfc/tx_common.c
+@@ -12,6 +12,7 @@
+ #include "efx.h"
+ #include "nic_common.h"
+ #include "tx_common.h"
++#include <net/gso.h>
+ 
+ static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
+ {
+diff --git a/drivers/net/tap.c b/drivers/net/tap.c
+index d30d730ed5a71..9137fb8c1c420 100644
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -18,6 +18,7 @@
+ #include <linux/fs.h>
+ #include <linux/uio.h>
+ 
++#include <net/gso.h>
+ #include <net/net_namespace.h>
+ #include <net/rtnetlink.h>
+ #include <net/sock.h>
+diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
+index 0999a58ca9d26..0738baa5b82e4 100644
+--- a/drivers/net/usb/r8152.c
++++ b/drivers/net/usb/r8152.c
+@@ -27,6 +27,7 @@
+ #include <linux/firmware.h>
+ #include <crypto/hash.h>
+ #include <linux/usb/r8152.h>
++#include <net/gso.h>
+ 
+ /* Information for net-next */
+ #define NETNEXT_VERSION               "12"
+diff --git a/drivers/net/wireguard/device.c b/drivers/net/wireguard/device.c
+index d58e9f818d3b7..258dcc1039216 100644
+--- a/drivers/net/wireguard/device.c
++++ b/drivers/net/wireguard/device.c
+@@ -20,6 +20,7 @@
+ #include <linux/icmp.h>
+ #include <linux/suspend.h>
+ #include <net/dst_metadata.h>
++#include <net/gso.h>
+ #include <net/icmp.h>
+ #include <net/rtnetlink.h>
+ #include <net/ip_tunnels.h>
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+index 00719e1304386..682733193d3de 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+@@ -7,6 +7,7 @@
+ #include <linux/ieee80211.h>
+ #include <linux/etherdevice.h>
+ #include <linux/tcp.h>
++#include <net/gso.h>
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ 
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 68adc8af29efb..9291c04a2e09d 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -4827,13 +4827,6 @@ int skb_crc32c_csum_help(struct sk_buff *skb);
+ int skb_csum_hwoffload_help(struct sk_buff *skb,
+                           const netdev_features_t features);
+ 
+-struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+-                                netdev_features_t features, bool tx_path);
+-struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+-                                  netdev_features_t features, __be16 type);
+-struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+-                                  netdev_features_t features);
+-
+ struct netdev_bonding_info {
+       ifslave slave;
+       ifbond  master;
+@@ -4856,11 +4849,6 @@ static inline void ethtool_notify(struct net_device *dev, unsigned int cmd,
+ }
+ #endif
+ 
+-static inline
+-struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features)
+-{
+-      return __skb_gso_segment(skb, features, true);
+-}
+ __be16 skb_network_protocol(struct sk_buff *skb, int *depth);
+ 
+ static inline bool can_checksum_protocol(netdev_features_t features,
+@@ -4987,6 +4975,7 @@ netdev_features_t passthru_features_check(struct sk_buff *skb,
+                                         struct net_device *dev,
+                                         netdev_features_t features);
+ netdev_features_t netif_skb_features(struct sk_buff *skb);
++void skb_warn_bad_offload(const struct sk_buff *skb);
+ 
+ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
+ {
+@@ -5035,19 +5024,6 @@ void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs);
+ void netif_inherit_tso_max(struct net_device *to,
+                          const struct net_device *from);
+ 
+-static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
+-                                      int pulled_hlen, u16 mac_offset,
+-                                      int mac_len)
+-{
+-      skb->protocol = protocol;
+-      skb->encapsulation = 1;
+-      skb_push(skb, pulled_hlen);
+-      skb_reset_transport_header(skb);
+-      skb->mac_header = mac_offset;
+-      skb->network_header = skb->mac_header + mac_len;
+-      skb->mac_len = mac_len;
+-}
+-
+ static inline bool netif_is_macsec(const struct net_device *dev)
+ {
+       return dev->priv_flags & IFF_MACSEC;
+diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
+index 0b40417457cd1..fdd9db2612968 100644
+--- a/include/linux/skbuff.h
++++ b/include/linux/skbuff.h
+@@ -3992,8 +3992,6 @@ int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
+ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
+ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
+ void skb_scrub_packet(struct sk_buff *skb, bool xnet);
+-bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu);
+-bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
+ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
+ struct sk_buff *skb_segment_list(struct sk_buff *skb, netdev_features_t features,
+                                unsigned int offset);
+@@ -4859,75 +4857,6 @@ static inline struct sec_path *skb_sec_path(const struct sk_buff *skb)
+ #endif
+ }
+ 
+-/* Keeps track of mac header offset relative to skb->head.
+- * It is useful for TSO of Tunneling protocol. e.g. GRE.
+- * For non-tunnel skb it points to skb_mac_header() and for
+- * tunnel skb it points to outer mac header.
+- * Keeps track of level of encapsulation of network headers.
+- */
+-struct skb_gso_cb {
+-      union {
+-              int     mac_offset;
+-              int     data_offset;
+-      };
+-      int     encap_level;
+-      __wsum  csum;
+-      __u16   csum_start;
+-};
+-#define SKB_GSO_CB_OFFSET     32
+-#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET))
+-
+-static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
+-{
+-      return (skb_mac_header(inner_skb) - inner_skb->head) -
+-              SKB_GSO_CB(inner_skb)->mac_offset;
+-}
+-
+-static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
+-{
+-      int new_headroom, headroom;
+-      int ret;
+-
+-      headroom = skb_headroom(skb);
+-      ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC);
+-      if (ret)
+-              return ret;
+-
+-      new_headroom = skb_headroom(skb);
+-      SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom);
+-      return 0;
+-}
+-
+-static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res)
+-{
+-      /* Do not update partial checksums if remote checksum is enabled. */
+-      if (skb->remcsum_offload)
+-              return;
+-
+-      SKB_GSO_CB(skb)->csum = res;
+-      SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head;
+-}
+-
+-/* Compute the checksum for a gso segment. First compute the checksum value
+- * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and
+- * then add in skb->csum (checksum from csum_start to end of packet).
+- * skb->csum and csum_start are then updated to reflect the checksum of the
+- * resultant packet starting from the transport header-- the resultant checksum
+- * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo
+- * header.
+- */
+-static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
+-{
+-      unsigned char *csum_start = skb_transport_header(skb);
+-      int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start;
+-      __wsum partial = SKB_GSO_CB(skb)->csum;
+-
+-      SKB_GSO_CB(skb)->csum = res;
+-      SKB_GSO_CB(skb)->csum_start = csum_start - skb->head;
+-
+-      return csum_fold(csum_partial(csum_start, plen, partial));
+-}
+-
+ static inline bool skb_is_gso(const struct sk_buff *skb)
+ {
+       return skb_shinfo(skb)->gso_size;
+diff --git a/include/net/gro.h b/include/net/gro.h
+index a4fab706240d2..972ff42d3a829 100644
+--- a/include/net/gro.h
++++ b/include/net/gro.h
+@@ -446,5 +446,6 @@ static inline void gro_normal_one(struct napi_struct *napi, struct sk_buff *skb,
+               gro_normal_list(napi);
+ }
+ 
++extern struct list_head offload_base;
+ 
+ #endif /* _NET_IPV6_GRO_H */
+diff --git a/include/net/gso.h b/include/net/gso.h
+new file mode 100644
+index 0000000000000..29975440cad51
+--- /dev/null
++++ b/include/net/gso.h
+@@ -0,0 +1,109 @@
++/* SPDX-License-Identifier: GPL-2.0-or-later */
++
++#ifndef _NET_GSO_H
++#define _NET_GSO_H
++
++#include <linux/skbuff.h>
++
++/* GSO state kept in skb->cb at SKB_GSO_CB_OFFSET (see SKB_GSO_CB()).
++ * mac_offset: mac header offset relative to skb->head; useful for TSO of
++ *   tunneling protocols, e.g. GRE. For a non-tunnel skb it points to
++ *   skb_mac_header() and for a tunnel skb it points to the outer mac header.
++ * encap_level: level of encapsulation of network headers.
++ */
++struct skb_gso_cb {
++      union {
++              int     mac_offset;
++              int     data_offset;
++      };
++      int     encap_level;
++      __wsum  csum;           /* checksum from csum_start to end of packet */
++      __u16   csum_start;     /* offset relative to skb->head */
++};
++#define SKB_GSO_CB_OFFSET     32
++#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET))
++
++static inline int skb_tnl_header_len(const struct sk_buff *inner_skb)
++{
++      return (skb_mac_header(inner_skb) - inner_skb->head) -
++              SKB_GSO_CB(inner_skb)->mac_offset;
++}
++
++static inline int gso_pskb_expand_head(struct sk_buff *skb, int extra)
++{
++      int new_headroom, headroom;
++      int ret;
++
++      headroom = skb_headroom(skb);
++      ret = pskb_expand_head(skb, extra, 0, GFP_ATOMIC);
++      if (ret)
++              return ret;
++
++      new_headroom = skb_headroom(skb);
++      SKB_GSO_CB(skb)->mac_offset += (new_headroom - headroom);
++      return 0;
++}
++
++static inline void gso_reset_checksum(struct sk_buff *skb, __wsum res)
++{
++      /* Do not update partial checksums if remote checksum is enabled. */
++      if (skb->remcsum_offload)
++              return;
++
++      SKB_GSO_CB(skb)->csum = res;
++      SKB_GSO_CB(skb)->csum_start = skb_checksum_start(skb) - skb->head;
++}
++
++/* Compute the checksum for a gso segment. First compute the checksum value
++ * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and
++ * then add in SKB_GSO_CB(skb)->csum (checksum from csum_start to end of
++ * packet). SKB_GSO_CB(skb)->csum and csum_start are then updated to reflect
++ * the checksum of the resultant packet starting from the transport header --
++ * the resultant checksum is in the res argument (i.e. normally zero or ~ of
++ * checksum of a pseudo header).
++ */
++static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
++{
++      unsigned char *csum_start = skb_transport_header(skb);
++      int plen = (skb->head + SKB_GSO_CB(skb)->csum_start) - csum_start;
++      __wsum partial = SKB_GSO_CB(skb)->csum;
++
++      SKB_GSO_CB(skb)->csum = res;
++      SKB_GSO_CB(skb)->csum_start = csum_start - skb->head;
++
++      return csum_fold(csum_partial(csum_start, plen, partial));
++}
++
++struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
++                                netdev_features_t features, bool tx_path);
++
++static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb,
++                                            netdev_features_t features)
++{
++      return __skb_gso_segment(skb, features, true);
++}
++
++struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
++                                  netdev_features_t features, __be16 type);
++
++struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
++                                  netdev_features_t features);
++
++bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu);
++
++bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len);
++
++static inline void skb_gso_error_unwind(struct sk_buff *skb, __be16 protocol,
++                                      int pulled_hlen, u16 mac_offset,
++                                      int mac_len)
++{
++      skb->protocol = protocol;
++      skb->encapsulation = 1;
++      skb_push(skb, pulled_hlen);
++      skb_reset_transport_header(skb);
++      skb->mac_header = mac_offset;
++      skb->network_header = skb->mac_header + mac_len;
++      skb->mac_len = mac_len;
++}
++
++#endif /* _NET_GSO_H */
+diff --git a/include/net/udp.h b/include/net/udp.h
+index de4b528522bb9..94f3486c43e33 100644
+--- a/include/net/udp.h
++++ b/include/net/udp.h
+@@ -21,6 +21,7 @@
+ #include <linux/list.h>
+ #include <linux/bug.h>
+ #include <net/inet_sock.h>
++#include <net/gso.h>
+ #include <net/sock.h>
+ #include <net/snmp.h>
+ #include <net/ip.h>
+diff --git a/net/core/Makefile b/net/core/Makefile
+index 8f367813bc681..731db2eaa6107 100644
+--- a/net/core/Makefile
++++ b/net/core/Makefile
+@@ -13,7 +13,7 @@ obj-y                     += dev.o dev_addr_lists.o dst.o netevent.o \
+                       neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
+                       sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
+                       fib_notifier.o xdp.o flow_offload.o gro.o \
+-                      netdev-genl.o netdev-genl-gen.o
++                      netdev-genl.o netdev-genl-gen.o gso.o
+ 
+ obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
+ 
+diff --git a/net/core/dev.c b/net/core/dev.c
+index c29f3e1db3ca7..44a4eb76a659e 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -3209,7 +3209,7 @@ static u16 skb_tx_hash(const struct net_device *dev,
+       return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+ }
+ 
+-static void skb_warn_bad_offload(const struct sk_buff *skb)
++void skb_warn_bad_offload(const struct sk_buff *skb)
+ {
+       static const netdev_features_t null_features;
+       struct net_device *dev = skb->dev;
+@@ -3338,74 +3338,6 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
+       return vlan_get_protocol_and_depth(skb, type, depth);
+ }
+ 
+-/* openvswitch calls this on rx path, so we need a different check.
+- */
+-static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
+-{
+-      if (tx_path)
+-              return skb->ip_summed != CHECKSUM_PARTIAL &&
+-                     skb->ip_summed != CHECKSUM_UNNECESSARY;
+-
+-      return skb->ip_summed == CHECKSUM_NONE;
+-}
+-
+-/**
+- *    __skb_gso_segment - Perform segmentation on skb.
+- *    @skb: buffer to segment
+- *    @features: features for the output path (see dev->features)
+- *    @tx_path: whether it is called in TX path
+- *
+- *    This function segments the given skb and returns a list of segments.
+- *
+- *    It may return NULL if the skb requires no segmentation.  This is
+- *    only possible when GSO is used for verifying header integrity.
+- *
+- *    Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb.
+- */
+-struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
+-                                netdev_features_t features, bool tx_path)
+-{
+-      struct sk_buff *segs;
+-
+-      if (unlikely(skb_needs_check(skb, tx_path))) {
+-              int err;
+-
+-              /* We're going to init ->check field in TCP or UDP header */
+-              err = skb_cow_head(skb, 0);
+-              if (err < 0)
+-                      return ERR_PTR(err);
+-      }
+-
+-      /* Only report GSO partial support if it will enable us to
+-       * support segmentation on this frame without needing additional
+-       * work.
+-       */
+-      if (features & NETIF_F_GSO_PARTIAL) {
+-              netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
+-              struct net_device *dev = skb->dev;
+-
+-              partial_features |= dev->features & dev->gso_partial_features;
+-              if (!skb_gso_ok(skb, features | partial_features))
+-                      features &= ~NETIF_F_GSO_PARTIAL;
+-      }
+-
+-      BUILD_BUG_ON(SKB_GSO_CB_OFFSET +
+-                   sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
+-
+-      SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
+-      SKB_GSO_CB(skb)->encap_level = 0;
+-
+-      skb_reset_mac_header(skb);
+-      skb_reset_mac_len(skb);
+-
+-      segs = skb_mac_gso_segment(skb, features);
+-
+-      if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
+-              skb_warn_bad_offload(skb);
+-
+-      return segs;
+-}
+-EXPORT_SYMBOL(__skb_gso_segment);
+ 
+ /* Take action when hardware reception checksum errors are detected. */
+ #ifdef CONFIG_BUG
+diff --git a/net/core/gro.c b/net/core/gro.c
+index 2d84165cb4f1d..2f1b6524bddc5 100644
+--- a/net/core/gro.c
++++ b/net/core/gro.c
+@@ -10,7 +10,7 @@
+ #define GRO_MAX_HEAD (MAX_HEADER + 128)
+ 
+ static DEFINE_SPINLOCK(offload_lock);
+-static struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base);
++struct list_head offload_base __read_mostly = LIST_HEAD_INIT(offload_base);
+ /* Maximum number of GRO_NORMAL skbs to batch up for list-RX */
+ int gro_normal_batch __read_mostly = 8;
+ 
+@@ -92,63 +92,6 @@ void dev_remove_offload(struct packet_offload *po)
+ }
+ EXPORT_SYMBOL(dev_remove_offload);
+ 
+-/**
+- *    skb_eth_gso_segment - segmentation handler for ethernet protocols.
+- *    @skb: buffer to segment
+- *    @features: features for the output path (see dev->features)
+- *    @type: Ethernet Protocol ID
+- */
+-struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
+-                                  netdev_features_t features, __be16 type)
+-{
+-      struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+-      struct packet_offload *ptype;
+-
+-      rcu_read_lock();
+-      list_for_each_entry_rcu(ptype, &offload_base, list) {
+-              if (ptype->type == type && ptype->callbacks.gso_segment) {
+-                      segs = ptype->callbacks.gso_segment(skb, features);
+-                      break;
+-              }
+-      }
+-      rcu_read_unlock();
+-
+-      return segs;
+-}
+-EXPORT_SYMBOL(skb_eth_gso_segment);
+-
+-/**
+- *    skb_mac_gso_segment - mac layer segmentation handler.
+- *    @skb: buffer to segment
+- *    @features: features for the output path (see dev->features)
+- */
+-struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
+-                                  netdev_features_t features)
+-{
+-      struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+-      struct packet_offload *ptype;
+-      int vlan_depth = skb->mac_len;
+-      __be16 type = skb_network_protocol(skb, &vlan_depth);
+-
+-      if (unlikely(!type))
+-              return ERR_PTR(-EINVAL);
+-
+-      __skb_pull(skb, vlan_depth);
+-
+-      rcu_read_lock();
+-      list_for_each_entry_rcu(ptype, &offload_base, list) {
+-              if (ptype->type == type && ptype->callbacks.gso_segment) {
+-                      segs = ptype->callbacks.gso_segment(skb, features);
+-                      break;
+-              }
+-      }
+-      rcu_read_unlock();
+-
+-      __skb_push(skb, skb->data - skb_mac_header(skb));
+-
+-      return segs;
+-}
+-EXPORT_SYMBOL(skb_mac_gso_segment);
+ 
+ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
+ {
+diff --git a/net/core/gso.c b/net/core/gso.c
+new file mode 100644
+index 0000000000000..9e1803bfc9c6c
+--- /dev/null
++++ b/net/core/gso.c
+@@ -0,0 +1,273 @@
++// SPDX-License-Identifier: GPL-2.0-or-later
++#include <linux/skbuff.h>
++#include <linux/sctp.h>
++#include <net/gso.h>
++#include <net/gro.h>
++
++/**
++ *    skb_eth_gso_segment - segmentation handler for ethernet protocols.
++ *    @skb: buffer to segment
++ *    @features: features for the output path (see dev->features)
++ *    @type: Ethernet Protocol ID
++ */
++struct sk_buff *skb_eth_gso_segment(struct sk_buff *skb,
++                                  netdev_features_t features, __be16 type)
++{
++      struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
++      struct packet_offload *ptype;
++
++      rcu_read_lock();
++      list_for_each_entry_rcu(ptype, &offload_base, list) {
++              if (ptype->type == type && ptype->callbacks.gso_segment) {
++                      segs = ptype->callbacks.gso_segment(skb, features);
++                      break;
++              }
++      }
++      rcu_read_unlock();
++
++      return segs;
++}
++EXPORT_SYMBOL(skb_eth_gso_segment);
++
++/**
++ *    skb_mac_gso_segment - mac layer segmentation handler.
++ *    @skb: buffer to segment
++ *    @features: features for the output path (see dev->features)
++ */
++struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
++                                  netdev_features_t features)
++{
++      struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
++      struct packet_offload *ptype;
++      int vlan_depth = skb->mac_len;
++      __be16 type = skb_network_protocol(skb, &vlan_depth);
++
++      if (unlikely(!type))
++              return ERR_PTR(-EINVAL);
++
++      __skb_pull(skb, vlan_depth);
++
++      rcu_read_lock();
++      list_for_each_entry_rcu(ptype, &offload_base, list) {
++              if (ptype->type == type && ptype->callbacks.gso_segment) {
++                      segs = ptype->callbacks.gso_segment(skb, features);
++                      break;
++              }
++      }
++      rcu_read_unlock();
++
++      __skb_push(skb, skb->data - skb_mac_header(skb));
++
++      return segs;
++}
++EXPORT_SYMBOL(skb_mac_gso_segment);
++/* openvswitch calls this on rx path, so we need a different check.
++ */
++static bool skb_needs_check(const struct sk_buff *skb, bool tx_path)
++{
++      if (tx_path)
++              return skb->ip_summed != CHECKSUM_PARTIAL &&
++                     skb->ip_summed != CHECKSUM_UNNECESSARY;
++
++      return skb->ip_summed == CHECKSUM_NONE;
++}
++
++/**
++ *    __skb_gso_segment - Perform segmentation on skb.
++ *    @skb: buffer to segment
++ *    @features: features for the output path (see dev->features)
++ *    @tx_path: whether it is called in TX path
++ *
++ *    Segments the given skb; returns a list of segments or an ERR_PTR().
++ *
++ *    It may return NULL if the skb requires no segmentation.  This is
++ *    only possible when GSO is used for verifying header integrity.
++ *
++ *    Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb.
++ */
++struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
++                                netdev_features_t features, bool tx_path)
++{
++      struct sk_buff *segs;
++
++      if (unlikely(skb_needs_check(skb, tx_path))) {
++              int err;
++
++              /* We're going to init ->check field in TCP or UDP header */
++              err = skb_cow_head(skb, 0);
++              if (err < 0)
++                      return ERR_PTR(err);
++      }
++
++      /* Only report GSO partial support if it will enable us to
++       * support segmentation on this frame without needing additional
++       * work.
++       */
++      if (features & NETIF_F_GSO_PARTIAL) {
++              netdev_features_t partial_features = NETIF_F_GSO_ROBUST;
++              struct net_device *dev = skb->dev;
++
++              partial_features |= dev->features & dev->gso_partial_features;
++              if (!skb_gso_ok(skb, features | partial_features))
++                      features &= ~NETIF_F_GSO_PARTIAL;
++      }
++
++      BUILD_BUG_ON(SKB_GSO_CB_OFFSET +
++                   sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb));
++
++      SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb);
++      SKB_GSO_CB(skb)->encap_level = 0;
++
++      skb_reset_mac_header(skb);
++      skb_reset_mac_len(skb);
++
++      segs = skb_mac_gso_segment(skb, features);
++
++      if (segs != skb && unlikely(skb_needs_check(skb, tx_path) && !IS_ERR(segs)))
++              skb_warn_bad_offload(skb);
++
++      return segs;
++}
++EXPORT_SYMBOL(__skb_gso_segment);
++
++/**
++ * skb_gso_transport_seglen - Return length of individual segments of a gso packet
++ *
++ * @skb: GSO skb
++ *
++ * skb_gso_transport_seglen is used to determine the real size of the
++ * individual segments, including Layer4 headers (TCP/UDP).
++ *
++ * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
++ */
++static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
++{
++      const struct skb_shared_info *shinfo = skb_shinfo(skb);
++      unsigned int thlen = 0;
++
++      if (skb->encapsulation) {
++              thlen = skb_inner_transport_header(skb) -
++                      skb_transport_header(skb);
++
++              if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
++                      thlen += inner_tcp_hdrlen(skb);
++      } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
++              thlen = tcp_hdrlen(skb);
++      } else if (unlikely(skb_is_gso_sctp(skb))) {
++              thlen = sizeof(struct sctphdr);
++      } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
++              thlen = sizeof(struct udphdr);
++      }
++      /* UFO sets gso_size to the size of the fragmentation
++       * payload, i.e. the size of the L4 (UDP) header is already
++       * accounted for.
++       */
++      return thlen + shinfo->gso_size;
++}
++
++/**
++ * skb_gso_network_seglen - Return length of individual segments of a gso packet
++ *
++ * @skb: GSO skb
++ *
++ * skb_gso_network_seglen is used to determine the real size of the
++ * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
++ *
++ * The MAC/L2 header is not accounted for.
++ */
++static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
++{
++      unsigned int hdr_len = skb_transport_header(skb) -
++                             skb_network_header(skb);
++
++      return hdr_len + skb_gso_transport_seglen(skb);
++}
++
++/**
++ * skb_gso_mac_seglen - Return length of individual segments of a gso packet
++ *
++ * @skb: GSO skb
++ *
++ * skb_gso_mac_seglen is used to determine the real size of the
++ * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
++ * headers (TCP/UDP).
++ */
++static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
++{
++      unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
++
++      return hdr_len + skb_gso_transport_seglen(skb);
++}
++
++/**
++ * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
++ *
++ * There are a couple of instances where we have a GSO skb, and we
++ * want to determine what size it would be after it is segmented.
++ *
++ * We might want to check:
++ * -    L3+L4+payload size (e.g. IP forwarding)
++ * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
++ *
++ * This is a helper to do that correctly considering GSO_BY_FRAGS.
++ *
++ * @skb: GSO skb
++ *
++ * @seg_len: The segmented length (from skb_gso_*_seglen). In the
++ *           GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
++ *
++ * @max_len: The maximum permissible length.
++ *
++ * Return: true if the segmented length <= max length, false otherwise.
++ */
++static inline bool skb_gso_size_check(const struct sk_buff *skb,
++                                    unsigned int seg_len,
++                                    unsigned int max_len) {
++      const struct skb_shared_info *shinfo = skb_shinfo(skb);
++      const struct sk_buff *iter;
++
++      if (shinfo->gso_size != GSO_BY_FRAGS)
++              return seg_len <= max_len;
++
++      /* Undo this so we can re-use header sizes */
++      seg_len -= GSO_BY_FRAGS;
++
++      skb_walk_frags(skb, iter) {
++              if (seg_len + skb_headlen(iter) > max_len)
++                      return false;
++      }
++
++      return true;
++}
++
++/**
++ * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
++ *
++ * @skb: GSO skb
++ * @mtu: MTU to validate against
++ *
++ * skb_gso_validate_network_len validates if a given skb will fit a
++ * wanted MTU once split. It considers L3 headers, L4 headers, and the
++ * payload.
++ */
++bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
++{
++      return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
++}
++EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);
++
++/**
++ * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
++ *
++ * @skb: GSO skb
++ * @len: length to validate against
++ *
++ * skb_gso_validate_mac_len validates if a given skb will fit a wanted
++ * length once split, including L2, L3 and L4 headers and the payload.
++ */
++bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
++{
++      return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
++}
++EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
++
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 1b6a1d99869dc..593ec18e3f007 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -67,6 +67,7 @@
+ #include <net/dst.h>
+ #include <net/sock.h>
+ #include <net/checksum.h>
++#include <net/gso.h>
+ #include <net/ip6_checksum.h>
+ #include <net/xfrm.h>
+ #include <net/mpls.h>
+@@ -5789,147 +5790,6 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
+ }
+ EXPORT_SYMBOL_GPL(skb_scrub_packet);
+ 
+-/**
+- * skb_gso_transport_seglen - Return length of individual segments of a gso packet
+- *
+- * @skb: GSO skb
+- *
+- * skb_gso_transport_seglen is used to determine the real size of the
+- * individual segments, including Layer4 headers (TCP/UDP).
+- *
+- * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
+- */
+-static unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+-{
+-      const struct skb_shared_info *shinfo = skb_shinfo(skb);
+-      unsigned int thlen = 0;
+-
+-      if (skb->encapsulation) {
+-              thlen = skb_inner_transport_header(skb) -
+-                      skb_transport_header(skb);
+-
+-              if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+-                      thlen += inner_tcp_hdrlen(skb);
+-      } else if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) {
+-              thlen = tcp_hdrlen(skb);
+-      } else if (unlikely(skb_is_gso_sctp(skb))) {
+-              thlen = sizeof(struct sctphdr);
+-      } else if (shinfo->gso_type & SKB_GSO_UDP_L4) {
+-              thlen = sizeof(struct udphdr);
+-      }
+-      /* UFO sets gso_size to the size of the fragmentation
+-       * payload, i.e. the size of the L4 (UDP) header is already
+-       * accounted for.
+-       */
+-      return thlen + shinfo->gso_size;
+-}
+-
+-/**
+- * skb_gso_network_seglen - Return length of individual segments of a gso packet
+- *
+- * @skb: GSO skb
+- *
+- * skb_gso_network_seglen is used to determine the real size of the
+- * individual segments, including Layer3 (IP, IPv6) and L4 headers (TCP/UDP).
+- *
+- * The MAC/L2 header is not accounted for.
+- */
+-static unsigned int skb_gso_network_seglen(const struct sk_buff *skb)
+-{
+-      unsigned int hdr_len = skb_transport_header(skb) -
+-                             skb_network_header(skb);
+-
+-      return hdr_len + skb_gso_transport_seglen(skb);
+-}
+-
+-/**
+- * skb_gso_mac_seglen - Return length of individual segments of a gso packet
+- *
+- * @skb: GSO skb
+- *
+- * skb_gso_mac_seglen is used to determine the real size of the
+- * individual segments, including MAC/L2, Layer3 (IP, IPv6) and L4
+- * headers (TCP/UDP).
+- */
+-static unsigned int skb_gso_mac_seglen(const struct sk_buff *skb)
+-{
+-      unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb);
+-
+-      return hdr_len + skb_gso_transport_seglen(skb);
+-}
+-
+-/**
+- * skb_gso_size_check - check the skb size, considering GSO_BY_FRAGS
+- *
+- * There are a couple of instances where we have a GSO skb, and we
+- * want to determine what size it would be after it is segmented.
+- *
+- * We might want to check:
+- * -    L3+L4+payload size (e.g. IP forwarding)
+- * - L2+L3+L4+payload size (e.g. sanity check before passing to driver)
+- *
+- * This is a helper to do that correctly considering GSO_BY_FRAGS.
+- *
+- * @skb: GSO skb
+- *
+- * @seg_len: The segmented length (from skb_gso_*_seglen). In the
+- *           GSO_BY_FRAGS case this will be [header sizes + GSO_BY_FRAGS].
+- *
+- * @max_len: The maximum permissible length.
+- *
+- * Returns true if the segmented length <= max length.
+- */
+-static inline bool skb_gso_size_check(const struct sk_buff *skb,
+-                                    unsigned int seg_len,
+-                                    unsigned int max_len) {
+-      const struct skb_shared_info *shinfo = skb_shinfo(skb);
+-      const struct sk_buff *iter;
+-
+-      if (shinfo->gso_size != GSO_BY_FRAGS)
+-              return seg_len <= max_len;
+-
+-      /* Undo this so we can re-use header sizes */
+-      seg_len -= GSO_BY_FRAGS;
+-
+-      skb_walk_frags(skb, iter) {
+-              if (seg_len + skb_headlen(iter) > max_len)
+-                      return false;
+-      }
+-
+-      return true;
+-}
+-
+-/**
+- * skb_gso_validate_network_len - Will a split GSO skb fit into a given MTU?
+- *
+- * @skb: GSO skb
+- * @mtu: MTU to validate against
+- *
+- * skb_gso_validate_network_len validates if a given skb will fit a
+- * wanted MTU once split. It considers L3 headers, L4 headers, and the
+- * payload.
+- */
+-bool skb_gso_validate_network_len(const struct sk_buff *skb, unsigned int mtu)
+-{
+-      return skb_gso_size_check(skb, skb_gso_network_seglen(skb), mtu);
+-}
+-EXPORT_SYMBOL_GPL(skb_gso_validate_network_len);
+-
+-/**
+- * skb_gso_validate_mac_len - Will a split GSO skb fit in a given length?
+- *
+- * @skb: GSO skb
+- * @len: length to validate against
+- *
+- * skb_gso_validate_mac_len validates if a given skb will fit a wanted
+- * length once split, including L2, L3 and L4 headers and the payload.
+- */
+-bool skb_gso_validate_mac_len(const struct sk_buff *skb, unsigned int len)
+-{
+-      return skb_gso_size_check(skb, skb_gso_mac_seglen(skb), len);
+-}
+-EXPORT_SYMBOL_GPL(skb_gso_validate_mac_len);
+-
+ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
+ {
+       int mac_len, meta_len;
+diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+index 4a76ebf793b85..10ebe39dcc873 100644
+--- a/net/ipv4/af_inet.c
++++ b/net/ipv4/af_inet.c
+@@ -100,6 +100,7 @@
+ #include <net/ip_fib.h>
+ #include <net/inet_connection_sock.h>
+ #include <net/gro.h>
++#include <net/gso.h>
+ #include <net/tcp.h>
+ #include <net/udp.h>
+ #include <net/udplite.h>
+diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
+index ee848be59e65a..10e96ed6c9e39 100644
+--- a/net/ipv4/esp4_offload.c
++++ b/net/ipv4/esp4_offload.c
+@@ -17,6 +17,7 @@
+ #include <linux/err.h>
+ #include <linux/module.h>
+ #include <net/gro.h>
++#include <net/gso.h>
+ #include <net/ip.h>
+ #include <net/xfrm.h>
+ #include <net/esp.h>
+diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
+index 2b9cb5398335b..311e70bfce407 100644
+--- a/net/ipv4/gre_offload.c
++++ b/net/ipv4/gre_offload.c
+@@ -11,6 +11,7 @@
+ #include <net/protocol.h>
+ #include <net/gre.h>
+ #include <net/gro.h>
++#include <net/gso.h>
+ 
+ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
+                                      netdev_features_t features)
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index a1bead441026e..d95e40a47098a 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -73,6 +73,7 @@
+ #include <net/arp.h>
+ #include <net/icmp.h>
+ #include <net/checksum.h>
++#include <net/gso.h>
+ #include <net/inetpeer.h>
+ #include <net/inet_ecn.h>
+ #include <net/lwtunnel.h>
+diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
+index 4851211aa60d6..9c51ee9ccd4c0 100644
+--- a/net/ipv4/tcp_offload.c
++++ b/net/ipv4/tcp_offload.c
+@@ -9,6 +9,7 @@
+ #include <linux/indirect_call_wrapper.h>
+ #include <linux/skbuff.h>
+ #include <net/gro.h>
++#include <net/gso.h>
+ #include <net/tcp.h>
+ #include <net/protocol.h>
+ 
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 9482def1f3103..c6b790001aa77 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -103,6 +103,7 @@
+ #include <net/ip_tunnels.h>
+ #include <net/route.h>
+ #include <net/checksum.h>
++#include <net/gso.h>
+ #include <net/xfrm.h>
+ #include <trace/events/udp.h>
+ #include <linux/static_key.h>
+diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
+index 4a61832e7f69b..f402946da344b 100644
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -8,6 +8,7 @@
+ 
+ #include <linux/skbuff.h>
+ #include <net/gro.h>
++#include <net/gso.h>
+ #include <net/udp.h>
+ #include <net/protocol.h>
+ #include <net/inet_common.h>
+diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
+index 7723402689973..a189e08370a5e 100644
+--- a/net/ipv6/esp6_offload.c
++++ b/net/ipv6/esp6_offload.c
+@@ -17,6 +17,7 @@
+ #include <linux/err.h>
+ #include <linux/module.h>
+ #include <net/gro.h>
++#include <net/gso.h>
+ #include <net/ip.h>
+ #include <net/xfrm.h>
+ #include <net/esp.h>
+diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
+index 00dc2e3b01845..d6314287338da 100644
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -16,6 +16,7 @@
+ #include <net/tcp.h>
+ #include <net/udp.h>
+ #include <net/gro.h>
++#include <net/gso.h>
+ 
+ #include "ip6_offload.h"
+ 
+diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+index 9554cf46ed888..4a27fab1d09a3 100644
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -42,6 +42,7 @@
+ #include <net/sock.h>
+ #include <net/snmp.h>
+ 
++#include <net/gso.h>
+ #include <net/ipv6.h>
+ #include <net/ndisc.h>
+ #include <net/protocol.h>
+diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
+index e0e10f6bcdc18..09fa7a42cb937 100644
+--- a/net/ipv6/udp_offload.c
++++ b/net/ipv6/udp_offload.c
+@@ -14,6 +14,7 @@
+ #include <net/ip6_checksum.h>
+ #include "ip6_offload.h"
+ #include <net/gro.h>
++#include <net/gso.h>
+ 
+ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+                                        netdev_features_t features)
+diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
+index 13b522dab0a3d..39ca4a8fe7b32 100644
+--- a/net/mac80211/tx.c
++++ b/net/mac80211/tx.c
+@@ -26,6 +26,7 @@
+ #include <net/codel_impl.h>
+ #include <asm/unaligned.h>
+ #include <net/fq_impl.h>
++#include <net/gso.h>
+ 
+ #include "ieee80211_i.h"
+ #include "driver-ops.h"
+diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
+index dc5165d3eec4e..bf6e81d562631 100644
+--- a/net/mpls/af_mpls.c
++++ b/net/mpls/af_mpls.c
+@@ -12,6 +12,7 @@
+ #include <linux/nospec.h>
+ #include <linux/vmalloc.h>
+ #include <linux/percpu.h>
++#include <net/gso.h>
+ #include <net/ip.h>
+ #include <net/dst.h>
+ #include <net/sock.h>
+diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c
+index 1482259de9b5d..533d082f0701e 100644
+--- a/net/mpls/mpls_gso.c
++++ b/net/mpls/mpls_gso.c
+@@ -14,6 +14,7 @@
+ #include <linux/netdev_features.h>
+ #include <linux/netdevice.h>
+ #include <linux/skbuff.h>
++#include <net/gso.h>
+ #include <net/mpls.h>
+ 
+ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
+diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
+index 3bbaf9c7ea46a..7eba00f6c6b6a 100644
+--- a/net/netfilter/nf_flow_table_ip.c
++++ b/net/netfilter/nf_flow_table_ip.c
+@@ -8,6 +8,7 @@
+ #include <linux/ipv6.h>
+ #include <linux/netdevice.h>
+ #include <linux/if_ether.h>
++#include <net/gso.h>
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_route.h>
+diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
+index e311462f6d98d..556bc902af00f 100644
+--- a/net/netfilter/nfnetlink_queue.c
++++ b/net/netfilter/nfnetlink_queue.c
+@@ -30,6 +30,7 @@
+ #include <linux/netfilter/nf_conntrack_common.h>
+ #include <linux/list.h>
+ #include <linux/cgroup-defs.h>
++#include <net/gso.h>
+ #include <net/sock.h>
+ #include <net/tcp_states.h>
+ #include <net/netfilter/nf_queue.h>
+diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
+index 0f23e5e8e03eb..f4a38bd6a7e04 100644
+--- a/net/nsh/nsh.c
++++ b/net/nsh/nsh.c
+@@ -8,6 +8,7 @@
+ #include <linux/module.h>
+ #include <linux/netdevice.h>
+ #include <linux/skbuff.h>
++#include <net/gso.h>
+ #include <net/nsh.h>
+ #include <net/tun_proto.h>
+ 
+diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
+index a8cf9a88758ef..8074ea00d577e 100644
+--- a/net/openvswitch/actions.c
++++ b/net/openvswitch/actions.c
+@@ -17,6 +17,7 @@
+ #include <linux/if_vlan.h>
+ 
+ #include <net/dst.h>
++#include <net/gso.h>
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_fib.h>
+diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
+index 58f530f60172a..a6d2a0b1aa21e 100644
+--- a/net/openvswitch/datapath.c
++++ b/net/openvswitch/datapath.c
+@@ -35,6 +35,7 @@
+ #include <linux/rculist.h>
+ #include <linux/dmi.h>
+ #include <net/genetlink.h>
++#include <net/gso.h>
+ #include <net/net_namespace.h>
+ #include <net/netns/generic.h>
+ #include <net/pkt_cls.h>
+diff --git a/net/sched/act_police.c b/net/sched/act_police.c
+index 2e9dce03d1ecc..f3121c5a85e9f 100644
+--- a/net/sched/act_police.c
++++ b/net/sched/act_police.c
+@@ -16,6 +16,7 @@
+ #include <linux/init.h>
+ #include <linux/slab.h>
+ #include <net/act_api.h>
++#include <net/gso.h>
+ #include <net/netlink.h>
+ #include <net/pkt_cls.h>
+ #include <net/tc_act/tc_police.h>
+diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
+index 891e007d5c0bf..9cff99558694d 100644
+--- a/net/sched/sch_cake.c
++++ b/net/sched/sch_cake.c
+@@ -65,6 +65,7 @@
+ #include <linux/reciprocal_div.h>
+ #include <net/netlink.h>
+ #include <linux/if_vlan.h>
++#include <net/gso.h>
+ #include <net/pkt_sched.h>
+ #include <net/pkt_cls.h>
+ #include <net/tcp.h>
+diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
+index b93ec2a3454eb..38d9aa0cd30e7 100644
+--- a/net/sched/sch_netem.c
++++ b/net/sched/sch_netem.c
+@@ -21,6 +21,7 @@
+ #include <linux/reciprocal_div.h>
+ #include <linux/rbtree.h>
+ 
++#include <net/gso.h>
+ #include <net/netlink.h>
+ #include <net/pkt_sched.h>
+ #include <net/inet_ecn.h>
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index 4caf80ddc6721..f681af138179c 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -20,6 +20,7 @@
+ #include <linux/spinlock.h>
+ #include <linux/rcupdate.h>
+ #include <linux/time.h>
++#include <net/gso.h>
+ #include <net/netlink.h>
+ #include <net/pkt_sched.h>
+ #include <net/pkt_cls.h>
+diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
+index 277ad11f4d613..17d2d00ddb182 100644
+--- a/net/sched/sch_tbf.c
++++ b/net/sched/sch_tbf.c
+@@ -13,6 +13,7 @@
+ #include <linux/string.h>
+ #include <linux/errno.h>
+ #include <linux/skbuff.h>
++#include <net/gso.h>
+ #include <net/netlink.h>
+ #include <net/sch_generic.h>
+ #include <net/pkt_cls.h>
+diff --git a/net/sctp/offload.c b/net/sctp/offload.c
+index eb874e3c399a5..502095173d885 100644
+--- a/net/sctp/offload.c
++++ b/net/sctp/offload.c
+@@ -22,6 +22,7 @@
+ #include <net/sctp/sctp.h>
+ #include <net/sctp/checksum.h>
+ #include <net/protocol.h>
++#include <net/gso.h>
+ 
+ static __le32 sctp_gso_make_checksum(struct sk_buff *skb)
+ {
+diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
+index 408f5e55744ed..533697e2488f2 100644
+--- a/net/xfrm/xfrm_device.c
++++ b/net/xfrm/xfrm_device.c
+@@ -15,6 +15,7 @@
+ #include <linux/slab.h>
+ #include <linux/spinlock.h>
+ #include <net/dst.h>
++#include <net/gso.h>
+ #include <net/xfrm.h>
+ #include <linux/notifier.h>
+ 
+diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
+index 35279c220bd78..a3319965470a7 100644
+--- a/net/xfrm/xfrm_interface_core.c
++++ b/net/xfrm/xfrm_interface_core.c
+@@ -33,6 +33,7 @@
+ #include <linux/uaccess.h>
+ #include <linux/atomic.h>
+ 
++#include <net/gso.h>
+ #include <net/icmp.h>
+ #include <net/ip.h>
+ #include <net/ipv6.h>
+diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
+index 369e5de8558ff..662c83beb345e 100644
+--- a/net/xfrm/xfrm_output.c
++++ b/net/xfrm/xfrm_output.c
+@@ -13,6 +13,7 @@
+ #include <linux/slab.h>
+ #include <linux/spinlock.h>
+ #include <net/dst.h>
++#include <net/gso.h>
+ #include <net/icmp.h>
+ #include <net/inet_ecn.h>
+ #include <net/xfrm.h>
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-netsec-ignore-phy-mode-on-synquacer-in-dt-mode.patch b/queue-6.4/net-netsec-ignore-phy-mode-on-synquacer-in-dt-mode.patch

new file mode 100644 (file)

index 0000000..b2b05b3
--- /dev/null
+++ b/queue-6.4/net-netsec-ignore-phy-mode-on-synquacer-in-dt-mode.patch
@@ -0,0 +1,61 @@
+From 9411e5a24a1be2bf22b36952de69a0d5a47f05d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 11:48:32 +0100
+Subject: net: netsec: Ignore 'phy-mode' on SynQuacer in DT mode
+
+From: Mark Brown <broonie@kernel.org>
+
+[ Upstream commit f3bb7759a924713bc54d15f6d0d70733b5935fad ]
+
+As documented in acd7aaf51b20 ("netsec: ignore 'phy-mode' device
+property on ACPI systems") the SocioNext SynQuacer platform ships with
+firmware defining the PHY mode as RGMII even though the physical
+configuration of the PHY is for TX and RX delays.  Since bbc4d71d63549bc
+("net: phy: realtek: fix rtl8211e rx/tx delay config") this has caused
+misconfiguration of the PHY, rendering the network unusable.
+
+This was worked around for ACPI by ignoring the phy-mode property but
+the system is also used with DT.  For DT instead if we're running on a
+SynQuacer force a working PHY mode, as well as the standard EDK2
+firmware with DT there are also some of these systems that use u-boot
+and might not initialise the PHY if not netbooting.  Newer firmware
+images for at least EDK2 are available from Linaro so print a warning
+when doing this.
+
+Fixes: 533dd11a12f6 ("net: socionext: Add Synquacer NetSec driver")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Acked-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20230731-synquacer-net-v3-1-944be5f06428@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/socionext/netsec.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
+index 2d7347b71c41b..0dcd6a568b061 100644
+--- a/drivers/net/ethernet/socionext/netsec.c
++++ b/drivers/net/ethernet/socionext/netsec.c
+@@ -1851,6 +1851,17 @@ static int netsec_of_probe(struct platform_device *pdev,
+               return err;
+       }
+ 
++      /*
++       * SynQuacer is physically configured with TX and RX delays
++       * but the standard firmware claimed otherwise for a long
++       * time, ignore it.
++       */
++      if (of_machine_is_compatible("socionext,developer-box") &&
++          priv->phy_interface != PHY_INTERFACE_MODE_RGMII_ID) {
++              dev_warn(&pdev->dev, "Outdated firmware reports incorrect PHY mode, overriding\n");
++              priv->phy_interface = PHY_INTERFACE_MODE_RGMII_ID;
++      }
++
+       priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0);
+       if (!priv->phy_np) {
+               dev_err(&pdev->dev, "missing required property 'phy-handle'\n");
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch b/queue-6.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch

new file mode 100644 (file)

index 0000000..785917c
--- /dev/null
+++ b/queue-6.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch
@@ -0,0 +1,50 @@
+From 807a531666ec7a4bcbc9d42e176ec2caa0d4cc4c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 08:32:01 -0400
+Subject: net/sched: cls_fw: No longer copy tcf_result on update to avoid
+ use-after-free
+
+From: valis <sec@valis.email>
+
+[ Upstream commit 76e42ae831991c828cffa8c37736ebfb831ad5ec ]
+
+When fw_change() is called on an existing filter, the whole
+tcf_result struct is always copied into the new instance of the filter.
+
+This causes a problem when updating a filter bound to a class,
+as tcf_unbind_filter() is always called on the old instance in the
+success path, decreasing filter_cnt of the still referenced class
+and allowing it to be deleted, leading to a use-after-free.
+
+Fix this by no longer copying the tcf_result struct from the old filter.
+
+Fixes: e35a8ee5993b ("net: sched: fw use RCU")
+Reported-by: valis <sec@valis.email>
+Reported-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Victor Nogueira <victor@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: M A Ramdhan <ramdhan@starlabs.sg>
+Link: https://lore.kernel.org/r/20230729123202.72406-3-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_fw.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c
+index 8641f80593179..c49d6af0e0480 100644
+--- a/net/sched/cls_fw.c
++++ b/net/sched/cls_fw.c
+@@ -267,7 +267,6 @@ static int fw_change(struct net *net, struct sk_buff *in_skb,
+                       return -ENOBUFS;
+ 
+               fnew->id = f->id;
+-              fnew->res = f->res;
+               fnew->ifindex = f->ifindex;
+               fnew->tp = f->tp;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch b/queue-6.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch

new file mode 100644 (file)

index 0000000..f6ebb92
--- /dev/null
+++ b/queue-6.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch
@@ -0,0 +1,50 @@
+From 13349ab8056cf73b5496c1b8f580281c074c58f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 08:32:02 -0400
+Subject: net/sched: cls_route: No longer copy tcf_result on update to avoid
+ use-after-free
+
+From: valis <sec@valis.email>
+
+[ Upstream commit b80b829e9e2c1b3f7aae34855e04d8f6ecaf13c8 ]
+
+When route4_change() is called on an existing filter, the whole
+tcf_result struct is always copied into the new instance of the filter.
+
+This causes a problem when updating a filter bound to a class,
+as tcf_unbind_filter() is always called on the old instance in the
+success path, decreasing filter_cnt of the still referenced class
+and allowing it to be deleted, leading to a use-after-free.
+
+Fix this by no longer copying the tcf_result struct from the old filter.
+
+Fixes: 1109c00547fc ("net: sched: RCU cls_route")
+Reported-by: valis <sec@valis.email>
+Reported-by: Bing-Jhong Billy Jheng <billy@starlabs.sg>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Victor Nogueira <victor@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: M A Ramdhan <ramdhan@starlabs.sg>
+Link: https://lore.kernel.org/r/20230729123202.72406-4-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_route.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
+index d0c53724d3e86..1e20bbd687f1d 100644
+--- a/net/sched/cls_route.c
++++ b/net/sched/cls_route.c
+@@ -513,7 +513,6 @@ static int route4_change(struct net *net, struct sk_buff *in_skb,
+       if (fold) {
+               f->id = fold->id;
+               f->iif = fold->iif;
+-              f->res = fold->res;
+               f->handle = fold->handle;
+ 
+               f->tp = fold->tp;
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch b/queue-6.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch

new file mode 100644 (file)

index 0000000..9e8a557
--- /dev/null
+++ b/queue-6.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch
@@ -0,0 +1,145 @@
+From 2a3346654945e131ad8a2561d0215b22bbeda579 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 09:51:51 -0400
+Subject: net: sched: cls_u32: Fix match key mis-addressing
+
+From: Jamal Hadi Salim <jhs@mojatatu.com>
+
+[ Upstream commit e68409db995380d1badacba41ff24996bd396171 ]
+
+A match entry is uniquely identified with an "address" or "path" in the
+form of: hashtable ID(12b):bucketid(8b):nodeid(12b).
+
+When creating table match entries all of hash table id, bucket id and
+node (match entry id) are needed to be either specified by the user or
+reasonable in-kernel defaults are used. The in-kernel default for a table id is
+0x800(omnipresent root table); for bucketid it is 0x0. Prior to this fix there
+was none for a nodeid i.e. the code assumed that the user passed the correct
+nodeid and if the user passes a nodeid of 0 (as Mingi Cho did) then that is what
+was used. But nodeid of 0 is reserved for identifying the table. This is not
+a problem until we dump. The dump code notices that the nodeid is zero and
+assumes it is referencing a table and therefore references table struct
+tc_u_hnode instead of what was created i.e match entry struct tc_u_knode.
+
+Ming does an equivalent of:
+tc filter add dev dummy0 parent 10: prio 1 handle 0x1000 \
+protocol ip u32 match ip src 10.0.0.1/32 classid 10:1 action ok
+
+Essentially specifying a table id 0, bucketid 1 and nodeid of zero
+Tableid 0 is remapped to the default of 0x800.
+Bucketid 1 is ignored and defaults to 0x00.
+Nodeid was assumed to be what Ming passed - 0x000
+
+dumping before fix shows:
+~$ tc filter ls dev dummy0 parent 10:
+filter protocol ip pref 1 u32 chain 0
+filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1
+filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor -30591
+
+Note that the last line reports a table instead of a match entry
+(you can tell this because it says "ht divisor...").
+As a result of reporting the wrong data type (misinterpretation of struct
+tc_u_knode as being struct tc_u_hnode) the divisor is reported with value
+of -30591. Ming identified this as part of the heap address
+(physmap_base is 0xffff8880 (-30591 - 1)).
+
+The fix is to ensure that when table entry matches are added and no
+nodeid is specified (i.e nodeid == 0) then we get the next available
+nodeid from the table's pool.
+
+After the fix, this is what the dump shows:
+$ tc filter ls dev dummy0 parent 10:
+filter protocol ip pref 1 u32 chain 0
+filter protocol ip pref 1 u32 chain 0 fh 800: ht divisor 1
+filter protocol ip pref 1 u32 chain 0 fh 800::800 order 2048 key ht 800 bkt 0 flowid 10:1 not_in_hw
+  match 0a000001/ffffffff at 12
+       action order 1: gact action pass
+        random type none pass val 0
+        index 1 ref 1 bind 1
+
+Reported-by: Mingi Cho <mgcho.minic@gmail.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Link: https://lore.kernel.org/r/20230726135151.416917-1-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 56 ++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 50 insertions(+), 6 deletions(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index 5abf31e432caf..907e58841fe80 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -1024,18 +1024,62 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+               return -EINVAL;
+       }
+ 
++      /* At this point, we need to derive the new handle that will be used to
++       * uniquely map the identity of this table match entry. The
++       * identity of the entry that we need to construct is 32 bits made of:
++       *     htid(12b):bucketid(8b):node/entryid(12b)
++       *
++       * At this point _we have the table(ht)_ in which we will insert this
++       * entry. We carry the table's id in variable "htid".
++       * Note that earlier code picked the ht selection either by a) the user
++       * providing the htid specified via TCA_U32_HASH attribute or b) when
++       * no such attribute is passed then the root ht, is default to at ID
++       * 0x[800][00][000]. Rule: the root table has a single bucket with ID 0.
++       * If OTOH the user passed us the htid, they may also pass a bucketid of
++       * choice. 0 is fine. For example a user htid is 0x[600][01][000] it is
++       * indicating hash bucketid of 1. Rule: the entry/node ID _cannot_ be
++       * passed via the htid, so even if it was non-zero it will be ignored.
++       *
++       * We may also have a handle, if the user passed one. The handle also
++       * carries the same addressing of htid(12b):bucketid(8b):node/entryid(12b).
++       * Rule: the bucketid on the handle is ignored even if one was passed;
++       * rather the value on "htid" is always assumed to be the bucketid.
++       */
+       if (handle) {
++              /* Rule: The htid from handle and tableid from htid must match */
+               if (TC_U32_HTID(handle) && TC_U32_HTID(handle ^ htid)) {
+                       NL_SET_ERR_MSG_MOD(extack, "Handle specified hash table address mismatch");
+                       return -EINVAL;
+               }
+-              handle = htid | TC_U32_NODE(handle);
+-              err = idr_alloc_u32(&ht->handle_idr, NULL, &handle, handle,
+-                                  GFP_KERNEL);
+-              if (err)
+-                      return err;
+-      } else
++              /* Ok, so far we have a valid htid(12b):bucketid(8b) but we
++               * need to finalize the table entry identification with the last
++               * part - the node/entryid(12b)). Rule: Nodeid _cannot be 0_ for
++               * entries. Rule: nodeid of 0 is reserved only for tables(see
++               * earlier code which processes TC_U32_DIVISOR attribute).
++               * Rule: The nodeid can only be derived from the handle (and not
++               * htid).
++               * Rule: if the handle specified zero for the node id example
++               * 0x60000000, then pick a new nodeid from the pool of IDs
++               * this hash table has been allocating from.
++               * If OTOH it is specified (i.e for example the user passed a
++               * handle such as 0x60000123), then we use it generate our final
++               * handle which is used to uniquely identify the match entry.
++               */
++              if (!TC_U32_NODE(handle)) {
++                      handle = gen_new_kid(ht, htid);
++              } else {
++                      handle = htid | TC_U32_NODE(handle);
++                      err = idr_alloc_u32(&ht->handle_idr, NULL, &handle,
++                                          handle, GFP_KERNEL);
++                      if (err)
++                              return err;
++              }
++      } else {
++              /* The user did not give us a handle; lets just generate one
++               * from the table's pool of nodeids.
++               */
+               handle = gen_new_kid(ht, htid);
++      }
+ 
+       if (tb[TCA_U32_SEL] == NULL) {
+               NL_SET_ERR_MSG_MOD(extack, "Selector not specified");
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch b/queue-6.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch

new file mode 100644 (file)

index 0000000..b37a82e
--- /dev/null
+++ b/queue-6.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch
@@ -0,0 +1,50 @@
+From 524909485b940f1f1002d627fcdfc5d7b7c9f5e6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 08:32:00 -0400
+Subject: net/sched: cls_u32: No longer copy tcf_result on update to avoid
+ use-after-free
+
+From: valis <sec@valis.email>
+
+[ Upstream commit 3044b16e7c6fe5d24b1cdbcf1bd0a9d92d1ebd81 ]
+
+When u32_change() is called on an existing filter, the whole
+tcf_result struct is always copied into the new instance of the filter.
+
+This causes a problem when updating a filter bound to a class,
+as tcf_unbind_filter() is always called on the old instance in the
+success path, decreasing filter_cnt of the still referenced class
+and allowing it to be deleted, leading to a use-after-free.
+
+Fix this by no longer copying the tcf_result struct from the old filter.
+
+Fixes: de5df63228fc ("net: sched: cls_u32 changes to knode must appear atomic to readers")
+Reported-by: valis <sec@valis.email>
+Reported-by: M A Ramdhan <ramdhan@starlabs.sg>
+Signed-off-by: valis <sec@valis.email>
+Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Victor Nogueira <victor@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: M A Ramdhan <ramdhan@starlabs.sg>
+Link: https://lore.kernel.org/r/20230729123202.72406-2-jhs@mojatatu.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index 907e58841fe80..da4c179a4d418 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -826,7 +826,6 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp,
+ 
+       new->ifindex = n->ifindex;
+       new->fshift = n->fshift;
+-      new->res = n->res;
+       new->flags = n->flags;
+       RCU_INIT_POINTER(new->ht_down, ht);
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-sched-taprio-limit-tca_taprio_attr_sched_cycle_t.patch b/queue-6.4/net-sched-taprio-limit-tca_taprio_attr_sched_cycle_t.patch

new file mode 100644 (file)

index 0000000..b2d02bd
--- /dev/null
+++ b/queue-6.4/net-sched-taprio-limit-tca_taprio_attr_sched_cycle_t.patch
@@ -0,0 +1,176 @@
+From 3769598a48520c120488de5d2880d5c1f204d2d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 17:07:05 -0700
+Subject: net/sched: taprio: Limit TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME to INT_MAX.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit e739718444f7bf2fa3d70d101761ad83056ca628 ]
+
+syzkaller found zero division error [0] in div_s64_rem() called from
+get_cycle_time_elapsed(), where sched->cycle_time is the divisor.
+
+We have tests in parse_taprio_schedule() so that cycle_time will never
+be 0, and actually cycle_time is not 0 in get_cycle_time_elapsed().
+
+The problem is that the types of divisor are different; cycle_time is
+s64, but the argument of div_s64_rem() is s32.
+
+syzkaller fed this input and 0x100000000 is cast to s32 to be 0.
+
+  @TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME={0xc, 0x8, 0x100000000}
+
+We use s64 for cycle_time to cast it to ktime_t, so let's keep it and
+set max for cycle_time.
+
+While at it, we prevent overflow in setup_txtime() and add another
+test in parse_taprio_schedule() to check if cycle_time overflows.
+
+Also, we add a new tdc test case for this issue.
+
+[0]:
+divide error: 0000 [#1] PREEMPT SMP KASAN NOPTI
+CPU: 1 PID: 103 Comm: kworker/1:3 Not tainted 6.5.0-rc1-00330-g60cc1f7d0605 #3
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
+Workqueue: ipv6_addrconf addrconf_dad_work
+RIP: 0010:div_s64_rem include/linux/math64.h:42 [inline]
+RIP: 0010:get_cycle_time_elapsed net/sched/sch_taprio.c:223 [inline]
+RIP: 0010:find_entry_to_transmit+0x252/0x7e0 net/sched/sch_taprio.c:344
+Code: 3c 02 00 0f 85 5e 05 00 00 48 8b 4c 24 08 4d 8b bd 40 01 00 00 48 8b 7c 24 48 48 89 c8 4c 29 f8 48 63 f7 48 99 48 89 74 24 70 <48> f7 fe 48 29 d1 48 8d 04 0f 49 89 cc 48 89 44 24 20 49 8d 85 10
+RSP: 0018:ffffc90000acf260 EFLAGS: 00010206
+RAX: 177450e0347560cf RBX: 0000000000000000 RCX: 177450e0347560cf
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000100000000
+RBP: 0000000000000056 R08: 0000000000000000 R09: ffffed10020a0934
+R10: ffff8880105049a7 R11: ffff88806cf3a520 R12: ffff888010504800
+R13: ffff88800c00d800 R14: ffff8880105049a0 R15: 0000000000000000
+FS:  0000000000000000(0000) GS:ffff88806cf00000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007f0edf84f0e8 CR3: 000000000d73c002 CR4: 0000000000770ee0
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ get_packet_txtime net/sched/sch_taprio.c:508 [inline]
+ taprio_enqueue_one+0x900/0xff0 net/sched/sch_taprio.c:577
+ taprio_enqueue+0x378/0xae0 net/sched/sch_taprio.c:658
+ dev_qdisc_enqueue+0x46/0x170 net/core/dev.c:3732
+ __dev_xmit_skb net/core/dev.c:3821 [inline]
+ __dev_queue_xmit+0x1b2f/0x3000 net/core/dev.c:4169
+ dev_queue_xmit include/linux/netdevice.h:3088 [inline]
+ neigh_resolve_output net/core/neighbour.c:1552 [inline]
+ neigh_resolve_output+0x4a7/0x780 net/core/neighbour.c:1532
+ neigh_output include/net/neighbour.h:544 [inline]
+ ip6_finish_output2+0x924/0x17d0 net/ipv6/ip6_output.c:135
+ __ip6_finish_output+0x620/0xaa0 net/ipv6/ip6_output.c:196
+ ip6_finish_output net/ipv6/ip6_output.c:207 [inline]
+ NF_HOOK_COND include/linux/netfilter.h:292 [inline]
+ ip6_output+0x206/0x410 net/ipv6/ip6_output.c:228
+ dst_output include/net/dst.h:458 [inline]
+ NF_HOOK.constprop.0+0xea/0x260 include/linux/netfilter.h:303
+ ndisc_send_skb+0x872/0xe80 net/ipv6/ndisc.c:508
+ ndisc_send_ns+0xb5/0x130 net/ipv6/ndisc.c:666
+ addrconf_dad_work+0xc14/0x13f0 net/ipv6/addrconf.c:4175
+ process_one_work+0x92c/0x13a0 kernel/workqueue.c:2597
+ worker_thread+0x60f/0x1240 kernel/workqueue.c:2748
+ kthread+0x2fe/0x3f0 kernel/kthread.c:389
+ ret_from_fork+0x2c/0x50 arch/x86/entry/entry_64.S:308
+ </TASK>
+Modules linked in:
+
+Fixes: 4cfd5779bd6e ("taprio: Add support for txtime-assist mode")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Co-developed-by: Eric Dumazet <edumazet@google.com>
+Co-developed-by: Pedro Tammela <pctammela@mojatatu.com>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/sch_taprio.c                        | 15 +++++++++--
+ .../tc-testing/tc-tests/qdiscs/taprio.json    | 25 +++++++++++++++++++
+ 2 files changed, 38 insertions(+), 2 deletions(-)
+
+diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
+index f681af138179c..97afa244e54f5 100644
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1013,6 +1013,11 @@ static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = {
+                                                             TC_FP_PREEMPTIBLE),
+ };
+ 
++static struct netlink_range_validation_signed taprio_cycle_time_range = {
++      .min = 0,
++      .max = INT_MAX,
++};
++
+ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
+       [TCA_TAPRIO_ATTR_PRIOMAP]              = {
+               .len = sizeof(struct tc_mqprio_qopt)
+@@ -1021,7 +1026,8 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = {
+       [TCA_TAPRIO_ATTR_SCHED_BASE_TIME]            = { .type = NLA_S64 },
+       [TCA_TAPRIO_ATTR_SCHED_SINGLE_ENTRY]         = { .type = NLA_NESTED },
+       [TCA_TAPRIO_ATTR_SCHED_CLOCKID]              = { .type = NLA_S32 },
+-      [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           = { .type = NLA_S64 },
++      [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME]           =
++              NLA_POLICY_FULL_RANGE_SIGNED(NLA_S64, &taprio_cycle_time_range),
+       [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 },
+       [TCA_TAPRIO_ATTR_FLAGS]                      = { .type = NLA_U32 },
+       [TCA_TAPRIO_ATTR_TXTIME_DELAY]               = { .type = NLA_U32 },
+@@ -1157,6 +1163,11 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
+                       return -EINVAL;
+               }
+ 
++              if (cycle < 0 || cycle > INT_MAX) {
++                      NL_SET_ERR_MSG(extack, "'cycle_time' is too big");
++                      return -EINVAL;
++              }
++
+               new->cycle_time = cycle;
+       }
+ 
+@@ -1345,7 +1356,7 @@ static void setup_txtime(struct taprio_sched *q,
+                        struct sched_gate_list *sched, ktime_t base)
+ {
+       struct sched_entry *entry;
+-      u32 interval = 0;
++      u64 interval = 0;
+ 
+       list_for_each_entry(entry, &sched->entries, list) {
+               entry->next_txtime = ktime_add_ns(base, interval);
+diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+index a44455372646a..08d4861c2e782 100644
+--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
++++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+@@ -131,5 +131,30 @@
+         "teardown": [
+             "echo \"1\" > /sys/bus/netdevsim/del_device"
+         ]
++    },
++    {
++        "id": "3e1e",
++        "name": "Add taprio Qdisc with an invalid cycle-time",
++        "category": [
++            "qdisc",
++            "taprio"
++        ],
++        "plugins": {
++            "requires": "nsPlugin"
++        },
++        "setup": [
++            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
++            "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI cycle-time 4294967296 || /bin/true",
++            "$IP link set dev $ETH up",
++            "$IP addr add 10.10.10.10/24 dev $ETH"
++        ],
++        "cmdUnderTest": "/bin/true",
++        "expExitCode": "0",
++        "verifyCmd": "$TC qdisc show dev $ETH",
++        "matchPattern": "qdisc taprio 1: root refcnt",
++        "matchCount": "0",
++        "teardown": [
++            "echo \"1\" > /sys/bus/netdevsim/del_device"
++        ]
+     }
+ ]
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-stmmac-tegra-properly-allocate-clock-bulk-data.patch b/queue-6.4/net-stmmac-tegra-properly-allocate-clock-bulk-data.patch

new file mode 100644 (file)

index 0000000..553a2eb
--- /dev/null
+++ b/queue-6.4/net-stmmac-tegra-properly-allocate-clock-bulk-data.patch
@@ -0,0 +1,38 @@
+From 026f2206059972685acfc11b485c7197df34e2b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 18:32:00 +0200
+Subject: net: stmmac: tegra: Properly allocate clock bulk data
+
+From: Thierry Reding <treding@nvidia.com>
+
+[ Upstream commit a0b1b2055be34c0ec1371764d040164cde1ead79 ]
+
+The clock data is an array of struct clk_bulk_data, so make sure to
+allocate enough memory.
+
+Fixes: d8ca113724e7 ("net: stmmac: tegra: Add MGBE support")
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
+index bdf990cf2f310..0880048ccdddc 100644
+--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
++++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-tegra.c
+@@ -234,7 +234,8 @@ static int tegra_mgbe_probe(struct platform_device *pdev)
+       res.addr = mgbe->regs;
+       res.irq = irq;
+ 
+-      mgbe->clks = devm_kzalloc(&pdev->dev, sizeof(*mgbe->clks), GFP_KERNEL);
++      mgbe->clks = devm_kcalloc(&pdev->dev, ARRAY_SIZE(mgbe_clks),
++                                sizeof(*mgbe->clks), GFP_KERNEL);
+       if (!mgbe->clks)
+               return -ENOMEM;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/net-usb-lan78xx-reorder-cleanup-operations-to-avoid-.patch b/queue-6.4/net-usb-lan78xx-reorder-cleanup-operations-to-avoid-.patch

new file mode 100644 (file)

index 0000000..e603b7c
--- /dev/null
+++ b/queue-6.4/net-usb-lan78xx-reorder-cleanup-operations-to-avoid-.patch
@@ -0,0 +1,103 @@
+From 8e93dc2d5190b8a686c47711df216004dafab779 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 16:14:07 +0800
+Subject: net: usb: lan78xx: reorder cleanup operations to avoid UAF bugs
+
+From: Duoming Zhou <duoming@zju.edu.cn>
+
+[ Upstream commit 1e7417c188d0a83fb385ba2dbe35fd2563f2b6f3 ]
+
+The timer dev->stat_monitor can schedule the delayed work dev->wq and
+the delayed work dev->wq can also arm the dev->stat_monitor timer.
+
+When the device is detaching, the net_device will be deallocated, but
+the net_device private data could still be dereferenced in delayed work
+or timer handler. As a result, the UAF bugs will happen.
+
+One racy situation is shown below:
+
+      (Thread 1)                 |      (Thread 2)
+lan78xx_stat_monitor()           |
+ ...                             |  lan78xx_disconnect()
+ lan78xx_defer_kevent()          |    ...
+  ...                            |    cancel_delayed_work_sync(&dev->wq);
+  schedule_delayed_work()        |    ...
+  (wait some time)               |    free_netdev(net); //free net_device
+  lan78xx_delayedwork()          |
+  //use net_device private data  |
+  dev-> //use                    |
+
+Although we use cancel_delayed_work_sync() to cancel the delayed work
+in lan78xx_disconnect(), it could still be scheduled in timer handler
+lan78xx_stat_monitor().
+
+Another racy situation is shown below:
+
+      (Thread 1)                |      (Thread 2)
+lan78xx_delayedwork             |
+ mod_timer()                    |  lan78xx_disconnect()
+                                |   cancel_delayed_work_sync()
+ (wait some time)               |   if (timer_pending(&dev->stat_monitor))
+                               |       del_timer_sync(&dev->stat_monitor);
+ lan78xx_stat_monitor()         |   ...
+  lan78xx_defer_kevent()        |   free_netdev(net); //free
+   //use net_device private data|
+   dev-> //use                  |
+
+Although we use del_timer_sync() to delete the timer, the function
+timer_pending() returns 0 when the timer is activated. As a result,
+the del_timer_sync() will not be executed and the timer could be
+re-armed.
+
+In order to mitigate this bug, we use timer_shutdown_sync() to shut down
+the timer and then use cancel_delayed_work_sync() to cancel the delayed
+work. As a result, the net_device could be deallocated safely.
+
+What's more, the dev->flags is set to EVENT_DEV_DISCONNECT in
+lan78xx_disconnect(). But it could still be set to EVENT_STAT_UPDATE
+in lan78xx_stat_monitor(). So this patch puts the set_bit() behind
+timer_shutdown_sync().
+
+Fixes: 77dfff5bb7e2 ("lan78xx: Fix race condition in disconnect handling")
+Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/lan78xx.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c
+index c458c030fadf6..59cde06aa7f60 100644
+--- a/drivers/net/usb/lan78xx.c
++++ b/drivers/net/usb/lan78xx.c
+@@ -4224,8 +4224,6 @@ static void lan78xx_disconnect(struct usb_interface *intf)
+       if (!dev)
+               return;
+ 
+-      set_bit(EVENT_DEV_DISCONNECT, &dev->flags);
+-
+       netif_napi_del(&dev->napi);
+ 
+       udev = interface_to_usbdev(intf);
+@@ -4233,6 +4231,8 @@ static void lan78xx_disconnect(struct usb_interface *intf)
+ 
+       unregister_netdev(net);
+ 
++      timer_shutdown_sync(&dev->stat_monitor);
++      set_bit(EVENT_DEV_DISCONNECT, &dev->flags);
+       cancel_delayed_work_sync(&dev->wq);
+ 
+       phydev = net->phydev;
+@@ -4247,9 +4247,6 @@ static void lan78xx_disconnect(struct usb_interface *intf)
+ 
+       usb_scuttle_anchored_urbs(&dev->deferred);
+ 
+-      if (timer_pending(&dev->stat_monitor))
+-              del_timer_sync(&dev->stat_monitor);
+-
+       lan78xx_unbind(dev, intf);
+ 
+       lan78xx_free_tx_resources(dev);
+-- 
+2.40.1
+
diff --git a/queue-6.4/octeon_ep-initialize-mbox-mutexes.patch b/queue-6.4/octeon_ep-initialize-mbox-mutexes.patch

new file mode 100644 (file)

index 0000000..97343bc
--- /dev/null
+++ b/queue-6.4/octeon_ep-initialize-mbox-mutexes.patch
@@ -0,0 +1,43 @@
+From 6cb366dae6c538d9fd911d5fe718512e51e8112e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 29 Jul 2023 17:15:16 +0200
+Subject: octeon_ep: initialize mbox mutexes
+
+From: Michal Schmidt <mschmidt@redhat.com>
+
+[ Upstream commit 611e1b016c7beceec5ae82ac62d4a7ca224c8f9d ]
+
+The two mbox-related mutexes are destroyed in octep_ctrl_mbox_uninit(),
+but the corresponding mutex_init calls were missing.
+A "DEBUG_LOCKS_WARN_ON(lock->magic != lock)" warning was emitted with
+CONFIG_DEBUG_MUTEXES on.
+
+Initialize the two mutexes in octep_ctrl_mbox_init().
+
+Fixes: 577f0d1b1c5f ("octeon_ep: add separate mailbox command and response queues")
+Signed-off-by: Michal Schmidt <mschmidt@redhat.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Link: https://lore.kernel.org/r/20230729151516.24153-1-mschmidt@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c
+index 035ead7935c74..dab61cc1acb57 100644
+--- a/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c
++++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ctrl_mbox.c
+@@ -98,6 +98,9 @@ int octep_ctrl_mbox_init(struct octep_ctrl_mbox *mbox)
+       writeq(OCTEP_CTRL_MBOX_STATUS_INIT,
+              OCTEP_CTRL_MBOX_INFO_HOST_STATUS(mbox->barmem));
+ 
++      mutex_init(&mbox->h2fq_lock);
++      mutex_init(&mbox->f2hq_lock);
++
+       mbox->h2fq.sz = readl(OCTEP_CTRL_MBOX_H2FQ_SZ(mbox->barmem));
+       mbox->h2fq.hw_prod = OCTEP_CTRL_MBOX_H2FQ_PROD(mbox->barmem);
+       mbox->h2fq.hw_cons = OCTEP_CTRL_MBOX_H2FQ_CONS(mbox->barmem);
+-- 
+2.40.1
+
diff --git a/queue-6.4/perf-pmu-arm64-fix-reading-the-pmu-cpu-slots-in-sysf.patch b/queue-6.4/perf-pmu-arm64-fix-reading-the-pmu-cpu-slots-in-sysf.patch

new file mode 100644 (file)

index 0000000..ea48951
--- /dev/null
+++ b/queue-6.4/perf-pmu-arm64-fix-reading-the-pmu-cpu-slots-in-sysf.patch
@@ -0,0 +1,63 @@
+From bc3b6341fa238a203e8e0c1c2b80228c015dead5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Jul 2023 13:06:54 +0800
+Subject: perf pmu arm64: Fix reading the PMU cpu slots in sysfs
+
+From: Haixin Yu <yuhaixin.yhx@linux.alibaba.com>
+
+[ Upstream commit 9754353d0ab123d71bf572a483ecc8b330ef36a3 ]
+
+Commit f8ad6018ce3c065a ("perf pmu: Remove duplication around
+EVENT_SOURCE_DEVICE_PATH") uses sysfs__read_ull() to read a full sysfs
+path, which will never succeed as it already comes with the sysfs mount
+point in it, which sysfs__read_ull() will add again.
+
+Fix it by reading the file using filename__read_ull(), that will not add
+the sysfs mount point.
+
+Fixes: f8ad6018ce3c065a ("perf pmu: Remove duplication around EVENT_SOURCE_DEVICE_PATH")
+Signed-off-by: Haixin Yu <yuhaixin.yhx@linux.alibaba.com>
+Tested-by: Jing Zhang <renyu.zj@linux.alibaba.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: James Clark <james.clark@arm.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: John Garry <john.g.garry@oracle.com>
+Cc: Leo Yan <leo.yan@linaro.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mike Leach <mike.leach@linaro.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Will Deacon <will@kernel.org>
+Cc: linux-arm-kernel@lists.infradead.org
+Link: https://lore.kernel.org/r/ZL4G7rWXkfv-Ectq@B-Q60VQ05P-2326.local
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/arch/arm64/util/pmu.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
+index ef1ed645097c6..ce0d1c7578348 100644
+--- a/tools/perf/arch/arm64/util/pmu.c
++++ b/tools/perf/arch/arm64/util/pmu.c
+@@ -56,10 +56,11 @@ double perf_pmu__cpu_slots_per_cycle(void)
+               perf_pmu__pathname_scnprintf(path, sizeof(path),
+                                            pmu->name, "caps/slots");
+               /*
+-               * The value of slots is not greater than 32 bits, but sysfs__read_int
+-               * can't read value with 0x prefix, so use sysfs__read_ull instead.
++               * The value of slots is not greater than 32 bits, but
++               * filename__read_int can't read value with 0x prefix,
++               * so use filename__read_ull instead.
+                */
+-              sysfs__read_ull(path, &slots);
++              filename__read_ull(path, &slots);
+       }
+ 
+       return slots ? (double)slots : NAN;
+-- 
+2.40.1
+
diff --git a/queue-6.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch b/queue-6.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch

new file mode 100644 (file)

index 0000000..dd7d0ae
--- /dev/null
+++ b/queue-6.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch
@@ -0,0 +1,66 @@
+From c07e960593d24ff2cbf84655b75f95a808d50c1f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 28 Jul 2023 17:18:12 +0200
+Subject: perf test uprobe_from_different_cu: Skip if there is no gcc
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Georg Müller <georgmueller@gmx.net>
+
+[ Upstream commit 98ce8e4a9dcfb448b30a2d7a16190f4a00382377 ]
+
+Without gcc, the test will fail.
+
+On cleanup, ignore probe removal errors. Otherwise, in case of an error
+adding the probe, the temporary directory is not removed.
+
+Fixes: 56cbeacf14353057 ("perf probe: Add test for regression introduced by switch to die_get_decl_file()")
+Signed-off-by: Georg Müller <georgmueller@gmx.net>
+Acked-by: Ian Rogers <irogers@google.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Georg Müller <georgmueller@gmx.net>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Link: https://lore.kernel.org/r/20230728151812.454806-2-georgmueller@gmx.net
+Link: https://lore.kernel.org/r/CAP-5=fUP6UuLgRty3t2=fQsQi3k4hDMz415vWdp1x88QMvZ8ug@mail.gmail.com/
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/tests/shell/test_uprobe_from_different_cu.sh | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+index 00d2e0e2e0c28..319f36ebb9a40 100644
+--- a/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
++++ b/tools/perf/tests/shell/test_uprobe_from_different_cu.sh
+@@ -4,6 +4,12 @@
+ 
+ set -e
+ 
++# skip if there's no gcc
++if ! [ -x "$(command -v gcc)" ]; then
++        echo "failed: no gcc compiler"
++        exit 2
++fi
++
+ temp_dir=$(mktemp -d /tmp/perf-uprobe-different-cu-sh.XXXXXXXXXX)
+ 
+ cleanup()
+@@ -11,7 +17,7 @@ cleanup()
+       trap - EXIT TERM INT
+       if [[ "${temp_dir}" =~ ^/tmp/perf-uprobe-different-cu-sh.*$ ]]; then
+               echo "--- Cleaning up ---"
+-              perf probe -x ${temp_dir}/testfile -d foo
++              perf probe -x ${temp_dir}/testfile -d foo || true
+               rm -f "${temp_dir}/"*
+               rmdir "${temp_dir}"
+       fi
+-- 
+2.40.1
+
diff --git a/queue-6.4/prestera-fix-fallback-to-previous-version-on-same-ma.patch b/queue-6.4/prestera-fix-fallback-to-previous-version-on-same-ma.patch

new file mode 100644 (file)

index 0000000..dfe817f
--- /dev/null
+++ b/queue-6.4/prestera-fix-fallback-to-previous-version-on-same-ma.patch
@@ -0,0 +1,66 @@
+From 8853150367113770d8f57240e2a439b0f2f1d7e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 11:23:56 +0200
+Subject: prestera: fix fallback to previous version on same major version
+
+From: Jonas Gorski <jonas.gorski@bisdn.de>
+
+[ Upstream commit b755c25fbcd568821a3bb0e0d5c2daa5fcb00bba ]
+
+When both supported and previous version have the same major version,
+and the firmwares are missing, the driver ends in a loop requesting the
+same (previous) version over and over again:
+
+    [   76.327413] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.1.img firmware, fall-back to previous 4.0 version
+    [   76.339802] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.0.img firmware, fall-back to previous 4.0 version
+    [   76.352162] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.0.img firmware, fall-back to previous 4.0 version
+    [   76.364502] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.0.img firmware, fall-back to previous 4.0 version
+    [   76.376848] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.0.img firmware, fall-back to previous 4.0 version
+    [   76.389183] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.0.img firmware, fall-back to previous 4.0 version
+    [   76.401522] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.0.img firmware, fall-back to previous 4.0 version
+    [   76.413860] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.0.img firmware, fall-back to previous 4.0 version
+    [   76.426199] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.0.img firmware, fall-back to previous 4.0 version
+    ...
+
+Fix this by inverting the check to test that we aren't yet at the previous
+version, and also check the minor version.
+
+This also catches the case where both versions are the same, as it was
+after commit bb5dbf2cc64d ("net: marvell: prestera: add firmware v4.0
+support").
+
+With this fix applied:
+
+    [   88.499622] Prestera DX 0000:01:00.0: missing latest mrvl/prestera/mvsw_prestera_fw-v4.1.img firmware, fall-back to previous 4.0 version
+    [   88.511995] Prestera DX 0000:01:00.0: failed to request previous firmware: mrvl/prestera/mvsw_prestera_fw-v4.0.img
+    [   88.522403] Prestera DX: probe of 0000:01:00.0 failed with error -2
+
+Fixes: 47f26018a414 ("net: marvell: prestera: try to load previous fw version")
+Signed-off-by: Jonas Gorski <jonas.gorski@bisdn.de>
+Acked-by: Elad Nachman <enachman@marvell.com>
+Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
+Acked-by: Taras Chornyi <taras.chornyi@plvision.eu>
+Link: https://lore.kernel.org/r/20230802092357.163944-1-jonas.gorski@bisdn.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/prestera/prestera_pci.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+index f328d957b2db7..35857dc19542f 100644
+--- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c
++++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c
+@@ -727,7 +727,8 @@ static int prestera_fw_get(struct prestera_fw *fw)
+ 
+       err = request_firmware_direct(&fw->bin, fw_path, fw->dev.dev);
+       if (err) {
+-              if (ver_maj == PRESTERA_SUPP_FW_MAJ_VER) {
++              if (ver_maj != PRESTERA_PREV_FW_MAJ_VER ||
++                  ver_min != PRESTERA_PREV_FW_MIN_VER) {
+                       ver_maj = PRESTERA_PREV_FW_MAJ_VER;
+                       ver_min = PRESTERA_PREV_FW_MIN_VER;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/qed-fix-scheduling-in-a-tasklet-while-getting-stats.patch b/queue-6.4/qed-fix-scheduling-in-a-tasklet-while-getting-stats.patch

new file mode 100644 (file)

index 0000000..8230ea7
--- /dev/null
+++ b/queue-6.4/qed-fix-scheduling-in-a-tasklet-while-getting-stats.patch
@@ -0,0 +1,452 @@
+From 365cec664ada0d3241ee19fdbf3586a2ad4fee75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 27 Jul 2023 18:26:09 +0300
+Subject: qed: Fix scheduling in a tasklet while getting stats
+
+From: Konstantin Khorenko <khorenko@virtuozzo.com>
+
+[ Upstream commit e346e231b42bcae6822a6326acfb7b741e9e6026 ]
+
+Here we've got to a situation when tasklet called usleep_range() in PTT
+acquire logic, thus welcome to the "scheduling while atomic" BUG().
+
+  BUG: scheduling while atomic: swapper/24/0/0x00000100
+
+   [<ffffffffb41c6199>] schedule+0x29/0x70
+   [<ffffffffb41c5512>] schedule_hrtimeout_range_clock+0xb2/0x150
+   [<ffffffffb41c55c3>] schedule_hrtimeout_range+0x13/0x20
+   [<ffffffffb41c3bcf>] usleep_range+0x4f/0x70
+   [<ffffffffc08d3e58>] qed_ptt_acquire+0x38/0x100 [qed]
+   [<ffffffffc08eac48>] _qed_get_vport_stats+0x458/0x580 [qed]
+   [<ffffffffc08ead8c>] qed_get_vport_stats+0x1c/0xd0 [qed]
+   [<ffffffffc08dffd3>] qed_get_protocol_stats+0x93/0x100 [qed]
+                        qed_mcp_send_protocol_stats
+            case MFW_DRV_MSG_GET_LAN_STATS:
+            case MFW_DRV_MSG_GET_FCOE_STATS:
+            case MFW_DRV_MSG_GET_ISCSI_STATS:
+            case MFW_DRV_MSG_GET_RDMA_STATS:
+   [<ffffffffc08e36d8>] qed_mcp_handle_events+0x2d8/0x890 [qed]
+                        qed_int_assertion
+                        qed_int_attentions
+   [<ffffffffc08d9490>] qed_int_sp_dpc+0xa50/0xdc0 [qed]
+   [<ffffffffb3aa7623>] tasklet_action+0x83/0x140
+   [<ffffffffb41d9125>] __do_softirq+0x125/0x2bb
+   [<ffffffffb41d560c>] call_softirq+0x1c/0x30
+   [<ffffffffb3a30645>] do_softirq+0x65/0xa0
+   [<ffffffffb3aa78d5>] irq_exit+0x105/0x110
+   [<ffffffffb41d8996>] do_IRQ+0x56/0xf0
+
+Fix this by making the caller indicate whether it may be running in
+atomic context when getting stats from the QED driver. Based on that
+hint, the QED driver decides whether it may sleep while acquiring the
+PTT BAR window.
+
+We faced the BUG_ON() while getting vport stats, but according to the
+code same issue could happen for fcoe and iscsi statistics as well, so
+fixing them too.
+
+Fixes: 6c75424612a7 ("qed: Add support for NCSI statistics.")
+Fixes: 1e128c81290a ("qed: Add support for hardware offloaded FCoE.")
+Fixes: 2f2b2614e893 ("qed: Provide iSCSI statistics to management")
+Cc: Sudarsana Kalluru <skalluru@marvell.com>
+Cc: David Miller <davem@davemloft.net>
+Cc: Manish Chopra <manishc@marvell.com>
+
+Signed-off-by: Konstantin Khorenko <khorenko@virtuozzo.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 16 ++++++++++++
+ drivers/net/ethernet/qlogic/qed/qed_fcoe.c    | 19 ++++++++++----
+ drivers/net/ethernet/qlogic/qed/qed_fcoe.h    | 17 ++++++++++--
+ drivers/net/ethernet/qlogic/qed/qed_hw.c      | 26 ++++++++++++++++---
+ drivers/net/ethernet/qlogic/qed/qed_iscsi.c   | 19 ++++++++++----
+ drivers/net/ethernet/qlogic/qed/qed_iscsi.h   |  8 ++++--
+ drivers/net/ethernet/qlogic/qed/qed_l2.c      | 19 ++++++++++----
+ drivers/net/ethernet/qlogic/qed/qed_l2.h      | 24 +++++++++++++++++
+ drivers/net/ethernet/qlogic/qed/qed_main.c    |  6 ++---
+ 9 files changed, 128 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+index f8682356d0cf4..94d4f9413ab7a 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h
+@@ -193,6 +193,22 @@ void qed_hw_remove(struct qed_dev *cdev);
+  */
+ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn);
+ 
++/**
++ * qed_ptt_acquire_context(): Allocate a PTT window honoring the context
++ *                          atomicy.
++ *
++ * @p_hwfn: HW device data.
++ * @is_atomic: Hint from the caller - if the func can sleep or not.
++ *
++ * Context: The function should not sleep in case is_atomic == true.
++ * Return: struct qed_ptt.
++ *
++ * Should be called at the entry point to the driver
++ * (at the beginning of an exported function).
++ */
++struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn,
++                                      bool is_atomic);
++
+ /**
+  * qed_ptt_release(): Release PTT Window.
+  *
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+index 3764190b948eb..04602ac947087 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.c
+@@ -693,13 +693,14 @@ static void _qed_fcoe_get_pstats(struct qed_hwfn *p_hwfn,
+ }
+ 
+ static int qed_fcoe_get_stats(struct qed_hwfn *p_hwfn,
+-                            struct qed_fcoe_stats *p_stats)
++                            struct qed_fcoe_stats *p_stats,
++                            bool is_atomic)
+ {
+       struct qed_ptt *p_ptt;
+ 
+       memset(p_stats, 0, sizeof(*p_stats));
+ 
+-      p_ptt = qed_ptt_acquire(p_hwfn);
++      p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
+ 
+       if (!p_ptt) {
+               DP_ERR(p_hwfn, "Failed to acquire ptt\n");
+@@ -973,19 +974,27 @@ static int qed_fcoe_destroy_conn(struct qed_dev *cdev,
+                                       QED_SPQ_MODE_EBLOCK, NULL);
+ }
+ 
++static int qed_fcoe_stats_context(struct qed_dev *cdev,
++                                struct qed_fcoe_stats *stats,
++                                bool is_atomic)
++{
++      return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
++}
++
+ static int qed_fcoe_stats(struct qed_dev *cdev, struct qed_fcoe_stats *stats)
+ {
+-      return qed_fcoe_get_stats(QED_AFFIN_HWFN(cdev), stats);
++      return qed_fcoe_stats_context(cdev, stats, false);
+ }
+ 
+ void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
+-                               struct qed_mcp_fcoe_stats *stats)
++                               struct qed_mcp_fcoe_stats *stats,
++                               bool is_atomic)
+ {
+       struct qed_fcoe_stats proto_stats;
+ 
+       /* Retrieve FW statistics */
+       memset(&proto_stats, 0, sizeof(proto_stats));
+-      if (qed_fcoe_stats(cdev, &proto_stats)) {
++      if (qed_fcoe_stats_context(cdev, &proto_stats, is_atomic)) {
+               DP_VERBOSE(cdev, QED_MSG_STORAGE,
+                          "Failed to collect FCoE statistics\n");
+               return;
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
+index 19c85adf4ceb1..214e8299ecb4e 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_fcoe.h
+@@ -28,8 +28,20 @@ int qed_fcoe_alloc(struct qed_hwfn *p_hwfn);
+ void qed_fcoe_setup(struct qed_hwfn *p_hwfn);
+ 
+ void qed_fcoe_free(struct qed_hwfn *p_hwfn);
++/**
++ * qed_get_protocol_stats_fcoe(): Fills provided statistics
++ *                              struct with statistics.
++ *
++ * @cdev: Qed dev pointer.
++ * @stats: Points to struct that will be filled with statistics.
++ * @is_atomic: Hint from the caller - if the func can sleep or not.
++ *
++ * Context: The function should not sleep in case is_atomic == true.
++ * Return: Void.
++ */
+ void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
+-                               struct qed_mcp_fcoe_stats *stats);
++                               struct qed_mcp_fcoe_stats *stats,
++                               bool is_atomic);
+ #else /* CONFIG_QED_FCOE */
+ static inline int qed_fcoe_alloc(struct qed_hwfn *p_hwfn)
+ {
+@@ -40,7 +52,8 @@ static inline void qed_fcoe_setup(struct qed_hwfn *p_hwfn) {}
+ static inline void qed_fcoe_free(struct qed_hwfn *p_hwfn) {}
+ 
+ static inline void qed_get_protocol_stats_fcoe(struct qed_dev *cdev,
+-                                             struct qed_mcp_fcoe_stats *stats)
++                                             struct qed_mcp_fcoe_stats *stats,
++                                             bool is_atomic)
+ {
+ }
+ #endif /* CONFIG_QED_FCOE */
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
+index 554f30b0cfd5e..6263f847b6b92 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
+@@ -23,7 +23,10 @@
+ #include "qed_reg_addr.h"
+ #include "qed_sriov.h"
+ 
+-#define QED_BAR_ACQUIRE_TIMEOUT 1000
++#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT    1000
++#define QED_BAR_ACQUIRE_TIMEOUT_USLEEP                1000
++#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT    100000
++#define QED_BAR_ACQUIRE_TIMEOUT_UDELAY                10
+ 
+ /* Invalid values */
+ #define QED_BAR_INVALID_OFFSET          (cpu_to_le32(-1))
+@@ -84,12 +87,22 @@ void qed_ptt_pool_free(struct qed_hwfn *p_hwfn)
+ }
+ 
+ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
++{
++      return qed_ptt_acquire_context(p_hwfn, false);
++}
++
++struct qed_ptt *qed_ptt_acquire_context(struct qed_hwfn *p_hwfn, bool is_atomic)
+ {
+       struct qed_ptt *p_ptt;
+-      unsigned int i;
++      unsigned int i, count;
++
++      if (is_atomic)
++              count = QED_BAR_ACQUIRE_TIMEOUT_UDELAY_CNT;
++      else
++              count = QED_BAR_ACQUIRE_TIMEOUT_USLEEP_CNT;
+ 
+       /* Take the free PTT from the list */
+-      for (i = 0; i < QED_BAR_ACQUIRE_TIMEOUT; i++) {
++      for (i = 0; i < count; i++) {
+               spin_lock_bh(&p_hwfn->p_ptt_pool->lock);
+ 
+               if (!list_empty(&p_hwfn->p_ptt_pool->free_list)) {
+@@ -105,7 +118,12 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn)
+               }
+ 
+               spin_unlock_bh(&p_hwfn->p_ptt_pool->lock);
+-              usleep_range(1000, 2000);
++
++              if (is_atomic)
++                      udelay(QED_BAR_ACQUIRE_TIMEOUT_UDELAY);
++              else
++                      usleep_range(QED_BAR_ACQUIRE_TIMEOUT_USLEEP,
++                                   QED_BAR_ACQUIRE_TIMEOUT_USLEEP * 2);
+       }
+ 
+       DP_NOTICE(p_hwfn, "PTT acquire timeout - failed to allocate PTT\n");
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+index 511ab214eb9c8..980e7289b4814 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
+@@ -999,13 +999,14 @@ static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
+ }
+ 
+ static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
+-                             struct qed_iscsi_stats *stats)
++                             struct qed_iscsi_stats *stats,
++                             bool is_atomic)
+ {
+       struct qed_ptt *p_ptt;
+ 
+       memset(stats, 0, sizeof(*stats));
+ 
+-      p_ptt = qed_ptt_acquire(p_hwfn);
++      p_ptt = qed_ptt_acquire_context(p_hwfn, is_atomic);
+       if (!p_ptt) {
+               DP_ERR(p_hwfn, "Failed to acquire ptt\n");
+               return -EAGAIN;
+@@ -1336,9 +1337,16 @@ static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
+                                          QED_SPQ_MODE_EBLOCK, NULL);
+ }
+ 
++static int qed_iscsi_stats_context(struct qed_dev *cdev,
++                                 struct qed_iscsi_stats *stats,
++                                 bool is_atomic)
++{
++      return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats, is_atomic);
++}
++
+ static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
+ {
+-      return qed_iscsi_get_stats(QED_AFFIN_HWFN(cdev), stats);
++      return qed_iscsi_stats_context(cdev, stats, false);
+ }
+ 
+ static int qed_iscsi_change_mac(struct qed_dev *cdev,
+@@ -1358,13 +1366,14 @@ static int qed_iscsi_change_mac(struct qed_dev *cdev,
+ }
+ 
+ void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
+-                                struct qed_mcp_iscsi_stats *stats)
++                                struct qed_mcp_iscsi_stats *stats,
++                                bool is_atomic)
+ {
+       struct qed_iscsi_stats proto_stats;
+ 
+       /* Retrieve FW statistics */
+       memset(&proto_stats, 0, sizeof(proto_stats));
+-      if (qed_iscsi_stats(cdev, &proto_stats)) {
++      if (qed_iscsi_stats_context(cdev, &proto_stats, is_atomic)) {
+               DP_VERBOSE(cdev, QED_MSG_STORAGE,
+                          "Failed to collect ISCSI statistics\n");
+               return;
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
+index dec2b00259d42..974cb8d26608c 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
+@@ -39,11 +39,14 @@ void qed_iscsi_free(struct qed_hwfn *p_hwfn);
+  *
+  * @cdev: Qed dev pointer.
+  * @stats: Points to struct that will be filled with statistics.
++ * @is_atomic: Hint from the caller - if the func can sleep or not.
+  *
++ * Context: The function should not sleep in case is_atomic == true.
+  * Return: Void.
+  */
+ void qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
+-                                struct qed_mcp_iscsi_stats *stats);
++                                struct qed_mcp_iscsi_stats *stats,
++                                bool is_atomic);
+ #else /* IS_ENABLED(CONFIG_QED_ISCSI) */
+ static inline int qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
+ {
+@@ -56,7 +59,8 @@ static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn) {}
+ 
+ static inline void
+ qed_get_protocol_stats_iscsi(struct qed_dev *cdev,
+-                           struct qed_mcp_iscsi_stats *stats) {}
++                           struct qed_mcp_iscsi_stats *stats,
++                           bool is_atomic) {}
+ #endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
+ 
+ #endif
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
+index 7776d3bdd459a..970b9aabbc3d7 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
+@@ -1863,7 +1863,8 @@ static void __qed_get_vport_stats(struct qed_hwfn *p_hwfn,
+ }
+ 
+ static void _qed_get_vport_stats(struct qed_dev *cdev,
+-                               struct qed_eth_stats *stats)
++                               struct qed_eth_stats *stats,
++                               bool is_atomic)
+ {
+       u8 fw_vport = 0;
+       int i;
+@@ -1872,10 +1873,11 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
+ 
+       for_each_hwfn(cdev, i) {
+               struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
+-              struct qed_ptt *p_ptt = IS_PF(cdev) ? qed_ptt_acquire(p_hwfn)
+-                                                  :  NULL;
++              struct qed_ptt *p_ptt;
+               bool b_get_port_stats;
+ 
++              p_ptt = IS_PF(cdev) ? qed_ptt_acquire_context(p_hwfn, is_atomic)
++                                  : NULL;
+               if (IS_PF(cdev)) {
+                       /* The main vport index is relative first */
+                       if (qed_fw_vport(p_hwfn, 0, &fw_vport)) {
+@@ -1900,6 +1902,13 @@ static void _qed_get_vport_stats(struct qed_dev *cdev,
+ }
+ 
+ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
++{
++      qed_get_vport_stats_context(cdev, stats, false);
++}
++
++void qed_get_vport_stats_context(struct qed_dev *cdev,
++                               struct qed_eth_stats *stats,
++                               bool is_atomic)
+ {
+       u32 i;
+ 
+@@ -1908,7 +1917,7 @@ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats)
+               return;
+       }
+ 
+-      _qed_get_vport_stats(cdev, stats);
++      _qed_get_vport_stats(cdev, stats, is_atomic);
+ 
+       if (!cdev->reset_stats)
+               return;
+@@ -1960,7 +1969,7 @@ void qed_reset_vport_stats(struct qed_dev *cdev)
+       if (!cdev->reset_stats) {
+               DP_INFO(cdev, "Reset stats not allocated\n");
+       } else {
+-              _qed_get_vport_stats(cdev, cdev->reset_stats);
++              _qed_get_vport_stats(cdev, cdev->reset_stats, false);
+               cdev->reset_stats->common.link_change_count = 0;
+       }
+ }
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
+index a538cf478c14e..2d2f82c785ad2 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
++++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
+@@ -249,8 +249,32 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
+                           enum spq_mode comp_mode,
+                           struct qed_spq_comp_cb *p_comp_data);
+ 
++/**
++ * qed_get_vport_stats(): Fills provided statistics
++ *                      struct with statistics.
++ *
++ * @cdev: Qed dev pointer.
++ * @stats: Points to struct that will be filled with statistics.
++ *
++ * Return: Void.
++ */
+ void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
+ 
++/**
++ * qed_get_vport_stats_context(): Fills provided statistics
++ *                              struct with statistics.
++ *
++ * @cdev: Qed dev pointer.
++ * @stats: Points to struct that will be filled with statistics.
++ * @is_atomic: Hint from the caller - if the func can sleep or not.
++ *
++ * Context: The function should not sleep in case is_atomic == true.
++ * Return: Void.
++ */
++void qed_get_vport_stats_context(struct qed_dev *cdev,
++                               struct qed_eth_stats *stats,
++                               bool is_atomic);
++
+ void qed_reset_vport_stats(struct qed_dev *cdev);
+ 
+ /**
+diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
+index f5af83342856f..c278f8893042b 100644
+--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
++++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
+@@ -3092,7 +3092,7 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
+ 
+       switch (type) {
+       case QED_MCP_LAN_STATS:
+-              qed_get_vport_stats(cdev, &eth_stats);
++              qed_get_vport_stats_context(cdev, &eth_stats, true);
+               stats->lan_stats.ucast_rx_pkts =
+                                       eth_stats.common.rx_ucast_pkts;
+               stats->lan_stats.ucast_tx_pkts =
+@@ -3100,10 +3100,10 @@ void qed_get_protocol_stats(struct qed_dev *cdev,
+               stats->lan_stats.fcs_err = -1;
+               break;
+       case QED_MCP_FCOE_STATS:
+-              qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats);
++              qed_get_protocol_stats_fcoe(cdev, &stats->fcoe_stats, true);
+               break;
+       case QED_MCP_ISCSI_STATS:
+-              qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats);
++              qed_get_protocol_stats_iscsi(cdev, &stats->iscsi_stats, true);
+               break;
+       default:
+               DP_VERBOSE(cdev, QED_MSG_SP,
+-- 
+2.40.1
+
diff --git a/queue-6.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch b/queue-6.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch

new file mode 100644 (file)

index 0000000..fd78f70
--- /dev/null
+++ b/queue-6.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch
@@ -0,0 +1,66 @@
+From 3844afbbd723e5830c748d56ad30bff3e01b6774 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 15:53:14 +0800
+Subject: rtnetlink: let rtnl_bridge_setlink checks IFLA_BRIDGE_MODE length
+
+From: Lin Ma <linma@zju.edu.cn>
+
+[ Upstream commit d73ef2d69c0dba5f5a1cb9600045c873bab1fb7f ]
+
+There are 9 ndo_bridge_setlink handlers in total in the current kernel,
+which are 1) bnxt_bridge_setlink, 2) be_ndo_bridge_setlink 3)
+i40e_ndo_bridge_setlink 4) ice_bridge_setlink 5)
+ixgbe_ndo_bridge_setlink 6) mlx5e_bridge_setlink 7)
+nfp_net_bridge_setlink 8) qeth_l2_bridge_setlink 9) br_setlink.
+
+By investigating the code, we find that 1-7 parse and use nlattr
+IFLA_BRIDGE_MODE but 3 and 4 forget to do the nla_len check. This can
+lead to an out-of-attribute read and allow a malformed nlattr (e.g.,
+length 0) to be viewed as a 2 byte integer.
+
+To avoid such issues, including in ndo_bridge_setlink handlers added in
+the future, this patch adds the nla_len check in rtnl_bridge_setlink and
+returns an error early if the length mismatches. To make this work, the
+break is removed from the parsing for IFLA_BRIDGE_FLAGS to make sure
+that nla_for_each_nested iterates over every attribute.
+
+Fixes: b1edc14a3fbf ("ice: Implement ice_bridge_getlink and ice_bridge_setlink")
+Fixes: 51616018dd1b ("i40e: Add support for getlink, setlink ndo ops")
+Suggested-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Lin Ma <linma@zju.edu.cn>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Reviewed-by: Hangbin Liu <liuhangbin@gmail.com>
+Link: https://lore.kernel.org/r/20230726075314.1059224-1-linma@zju.edu.cn
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/rtnetlink.c | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+index 2fe6a3379aaed..aa1743b2b770b 100644
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -5139,13 +5139,17 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+       br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
+       if (br_spec) {
+               nla_for_each_nested(attr, br_spec, rem) {
+-                      if (nla_type(attr) == IFLA_BRIDGE_FLAGS) {
++                      if (nla_type(attr) == IFLA_BRIDGE_FLAGS && !have_flags) {
+                               if (nla_len(attr) < sizeof(flags))
+                                       return -EINVAL;
+ 
+                               have_flags = true;
+                               flags = nla_get_u16(attr);
+-                              break;
++                      }
++
++                      if (nla_type(attr) == IFLA_BRIDGE_MODE) {
++                              if (nla_len(attr) < sizeof(u16))
++                                      return -EINVAL;
+                       }
+               }
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.4/s390-qeth-don-t-call-dev_close-dev_open-down-up.patch b/queue-6.4/s390-qeth-don-t-call-dev_close-dev_open-down-up.patch

new file mode 100644 (file)

index 0000000..900fd45
--- /dev/null
+++ b/queue-6.4/s390-qeth-don-t-call-dev_close-dev_open-down-up.patch
@@ -0,0 +1,104 @@
+From 31e1802f4fb660618ac90154f444ec73e044a050 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 10:00:16 +0200
+Subject: s390/qeth: Don't call dev_close/dev_open (DOWN/UP)
+
+From: Alexandra Winter <wintera@linux.ibm.com>
+
+[ Upstream commit 1cfef80d4c2b2c599189f36f36320b205d9447d9 ]
+
+dev_close() and dev_open() are issued to change the interface state to DOWN
+or UP (dev->flags IFF_UP). When the netdev is set DOWN it loses e.g. its
+IPv6 addresses and routes. We don't want this in cases of device recovery
+(triggered by hardware or software) or when the qeth device is set
+offline.
+
+Setting a qeth device offline or online and device recovery actions call
+netif_device_detach() and/or netif_device_attach(). That will reset or
+set the LOWER_UP indication i.e. change the dev->state Bit
+__LINK_STATE_PRESENT. That is enough to e.g. cause bond failovers, and
+still preserves the interface settings that are handled by the network
+stack.
+
+Don't call dev_open() nor dev_close() from the qeth device driver. Let the
+network stack handle this.
+
+Fixes: d4560150cb47 ("s390/qeth: call dev_close() during recovery")
+Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
+Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/s390/net/qeth_core.h      | 1 -
+ drivers/s390/net/qeth_core_main.c | 2 --
+ drivers/s390/net/qeth_l2_main.c   | 9 ++++++---
+ drivers/s390/net/qeth_l3_main.c   | 8 +++++---
+ 4 files changed, 11 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
+index 1d195429753dd..613eab7297046 100644
+--- a/drivers/s390/net/qeth_core.h
++++ b/drivers/s390/net/qeth_core.h
+@@ -716,7 +716,6 @@ struct qeth_card_info {
+       u16 chid;
+       u8 ids_valid:1; /* cssid,iid,chid */
+       u8 dev_addr_is_registered:1;
+-      u8 open_when_online:1;
+       u8 promisc_mode:1;
+       u8 use_v1_blkt:1;
+       u8 is_vm_nic:1;
+diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
+index 1d5b207c2b9e9..cd783290bde5e 100644
+--- a/drivers/s390/net/qeth_core_main.c
++++ b/drivers/s390/net/qeth_core_main.c
+@@ -5373,8 +5373,6 @@ int qeth_set_offline(struct qeth_card *card, const struct qeth_discipline *disc,
+       qeth_clear_ipacmd_list(card);
+ 
+       rtnl_lock();
+-      card->info.open_when_online = card->dev->flags & IFF_UP;
+-      dev_close(card->dev);
+       netif_device_detach(card->dev);
+       netif_carrier_off(card->dev);
+       rtnl_unlock();
+diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
+index 9f13ed170a437..75910c0bcc2bc 100644
+--- a/drivers/s390/net/qeth_l2_main.c
++++ b/drivers/s390/net/qeth_l2_main.c
+@@ -2388,9 +2388,12 @@ static int qeth_l2_set_online(struct qeth_card *card, bool carrier_ok)
+               qeth_enable_hw_features(dev);
+               qeth_l2_enable_brport_features(card);
+ 
+-              if (card->info.open_when_online) {
+-                      card->info.open_when_online = 0;
+-                      dev_open(dev, NULL);
++              if (netif_running(dev)) {
++                      local_bh_disable();
++                      napi_schedule(&card->napi);
++                      /* kick-start the NAPI softirq: */
++                      local_bh_enable();
++                      qeth_l2_set_rx_mode(dev);
+               }
+               rtnl_unlock();
+       }
+diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
+index af4e60d2917e9..b92a32b4b1141 100644
+--- a/drivers/s390/net/qeth_l3_main.c
++++ b/drivers/s390/net/qeth_l3_main.c
+@@ -2018,9 +2018,11 @@ static int qeth_l3_set_online(struct qeth_card *card, bool carrier_ok)
+               netif_device_attach(dev);
+               qeth_enable_hw_features(dev);
+ 
+-              if (card->info.open_when_online) {
+-                      card->info.open_when_online = 0;
+-                      dev_open(dev, NULL);
++              if (netif_running(dev)) {
++                      local_bh_disable();
++                      napi_schedule(&card->napi);
++                      /* kick-start the NAPI softirq: */
++                      local_bh_enable();
+               }
+               rtnl_unlock();
+       }
+-- 
+2.40.1
+
diff --git a/queue-6.4/s390-vmem-split-pages-when-debug-pagealloc-is-enable.patch b/queue-6.4/s390-vmem-split-pages-when-debug-pagealloc-is-enable.patch

new file mode 100644 (file)

index 0000000..fea5780
--- /dev/null
+++ b/queue-6.4/s390-vmem-split-pages-when-debug-pagealloc-is-enable.patch
@@ -0,0 +1,67 @@
+From 80f6a4e001711ee634ac53beabb5f3d10de1c561 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 26 Jul 2023 11:10:19 +0200
+Subject: s390/vmem: split pages when debug pagealloc is enabled
+
+From: Sven Schnelle <svens@linux.ibm.com>
+
+[ Upstream commit edc1e4b6e26536868ef819a735e04a5b32c10589 ]
+
+Since commit bb1520d581a3 ("s390/mm: start kernel with DAT enabled")
+the kernel crashes early during boot when debug pagealloc is enabled:
+
+mem auto-init: stack:off, heap alloc:off, heap free:off
+addressing exception: 0005 ilc:2 [#1] SMP DEBUG_PAGEALLOC
+Modules linked in:
+CPU: 0 PID: 0 Comm: swapper Not tainted 6.5.0-rc3-09759-gc5666c912155 #630
+[..]
+Krnl Code: 00000000001325f6: ec5600248064 cgrj %r5,%r6,8,000000000013263e
+           00000000001325fc: eb880002000c srlg %r8,%r8,2
+          #0000000000132602: b2210051     ipte %r5,%r1,%r0,0
+          >0000000000132606: b90400d1     lgr %r13,%r1
+           000000000013260a: 41605008     la %r6,8(%r5)
+           000000000013260e: a7db1000     aghi %r13,4096
+           0000000000132612: b221006d     ipte %r6,%r13,%r0,0
+           0000000000132616: e3d0d0000171 lay %r13,4096(%r13)
+
+Call Trace:
+ __kernel_map_pages+0x14e/0x320
+ __free_pages_ok+0x23a/0x5a8)
+ free_low_memory_core_early+0x214/0x2c8
+ memblock_free_all+0x28/0x58
+ mem_init+0xb6/0x228
+ mm_core_init+0xb6/0x3b0
+ start_kernel+0x1d2/0x5a8
+ startup_continue+0x36/0x40
+Kernel panic - not syncing: Fatal exception: panic_on_oops
+
+This is caused by using large mappings on machines with EDAT1/EDAT2. Add
+the code to split the mappings into 4k pages if debug pagealloc is enabled
+by CONFIG_DEBUG_PAGEALLOC_ENABLE_DEFAULT or the debug_pagealloc kernel
+command line option.
+
+Fixes: bb1520d581a3 ("s390/mm: start kernel with DAT enabled")
+Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
+Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/mm/vmem.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
+index b9dcb4ae6c59a..05f4912380fac 100644
+--- a/arch/s390/mm/vmem.c
++++ b/arch/s390/mm/vmem.c
+@@ -761,6 +761,8 @@ void __init vmem_map_init(void)
+       if (static_key_enabled(&cpu_has_bear))
+               set_memory_nx(0, 1);
+       set_memory_nx(PAGE_SIZE, 1);
++      if (debug_pagealloc_enabled())
++              set_memory_4k(0, ident_map_size >> PAGE_SHIFT);
+ 
+       pr_info("Write protected kernel read-only data: %luk\n",
+               (unsigned long)(__end_rodata - _stext) >> 10);
+-- 
+2.40.1
+
diff --git a/queue-6.4/selftest-net-assert-on-a-proper-value-in-so_incoming.patch b/queue-6.4/selftest-net-assert-on-a-proper-value-in-so_incoming.patch

new file mode 100644 (file)

index 0000000..f9c7bf3
--- /dev/null
+++ b/queue-6.4/selftest-net-assert-on-a-proper-value-in-so_incoming.patch
@@ -0,0 +1,47 @@
+From c85aa69ea9cd8b11739fd8de27221dbf8a337568 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 11:15:53 -0700
+Subject: selftest: net: Assert on a proper value in so_incoming_cpu.c.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 3ff1617450eceb290ac17120fc172815e09a93cf ]
+
+Dan Carpenter reported an error spotted by Smatch.
+
+  ./tools/testing/selftests/net/so_incoming_cpu.c:163 create_clients()
+  error: uninitialized symbol 'ret'.
+
+The returned value of sched_setaffinity() should be checked with
+ASSERT_EQ(), but the value was not saved in a proper variable,
+resulting in an error above.
+
+Let's save the returned value of sched_setaffinity().
+
+Fixes: 6df96146b202 ("selftest: Add test for SO_INCOMING_CPU.")
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
+Closes: https://lore.kernel.org/linux-kselftest/fe376760-33b6-4fc9-88e8-178e809af1ac@moroto.mountain/
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230731181553.5392-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/selftests/net/so_incoming_cpu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
+index 0e04f9fef9867..a148181641026 100644
+--- a/tools/testing/selftests/net/so_incoming_cpu.c
++++ b/tools/testing/selftests/net/so_incoming_cpu.c
+@@ -159,7 +159,7 @@ void create_clients(struct __test_metadata *_metadata,
+               /* Make sure SYN will be processed on the i-th CPU
+                * and finally distributed to the i-th listener.
+                */
+-              sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
++              ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
+               ASSERT_EQ(ret, 0);
+ 
+               for (j = 0; j < CLIENT_PER_SERVER; j++) {
+-- 
+2.40.1
+
diff --git a/queue-6.4/series b/queue-6.4/series

index a290833cdd6c4abfe639b26ad8a0c33d1c0cda77..6bd7b6a3587e1ce1fe96a43a4d6961824dcde5eb 100644 (file)
--- a/queue-6.4/series
+++ b/queue-6.4/series
@@ -3,3 +3,93 @@ iommu-arm-smmu-v3-work-around-mmu-600-erratum-1076982.patch
  iommu-arm-smmu-v3-document-mmu-700-erratum-2812531.patch
  iommu-arm-smmu-v3-add-explicit-feature-for-nesting.patch
  iommu-arm-smmu-v3-document-nesting-related-errata.patch
+arm64-dts-imx8mm-venice-gw7903-disable-disp_blk_ctrl.patch
+arm64-dts-imx8mm-venice-gw7904-disable-disp_blk_ctrl.patch
+arm64-dts-phycore-imx8mm-label-typo-fix-of-vpu.patch
+arm64-dts-phycore-imx8mm-correction-in-gpio-line-nam.patch
+arm64-dts-imx8mn-var-som-add-missing-pull-up-for-onb.patch
+arm64-dts-freescale-fix-vpu-g2-clock.patch
+firmware-smccc-fix-use-of-uninitialised-results-stru.patch
+firmware-arm_scmi-fix-signed-error-return-values-han.patch
+lib-bitmap-workaround-const_eval-test-build-failure.patch
+arm-dts-nxp-imx-limit-sk-imx53-supported-frequencies.patch
+soc-imx-imx8mp-blk-ctrl-register-hsio-pll-clock-as-b.patch
+firmware-arm_scmi-fix-chan_free-cleanup-on-smc.patch
+arm-dts-at91-use-clock-controller-name-for-pmc-nodes.patch
+arm-dts-at91-use-clock-controller-name-for-sckc-node.patch
+arm-dts-at91-use-generic-name-for-shutdown-controlle.patch
+arm-dts-at91-sam9x60-fix-the-soc-detection.patch
+word-at-a-time-use-the-same-return-type-for-has_zero.patch
+s390-vmem-split-pages-when-debug-pagealloc-is-enable.patch
+kvm-s390-fix-sthyi-error-handling.patch
+erofs-fix-wrong-primary-bvec-selection-on-deduplicat.patch
+perf-pmu-arm64-fix-reading-the-pmu-cpu-slots-in-sysf.patch
+wifi-cfg80211-fix-return-value-in-scan-logic.patch
+net-mlx5e-fix-double-free-in-macsec_fs_tx_create_cry.patch
+net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch
+net-mlx5-fix-potential-memory-leak-in-mlx5e_init_rep.patch
+net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch
+net-mlx5-honor-user-input-for-migratable-port-fn-att.patch
+net-mlx5e-don-t-hold-encap-tbl-lock-if-there-is-no-e.patch
+net-mlx5e-fix-crash-moving-to-switchdev-mode-when-nt.patch
+net-mlx5e-move-representor-neigh-cleanup-to-profile-.patch
+net-mlx5e-xsk-fix-invalid-buffer-access-for-legacy-r.patch
+net-mlx5e-xsk-fix-crash-on-regular-rq-reactivation.patch
+net-mlx5e-ktls-fix-protection-domain-in-use-syndrome.patch
+net-mlx5-fs_chains-fix-ft-prio-if-ignore_flow_level-.patch
+net-mlx5-unregister-devlink-params-in-case-interface.patch
+bpf-add-length-check-for-sk_diag_bpf_storage_req_map.patch
+rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch
+net-dsa-fix-value-check-in-bcm_sf2_sw_probe.patch
+perf-test-uprobe_from_different_cu-skip-if-there-is-.patch
+net-sched-cls_u32-fix-match-key-mis-addressing.patch
+misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch
+net-stmmac-tegra-properly-allocate-clock-bulk-data.patch
+qed-fix-scheduling-in-a-tasklet-while-getting-stats.patch
+net-move-gso-declarations-and-functions-to-their-own.patch
+net-gro-fix-misuse-of-cb-in-udp-socket-lookup.patch
+net-annotate-data-races-around-sk-sk_reserved_mem.patch
+net-annotate-data-race-around-sk-sk_txrehash.patch
+net-annotate-data-races-around-sk-sk_max_pacing_rate.patch
+net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch
+net-add-missing-read_once-sk-sk_sndbuf-annotation.patch
+net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch
+net-annotate-data-races-around-sk-sk_mark.patch
+net-add-missing-data-race-annotations-around-sk-sk_p.patch
+net-add-missing-data-race-annotation-for-sk_ll_usec.patch
+net-annotate-data-races-around-sk-sk_priority.patch
+net-sched-taprio-limit-tca_taprio_attr_sched_cycle_t.patch
+net-usb-lan78xx-reorder-cleanup-operations-to-avoid-.patch
+ice-fix-rdma-vsi-removal-during-queue-rebuild.patch
+bnxt-don-t-handle-xdp-in-netpoll.patch
+octeon_ep-initialize-mbox-mutexes.patch
+bpf-move-unprivileged-checks-into-map_create-and-bpf.patch
+bpf-inline-map-creation-logic-in-map_create-function.patch
+bpf-centralize-permissions-checks-for-all-bpf-map-ty.patch
+bpf-cpumap-make-sure-kthread-is-running-before-map-u.patch
+bpf-cpumap-handle-skb-as-well-when-clean-up-ptr_ring.patch
+net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch
+net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch
+net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch
+bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch
+net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch
+net-korina-handle-clk-prepare-error-in-korina_probe.patch
+net-netsec-ignore-phy-mode-on-synquacer-in-dt-mode.patch
+selftest-net-assert-on-a-proper-value-in-so_incoming.patch
+bnxt_en-fix-page-pool-logic-for-page-size-64k.patch
+bnxt_en-fix-max_mtu-setting-for-multi-buf-xdp.patch
+net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch
+s390-qeth-don-t-call-dev_close-dev_open-down-up.patch
+ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch
+vxlan-fix-nexthop-hash-size.patch
+net-mlx5-fs_core-make-find_closest_ft-more-generic.patch
+net-mlx5-fs_core-skip-the-fts-in-the-same-fs_type_pr.patch
+net-mlx5e-set-proper-ipsec-source-port-in-l4-selecto.patch
+prestera-fix-fallback-to-previous-version-on-same-ma.patch
+tcp_metrics-fix-addr_same-helper.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch
+tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch
+tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch
+test-vsock-remove-vsock_perf-executable-on-make-clea.patch
diff --git a/queue-6.4/soc-imx-imx8mp-blk-ctrl-register-hsio-pll-clock-as-b.patch b/queue-6.4/soc-imx-imx8mp-blk-ctrl-register-hsio-pll-clock-as-b.patch

new file mode 100644 (file)

index 0000000..989b08f
--- /dev/null
+++ b/queue-6.4/soc-imx-imx8mp-blk-ctrl-register-hsio-pll-clock-as-b.patch
@@ -0,0 +1,46 @@
+From 99208626377ae451b76deafef4e913e0d2703310 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 16:54:09 +0200
+Subject: soc: imx: imx8mp-blk-ctrl: register HSIO PLL clock as bus_power_dev
+ child
+
+From: Lucas Stach <l.stach@pengutronix.de>
+
+[ Upstream commit 53cab4d871690c49fac87c657cbf459e39c5b93b ]
+
+The blk-ctrl device is deliberately placed outside of the GPC power
+domain as it needs to control the power sequencing of the blk-ctrl
+domains together with the GPC domains.
+
+Clock runtime PM works by operating on the clock parent device, which
+doesn't translate into the necessary GPC power domain action if the
+clk parent is not part of the GPC power domain. Use the bus_power_device
+as the parent for the clock to trigger the proper GPC domain actions on
+clock runtime power management.
+
+Fixes: 2cbee26e5d59 ("soc: imx: imx8mp-blk-ctrl: expose high performance PLL clock")
+Reported-by: Yannic Moog <Y.Moog@phytec.de>
+Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
+Tested-by: Yannic Moog <y.moog@phytec.de>
+Signed-off-by: Shawn Guo <shawnguo@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soc/imx/imx8mp-blk-ctrl.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/soc/imx/imx8mp-blk-ctrl.c b/drivers/soc/imx/imx8mp-blk-ctrl.c
+index 870aecc0202ae..1c1fcab4979a4 100644
+--- a/drivers/soc/imx/imx8mp-blk-ctrl.c
++++ b/drivers/soc/imx/imx8mp-blk-ctrl.c
+@@ -164,7 +164,7 @@ static int imx8mp_hsio_blk_ctrl_probe(struct imx8mp_blk_ctrl *bc)
+       clk_hsio_pll->hw.init = &init;
+ 
+       hw = &clk_hsio_pll->hw;
+-      ret = devm_clk_hw_register(bc->dev, hw);
++      ret = devm_clk_hw_register(bc->bus_power_dev, hw);
+       if (ret)
+               return ret;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch b/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch

new file mode 100644 (file)

index 0000000..e4eba1b
--- /dev/null
+++ b/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch
@@ -0,0 +1,51 @@
+From f3d962682e6a96c28a90a56fc7c63b87020a429d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:57 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_lock
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 285ce119a3c6c4502585936650143e54c8692788 ]
+
+tm->tcpm_lock can be read or written locklessly.
+
+Add needed READ_ONCE()/WRITE_ONCE() to document this.
+
+Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-4-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 8386165887963..131fa30049691 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -59,7 +59,8 @@ static inline struct net *tm_net(struct tcp_metrics_block *tm)
+ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+                             enum tcp_metric_index idx)
+ {
+-      return tm->tcpm_lock & (1 << idx);
++      /* Paired with WRITE_ONCE() in tcpm_suck_dst() */
++      return READ_ONCE(tm->tcpm_lock) & (1 << idx);
+ }
+ 
+ static u32 tcp_metric_get(struct tcp_metrics_block *tm,
+@@ -110,7 +111,8 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+               val |= 1 << TCP_METRIC_CWND;
+       if (dst_metric_locked(dst, RTAX_REORDERING))
+               val |= 1 << TCP_METRIC_REORDERING;
+-      tm->tcpm_lock = val;
++      /* Paired with READ_ONCE() in tcp_metric_locked() */
++      WRITE_ONCE(tm->tcpm_lock, val);
+ 
+       msval = dst_metric_raw(dst, RTAX_RTT);
+       tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
+-- 
+2.40.1
+
diff --git a/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch b/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch

new file mode 100644 (file)

index 0000000..61210de
--- /dev/null
+++ b/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch
@@ -0,0 +1,66 @@
+From 11bc7fb4bf9c5fdf484b4e9ebdf11045fbb2a26a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:59 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_net
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit d5d986ce42c71a7562d32c4e21e026b0f87befec ]
+
+tm->tcpm_net can be read or written locklessly.
+
+Instead of changing write_pnet() and read_pnet() and potentially
+hurt performance, add the needed READ_ONCE()/WRITE_ONCE()
+in tm_net() and tcpm_new().
+
+Fixes: 849e8a0ca8d5 ("tcp_metrics: Add a field tcpm_net and verify it matches on lookup")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-6-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index fd4ab7a51cef2..4fd274836a48f 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -40,7 +40,7 @@ struct tcp_fastopen_metrics {
+ 
+ struct tcp_metrics_block {
+       struct tcp_metrics_block __rcu  *tcpm_next;
+-      possible_net_t                  tcpm_net;
++      struct net                      *tcpm_net;
+       struct inetpeer_addr            tcpm_saddr;
+       struct inetpeer_addr            tcpm_daddr;
+       unsigned long                   tcpm_stamp;
+@@ -51,9 +51,10 @@ struct tcp_metrics_block {
+       struct rcu_head                 rcu_head;
+ };
+ 
+-static inline struct net *tm_net(struct tcp_metrics_block *tm)
++static inline struct net *tm_net(const struct tcp_metrics_block *tm)
+ {
+-      return read_pnet(&tm->tcpm_net);
++      /* Paired with the WRITE_ONCE() in tcpm_new() */
++      return READ_ONCE(tm->tcpm_net);
+ }
+ 
+ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+@@ -197,7 +198,9 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+               if (!tm)
+                       goto out_unlock;
+       }
+-      write_pnet(&tm->tcpm_net, net);
++      /* Paired with the READ_ONCE() in tm_net() */
++      WRITE_ONCE(tm->tcpm_net, net);
++
+       tm->tcpm_saddr = *saddr;
+       tm->tcpm_daddr = *daddr;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch b/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch

new file mode 100644 (file)

index 0000000..bd447fc
--- /dev/null
+++ b/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch
@@ -0,0 +1,88 @@
+From bb519188039e934fe68c543b856a4e0be105cb90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:56 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_stamp
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 949ad62a5d5311d36fce2e14fe5fed3f936da51c ]
+
+tm->tcpm_stamp can be read or written locklessly.
+
+Add needed READ_ONCE()/WRITE_ONCE() to document this.
+
+Also constify tcpm_check_stamp() dst argument.
+
+Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-3-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 19 +++++++++++++------
+ 1 file changed, 13 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index c4daf0aa2d4d9..8386165887963 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -97,7 +97,7 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+       u32 msval;
+       u32 val;
+ 
+-      tm->tcpm_stamp = jiffies;
++      WRITE_ONCE(tm->tcpm_stamp, jiffies);
+ 
+       val = 0;
+       if (dst_metric_locked(dst, RTAX_RTT))
+@@ -131,9 +131,15 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+ 
+ #define TCP_METRICS_TIMEOUT           (60 * 60 * HZ)
+ 
+-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
++static void tcpm_check_stamp(struct tcp_metrics_block *tm,
++                           const struct dst_entry *dst)
+ {
+-      if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
++      unsigned long limit;
++
++      if (!tm)
++              return;
++      limit = READ_ONCE(tm->tcpm_stamp) + TCP_METRICS_TIMEOUT;
++      if (unlikely(time_after(jiffies, limit)))
+               tcpm_suck_dst(tm, dst, false);
+ }
+ 
+@@ -174,7 +180,8 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+               oldest = deref_locked(tcp_metrics_hash[hash].chain);
+               for (tm = deref_locked(oldest->tcpm_next); tm;
+                    tm = deref_locked(tm->tcpm_next)) {
+-                      if (time_before(tm->tcpm_stamp, oldest->tcpm_stamp))
++                      if (time_before(READ_ONCE(tm->tcpm_stamp),
++                                      READ_ONCE(oldest->tcpm_stamp)))
+                               oldest = tm;
+               }
+               tm = oldest;
+@@ -434,7 +441,7 @@ void tcp_update_metrics(struct sock *sk)
+                                              tp->reordering);
+               }
+       }
+-      tm->tcpm_stamp = jiffies;
++      WRITE_ONCE(tm->tcpm_stamp, jiffies);
+ out_unlock:
+       rcu_read_unlock();
+ }
+@@ -647,7 +654,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
+       }
+ 
+       if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
+-                        jiffies - tm->tcpm_stamp,
++                        jiffies - READ_ONCE(tm->tcpm_stamp),
+                         TCP_METRICS_ATTR_PAD) < 0)
+               goto nla_put_failure;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch b/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch

new file mode 100644 (file)

index 0000000..0c352b0
--- /dev/null
+++ b/queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch
@@ -0,0 +1,85 @@
+From 7c742fd9b5b9516df980dadf8b76d996df468aa8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:58 +0000
+Subject: tcp_metrics: annotate data-races around tm->tcpm_vals[]
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 8c4d04f6b443869d25e59822f7cec88d647028a9 ]
+
+tm->tcpm_vals[] values can be read or written locklessly.
+
+Add needed READ_ONCE()/WRITE_ONCE() to document this,
+and force use of tcp_metric_get() and tcp_metric_set()
+
+Fixes: 51c5d0c4b169 ("tcp: Maintain dynamic metrics in local cache.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 23 ++++++++++++++---------
+ 1 file changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 131fa30049691..fd4ab7a51cef2 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -63,17 +63,19 @@ static bool tcp_metric_locked(struct tcp_metrics_block *tm,
+       return READ_ONCE(tm->tcpm_lock) & (1 << idx);
+ }
+ 
+-static u32 tcp_metric_get(struct tcp_metrics_block *tm,
++static u32 tcp_metric_get(const struct tcp_metrics_block *tm,
+                         enum tcp_metric_index idx)
+ {
+-      return tm->tcpm_vals[idx];
++      /* Paired with WRITE_ONCE() in tcp_metric_set() */
++      return READ_ONCE(tm->tcpm_vals[idx]);
+ }
+ 
+ static void tcp_metric_set(struct tcp_metrics_block *tm,
+                          enum tcp_metric_index idx,
+                          u32 val)
+ {
+-      tm->tcpm_vals[idx] = val;
++      /* Paired with READ_ONCE() in tcp_metric_get() */
++      WRITE_ONCE(tm->tcpm_vals[idx], val);
+ }
+ 
+ static bool addr_same(const struct inetpeer_addr *a,
+@@ -115,13 +117,16 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+       WRITE_ONCE(tm->tcpm_lock, val);
+ 
+       msval = dst_metric_raw(dst, RTAX_RTT);
+-      tm->tcpm_vals[TCP_METRIC_RTT] = msval * USEC_PER_MSEC;
++      tcp_metric_set(tm, TCP_METRIC_RTT, msval * USEC_PER_MSEC);
+ 
+       msval = dst_metric_raw(dst, RTAX_RTTVAR);
+-      tm->tcpm_vals[TCP_METRIC_RTTVAR] = msval * USEC_PER_MSEC;
+-      tm->tcpm_vals[TCP_METRIC_SSTHRESH] = dst_metric_raw(dst, RTAX_SSTHRESH);
+-      tm->tcpm_vals[TCP_METRIC_CWND] = dst_metric_raw(dst, RTAX_CWND);
+-      tm->tcpm_vals[TCP_METRIC_REORDERING] = dst_metric_raw(dst, RTAX_REORDERING);
++      tcp_metric_set(tm, TCP_METRIC_RTTVAR, msval * USEC_PER_MSEC);
++      tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
++                     dst_metric_raw(dst, RTAX_SSTHRESH));
++      tcp_metric_set(tm, TCP_METRIC_CWND,
++                     dst_metric_raw(dst, RTAX_CWND));
++      tcp_metric_set(tm, TCP_METRIC_REORDERING,
++                     dst_metric_raw(dst, RTAX_REORDERING));
+       if (fastopen_clear) {
+               tm->tcpm_fastopen.mss = 0;
+               tm->tcpm_fastopen.syn_loss = 0;
+@@ -667,7 +672,7 @@ static int tcp_metrics_fill_info(struct sk_buff *msg,
+               if (!nest)
+                       goto nla_put_failure;
+               for (i = 0; i < TCP_METRIC_MAX_KERNEL + 1; i++) {
+-                      u32 val = tm->tcpm_vals[i];
++                      u32 val = tcp_metric_get(tm, i);
+ 
+                       if (!val)
+                               continue;
+-- 
+2.40.1
+
diff --git a/queue-6.4/tcp_metrics-fix-addr_same-helper.patch b/queue-6.4/tcp_metrics-fix-addr_same-helper.patch

new file mode 100644 (file)

index 0000000..e64f991
--- /dev/null
+++ b/queue-6.4/tcp_metrics-fix-addr_same-helper.patch
@@ -0,0 +1,46 @@
+From f93b104c0bfe79d118b84ad2a342eeb8006b01ee Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:14:55 +0000
+Subject: tcp_metrics: fix addr_same() helper
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e6638094d7af6c7b9dcca05ad009e79e31b4f670 ]
+
+Because v4 and v6 families use separate inetpeer trees (respectively
+net->ipv4.peers and net->ipv6.peers), inetpeer_addr_cmp(a, b) assumes
+a & b share the same family.
+
+tcp_metrics use a common hash table, where entries can have different
+families.
+
+We must therefore make sure to not call inetpeer_addr_cmp()
+if the families do not match.
+
+Fixes: d39d14ffa24c ("net: Add helper function to compare inetpeer addresses")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 82f4575f9cd90..c4daf0aa2d4d9 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -78,7 +78,7 @@ static void tcp_metric_set(struct tcp_metrics_block *tm,
+ static bool addr_same(const struct inetpeer_addr *a,
+                     const struct inetpeer_addr *b)
+ {
+-      return inetpeer_addr_cmp(a, b) == 0;
++      return (a->family == b->family) && !inetpeer_addr_cmp(a, b);
+ }
+ 
+ struct tcpm_hash_bucket {
+-- 
+2.40.1
+
diff --git a/queue-6.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch b/queue-6.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch

new file mode 100644 (file)

index 0000000..3752ea1
--- /dev/null
+++ b/queue-6.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch
@@ -0,0 +1,85 @@
+From b0aa0d121575c4f78003108a40bb09e2473ff250 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Aug 2023 13:15:00 +0000
+Subject: tcp_metrics: fix data-race in tcpm_suck_dst() vs fastopen
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ddf251fa2bc1d3699eec0bae6ed0bc373b8fda79 ]
+
+Whenever tcpm_new() reclaims an old entry, tcpm_suck_dst()
+would overwrite data that could be read from tcp_fastopen_cache_get()
+or tcp_metrics_fill_info().
+
+We need to acquire fastopen_seqlock to maintain consistency.
+
+For newly allocated objects, tcpm_new() can switch to kzalloc()
+to avoid an extra fastopen_seqlock acquisition.
+
+Fixes: 1fe4c481ba63 ("net-tcp: Fast Open client - cookie cache")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230802131500.1478140-7-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_metrics.c | 9 +++++----
+ 1 file changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
+index 4fd274836a48f..99ac5efe244d3 100644
+--- a/net/ipv4/tcp_metrics.c
++++ b/net/ipv4/tcp_metrics.c
+@@ -93,6 +93,7 @@ static struct tcpm_hash_bucket       *tcp_metrics_hash __read_mostly;
+ static unsigned int           tcp_metrics_hash_log __read_mostly;
+ 
+ static DEFINE_SPINLOCK(tcp_metrics_lock);
++static DEFINE_SEQLOCK(fastopen_seqlock);
+ 
+ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+                         const struct dst_entry *dst,
+@@ -129,11 +130,13 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm,
+       tcp_metric_set(tm, TCP_METRIC_REORDERING,
+                      dst_metric_raw(dst, RTAX_REORDERING));
+       if (fastopen_clear) {
++              write_seqlock(&fastopen_seqlock);
+               tm->tcpm_fastopen.mss = 0;
+               tm->tcpm_fastopen.syn_loss = 0;
+               tm->tcpm_fastopen.try_exp = 0;
+               tm->tcpm_fastopen.cookie.exp = false;
+               tm->tcpm_fastopen.cookie.len = 0;
++              write_sequnlock(&fastopen_seqlock);
+       }
+ }
+ 
+@@ -194,7 +197,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+               }
+               tm = oldest;
+       } else {
+-              tm = kmalloc(sizeof(*tm), GFP_ATOMIC);
++              tm = kzalloc(sizeof(*tm), GFP_ATOMIC);
+               if (!tm)
+                       goto out_unlock;
+       }
+@@ -204,7 +207,7 @@ static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
+       tm->tcpm_saddr = *saddr;
+       tm->tcpm_daddr = *daddr;
+ 
+-      tcpm_suck_dst(tm, dst, true);
++      tcpm_suck_dst(tm, dst, reclaim);
+ 
+       if (likely(!reclaim)) {
+               tm->tcpm_next = tcp_metrics_hash[hash].chain;
+@@ -556,8 +559,6 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst)
+       return ret;
+ }
+ 
+-static DEFINE_SEQLOCK(fastopen_seqlock);
+-
+ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
+                           struct tcp_fastopen_cookie *cookie)
+ {
+-- 
+2.40.1
+
diff --git a/queue-6.4/test-vsock-remove-vsock_perf-executable-on-make-clea.patch b/queue-6.4/test-vsock-remove-vsock_perf-executable-on-make-clea.patch

new file mode 100644 (file)

index 0000000..3945c43
--- /dev/null
+++ b/queue-6.4/test-vsock-remove-vsock_perf-executable-on-make-clea.patch
@@ -0,0 +1,39 @@
+From d00b1ca2466ecbbc06cb38e8197dcc51869ba002 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 3 Aug 2023 10:54:54 +0200
+Subject: test/vsock: remove vsock_perf executable on `make clean`
+
+From: Stefano Garzarella <sgarzare@redhat.com>
+
+[ Upstream commit 3c50c8b240390907c9a33c86d25d850520db6dfa ]
+
+We forgot to add vsock_perf to the rm command in the `clean`
+target, so now we have a leftover after `make clean` in
+tools/testing/vsock.
+
+Fixes: 8abbffd27ced ("test/vsock: vsock_perf utility")
+Cc: AVKrasnov@sberdevices.ru
+Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Tested-by: Simon Horman <horms@kernel.org> # build-tested
+Link: https://lore.kernel.org/r/20230803085454.30897-1-sgarzare@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/testing/vsock/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
+index 43a254f0e14dd..21a98ba565ab5 100644
+--- a/tools/testing/vsock/Makefile
++++ b/tools/testing/vsock/Makefile
+@@ -8,5 +8,5 @@ vsock_perf: vsock_perf.o
+ CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -D_GNU_SOURCE
+ .PHONY: all test clean
+ clean:
+-      ${RM} *.o *.d vsock_test vsock_diag_test
++      ${RM} *.o *.d vsock_test vsock_diag_test vsock_perf
+ -include *.d
+-- 
+2.40.1
+
diff --git a/queue-6.4/vxlan-fix-nexthop-hash-size.patch b/queue-6.4/vxlan-fix-nexthop-hash-size.patch

new file mode 100644 (file)

index 0000000..7862a6c
--- /dev/null
+++ b/queue-6.4/vxlan-fix-nexthop-hash-size.patch
@@ -0,0 +1,175 @@
+From eab582ca6038992dab9bba680dde374b1976232d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 31 Jul 2023 16:02:08 -0400
+Subject: vxlan: Fix nexthop hash size
+
+From: Benjamin Poirier <bpoirier@nvidia.com>
+
+[ Upstream commit 0756384fb1bd38adb2ebcfd1307422f433a1d772 ]
+
+The nexthop code expects a 31 bit hash, such as what is returned by
+fib_multipath_hash() and rt6_multipath_hash(). Passing the 32 bit hash
+returned by skb_get_hash() can lead to problems related to the fact that
+'int hash' is a negative number when the MSB is set.
+
+In the case of hash threshold nexthop groups, nexthop_select_path_hthr()
+will disproportionately select the first nexthop group entry. In the case
+of resilient nexthop groups, nexthop_select_path_res() may do an out of
+bounds access in nh_buckets[], for example:
+    hash = -912054133
+    num_nh_buckets = 2
+    bucket_index = 65535
+
+which leads to the following panic:
+
+BUG: unable to handle page fault for address: ffffc900025910c8
+PGD 100000067 P4D 100000067 PUD 10026b067 PMD 0
+Oops: 0002 [#1] PREEMPT SMP KASAN NOPTI
+CPU: 4 PID: 856 Comm: kworker/4:3 Not tainted 6.5.0-rc2+ #34
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
+Workqueue: ipv6_addrconf addrconf_dad_work
+RIP: 0010:nexthop_select_path+0x197/0xbf0
+Code: c1 e4 05 be 08 00 00 00 4c 8b 35 a4 14 7e 01 4e 8d 6c 25 00 4a 8d 7c 25 08 48 01 dd e8 c2 25 15 ff 49 8d 7d 08 e8 39 13 15 ff <4d> 89 75 08 48 89 ef e8 7d 12 15 ff 48 8b 5d 00 e8 14 55 2f 00 85
+RSP: 0018:ffff88810c36f260 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: 00000000002000c0 RCX: ffffffffaf02dd77
+RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffffc900025910c8
+RBP: ffffc900025910c0 R08: 0000000000000001 R09: fffff520004b2219
+R10: ffffc900025910cf R11: 31392d2068736168 R12: 00000000002000c0
+R13: ffffc900025910c0 R14: 00000000fffef608 R15: ffff88811840e900
+FS:  0000000000000000(0000) GS:ffff8881f7000000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffc900025910c8 CR3: 0000000129d00000 CR4: 0000000000750ee0
+PKRU: 55555554
+Call Trace:
+ <TASK>
+ ? __die+0x23/0x70
+ ? page_fault_oops+0x1ee/0x5c0
+ ? __pfx_is_prefetch.constprop.0+0x10/0x10
+ ? __pfx_page_fault_oops+0x10/0x10
+ ? search_bpf_extables+0xfe/0x1c0
+ ? fixup_exception+0x3b/0x470
+ ? exc_page_fault+0xf6/0x110
+ ? asm_exc_page_fault+0x26/0x30
+ ? nexthop_select_path+0x197/0xbf0
+ ? nexthop_select_path+0x197/0xbf0
+ ? lock_is_held_type+0xe7/0x140
+ vxlan_xmit+0x5b2/0x2340
+ ? __lock_acquire+0x92b/0x3370
+ ? __pfx_vxlan_xmit+0x10/0x10
+ ? __pfx___lock_acquire+0x10/0x10
+ ? __pfx_register_lock_class+0x10/0x10
+ ? skb_network_protocol+0xce/0x2d0
+ ? dev_hard_start_xmit+0xca/0x350
+ ? __pfx_vxlan_xmit+0x10/0x10
+ dev_hard_start_xmit+0xca/0x350
+ __dev_queue_xmit+0x513/0x1e20
+ ? __pfx___dev_queue_xmit+0x10/0x10
+ ? __pfx_lock_release+0x10/0x10
+ ? mark_held_locks+0x44/0x90
+ ? skb_push+0x4c/0x80
+ ? eth_header+0x81/0xe0
+ ? __pfx_eth_header+0x10/0x10
+ ? neigh_resolve_output+0x215/0x310
+ ? ip6_finish_output2+0x2ba/0xc90
+ ip6_finish_output2+0x2ba/0xc90
+ ? lock_release+0x236/0x3e0
+ ? ip6_mtu+0xbb/0x240
+ ? __pfx_ip6_finish_output2+0x10/0x10
+ ? find_held_lock+0x83/0xa0
+ ? lock_is_held_type+0xe7/0x140
+ ip6_finish_output+0x1ee/0x780
+ ip6_output+0x138/0x460
+ ? __pfx_ip6_output+0x10/0x10
+ ? __pfx___lock_acquire+0x10/0x10
+ ? __pfx_ip6_finish_output+0x10/0x10
+ NF_HOOK.constprop.0+0xc0/0x420
+ ? __pfx_NF_HOOK.constprop.0+0x10/0x10
+ ? ndisc_send_skb+0x2c0/0x960
+ ? __pfx_lock_release+0x10/0x10
+ ? __local_bh_enable_ip+0x93/0x110
+ ? lock_is_held_type+0xe7/0x140
+ ndisc_send_skb+0x4be/0x960
+ ? __pfx_ndisc_send_skb+0x10/0x10
+ ? mark_held_locks+0x65/0x90
+ ? find_held_lock+0x83/0xa0
+ ndisc_send_ns+0xb0/0x110
+ ? __pfx_ndisc_send_ns+0x10/0x10
+ addrconf_dad_work+0x631/0x8e0
+ ? lock_acquire+0x180/0x3f0
+ ? __pfx_addrconf_dad_work+0x10/0x10
+ ? mark_held_locks+0x24/0x90
+ process_one_work+0x582/0x9c0
+ ? __pfx_process_one_work+0x10/0x10
+ ? __pfx_do_raw_spin_lock+0x10/0x10
+ ? mark_held_locks+0x24/0x90
+ worker_thread+0x93/0x630
+ ? __kthread_parkme+0xdc/0x100
+ ? __pfx_worker_thread+0x10/0x10
+ kthread+0x1a5/0x1e0
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork+0x34/0x60
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork_asm+0x1b/0x30
+RIP: 0000:0x0
+Code: Unable to access opcode bytes at 0xffffffffffffffd6.
+RSP: 0000:0000000000000000 EFLAGS: 00000000 ORIG_RAX: 0000000000000000
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
+RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+ </TASK>
+Modules linked in:
+CR2: ffffc900025910c8
+---[ end trace 0000000000000000 ]---
+RIP: 0010:nexthop_select_path+0x197/0xbf0
+Code: c1 e4 05 be 08 00 00 00 4c 8b 35 a4 14 7e 01 4e 8d 6c 25 00 4a 8d 7c 25 08 48 01 dd e8 c2 25 15 ff 49 8d 7d 08 e8 39 13 15 ff <4d> 89 75 08 48 89 ef e8 7d 12 15 ff 48 8b 5d 00 e8 14 55 2f 00 85
+RSP: 0018:ffff88810c36f260 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: 00000000002000c0 RCX: ffffffffaf02dd77
+RDX: dffffc0000000000 RSI: 0000000000000008 RDI: ffffc900025910c8
+RBP: ffffc900025910c0 R08: 0000000000000001 R09: fffff520004b2219
+R10: ffffc900025910cf R11: 31392d2068736168 R12: 00000000002000c0
+R13: ffffc900025910c0 R14: 00000000fffef608 R15: ffff88811840e900
+FS:  0000000000000000(0000) GS:ffff8881f7000000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: ffffffffffffffd6 CR3: 0000000129d00000 CR4: 0000000000750ee0
+PKRU: 55555554
+Kernel panic - not syncing: Fatal exception in interrupt
+Kernel Offset: 0x2ca00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
+---[ end Kernel panic - not syncing: Fatal exception in interrupt ]---
+
+Fix this problem by ensuring the MSB of hash is 0 using a right shift - the
+same approach used in fib_multipath_hash() and rt6_multipath_hash().
+
+Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries")
+Signed-off-by: Benjamin Poirier <bpoirier@nvidia.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/vxlan.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/include/net/vxlan.h b/include/net/vxlan.h
+index b57567296bc67..fae2893613aa2 100644
+--- a/include/net/vxlan.h
++++ b/include/net/vxlan.h
+@@ -554,12 +554,12 @@ static inline void vxlan_flag_attr_error(int attrtype,
+ }
+ 
+ static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
+-                                          int hash,
++                                          u32 hash,
+                                           struct vxlan_rdst *rdst)
+ {
+       struct fib_nh_common *nhc;
+ 
+-      nhc = nexthop_path_fdb_result(nh, hash);
++      nhc = nexthop_path_fdb_result(nh, hash >> 1);
+       if (unlikely(!nhc))
+               return false;
+ 
+-- 
+2.40.1
+
diff --git a/queue-6.4/wifi-cfg80211-fix-return-value-in-scan-logic.patch b/queue-6.4/wifi-cfg80211-fix-return-value-in-scan-logic.patch

new file mode 100644 (file)

index 0000000..9ec1366
--- /dev/null
+++ b/queue-6.4/wifi-cfg80211-fix-return-value-in-scan-logic.patch
@@ -0,0 +1,43 @@
+From 4a8c0c43f2036136a80a89a40de9e7630f5f5c75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 23 Jul 2023 23:10:43 +0300
+Subject: wifi: cfg80211: Fix return value in scan logic
+
+From: Ilan Peer <ilan.peer@intel.com>
+
+[ Upstream commit fd7f08d92fcd7cc3eca0dd6c853f722a4c6176df ]
+
+The reporter noticed a warning when running iwlwifi:
+
+WARNING: CPU: 8 PID: 659 at mm/page_alloc.c:4453 __alloc_pages+0x329/0x340
+
+As cfg80211_parse_colocated_ap() is not expected to return a negative
+value, return 0 and not a negative value if cfg80211_calc_short_ssid()
+fails.
+
+Fixes: c8cb5b854b40f ("nl80211/cfg80211: support 6 GHz scanning")
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217675
+Signed-off-by: Ilan Peer <ilan.peer@intel.com>
+Signed-off-by: Kalle Valo <kvalo@kernel.org>
+Link: https://lore.kernel.org/r/20230723201043.3007430-1-ilan.peer@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/scan.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/wireless/scan.c b/net/wireless/scan.c
+index 396c63431e1f3..e9a3b0f724f18 100644
+--- a/net/wireless/scan.c
++++ b/net/wireless/scan.c
+@@ -640,7 +640,7 @@ static int cfg80211_parse_colocated_ap(const struct cfg80211_bss_ies *ies,
+ 
+       ret = cfg80211_calc_short_ssid(ies, &ssid_elem, &s_ssid_tmp);
+       if (ret)
+-              return ret;
++              return 0;
+ 
+       /* RNR IE may contain more than one NEIGHBOR_AP_INFO */
+       while (pos + sizeof(*ap_info) <= end) {
+-- 
+2.40.1
+
diff --git a/queue-6.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch b/queue-6.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch

new file mode 100644 (file)

index 0000000..05ab388
--- /dev/null
+++ b/queue-6.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch
@@ -0,0 +1,74 @@
+From 997d198b9619c7b87a2ba89f5c7ffed7c17186ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 1 Aug 2023 15:22:17 -0700
+Subject: word-at-a-time: use the same return type for has_zero regardless of
+ endianness
+
+From: ndesaulniers@google.com <ndesaulniers@google.com>
+
+[ Upstream commit 79e8328e5acbe691bbde029a52c89d70dcbc22f3 ]
+
+Compiling big-endian targets with Clang produces the diagnostic:
+
+  fs/namei.c:2173:13: warning: use of bitwise '|' with boolean operands [-Wbitwise-instead-of-logical]
+       } while (!(has_zero(a, &adata, &constants) | has_zero(b, &bdata, &constants)));
+                 ~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+                                               ||
+  fs/namei.c:2173:13: note: cast one or both operands to int to silence this warning
+
+It appears that when has_zero was introduced, two definitions were
+produced with different signatures (in particular different return
+types).
+
+Looking at the usage in hash_name() in fs/namei.c, I suspect that
+has_zero() is meant to be invoked twice per while loop iteration; using
+logical-or would not update `bdata` when `a` did not have zeros.  So I
+think it's preferable to always return an unsigned long rather than a
+bool, instead of updating the while loop in hash_name() to use a
+logical-or rather than bitwise-or.
+
+[ Also changed powerpc version to do the same  - Linus ]
+
+Link: https://github.com/ClangBuiltLinux/linux/issues/1832
+Link: https://lore.kernel.org/lkml/20230801-bitwise-v1-1-799bec468dc4@google.com/
+Fixes: 36126f8f2ed8 ("word-at-a-time: make the interfaces truly generic")
+Debugged-by: Nathan Chancellor <nathan@kernel.org>
+Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
+Acked-by: Heiko Carstens <hca@linux.ibm.com>
+Cc: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/word-at-a-time.h | 2 +-
+ include/asm-generic/word-at-a-time.h      | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
+index 46c31fb8748d5..30a12d2086871 100644
+--- a/arch/powerpc/include/asm/word-at-a-time.h
++++ b/arch/powerpc/include/asm/word-at-a-time.h
+@@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask)
+       return leading_zero_bits >> 3;
+ }
+ 
+-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+ {
+       unsigned long rhs = val | c->low_bits;
+       *data = rhs;
+diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
+index 20c93f08c9933..95a1d214108a5 100644
+--- a/include/asm-generic/word-at-a-time.h
++++ b/include/asm-generic/word-at-a-time.h
+@@ -38,7 +38,7 @@ static inline long find_zero(unsigned long mask)
+       return (mask >> 8) ? byte : byte + 1;
+ }
+ 
+-static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
++static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
+ {
+       unsigned long rhs = val | c->low_bits;
+       *data = rhs;
+-- 
+2.40.1
+
author	Sasha Levin <sashal@kernel.org>
	Sat, 5 Aug 2023 20:49:58 +0000 (16:49 -0400)
committer	Sasha Levin <sashal@kernel.org>
	Sat, 5 Aug 2023 20:49:58 +0000 (16:49 -0400)
queue-6.4/arm-dts-at91-sam9x60-fix-the-soc-detection.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm-dts-at91-use-clock-controller-name-for-pmc-nodes.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm-dts-at91-use-clock-controller-name-for-sckc-node.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm-dts-at91-use-generic-name-for-shutdown-controlle.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm-dts-nxp-imx-limit-sk-imx53-supported-frequencies.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm64-dts-freescale-fix-vpu-g2-clock.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm64-dts-imx8mm-venice-gw7903-disable-disp_blk_ctrl.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm64-dts-imx8mm-venice-gw7904-disable-disp_blk_ctrl.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm64-dts-imx8mn-var-som-add-missing-pull-up-for-onb.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm64-dts-phycore-imx8mm-correction-in-gpio-line-nam.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/arm64-dts-phycore-imx8mm-label-typo-fix-of-vpu.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bnxt-don-t-handle-xdp-in-netpoll.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bnxt_en-fix-max_mtu-setting-for-multi-buf-xdp.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bnxt_en-fix-page-pool-logic-for-page-size-64k.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bpf-add-length-check-for-sk_diag_bpf_storage_req_map.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bpf-centralize-permissions-checks-for-all-bpf-map-ty.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bpf-cpumap-handle-skb-as-well-when-clean-up-ptr_ring.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bpf-cpumap-make-sure-kthread-is-running-before-map-u.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bpf-inline-map-creation-logic-in-map_create-function.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bpf-move-unprivileged-checks-into-map_create-and-bpf.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/bpf-sockmap-remove-preempt_disable-in-sock_map_sk_ac.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/erofs-fix-wrong-primary-bvec-selection-on-deduplicat.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/firmware-arm_scmi-fix-chan_free-cleanup-on-smc.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/firmware-arm_scmi-fix-signed-error-return-values-han.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/firmware-smccc-fix-use-of-uninitialised-results-stru.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/ice-fix-rdma-vsi-removal-during-queue-rebuild.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/ip6mr-fix-skb_under_panic-in-ip6mr_cache_report.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/kvm-s390-fix-sthyi-error-handling.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/lib-bitmap-workaround-const_eval-test-build-failure.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/misdn-hfcpci-fix-potential-deadlock-on-hc-lock.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-add-missing-data-race-annotation-for-sk_ll_usec.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-add-missing-data-race-annotations-around-sk-sk_p.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-add-missing-read_once-sk-sk_rcvbuf-annotation.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-add-missing-read_once-sk-sk_rcvlowat-annotation.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-add-missing-read_once-sk-sk_sndbuf-annotation.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-annotate-data-race-around-sk-sk_txrehash.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-annotate-data-races-around-sk-sk_mark.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-annotate-data-races-around-sk-sk_max_pacing_rate.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-annotate-data-races-around-sk-sk_priority.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-annotate-data-races-around-sk-sk_reserved_mem.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-dcb-choose-correct-policy-to-parse-dcb_attr_bcn.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-dsa-fix-value-check-in-bcm_sf2_sw_probe.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-gro-fix-misuse-of-cb-in-udp-socket-lookup.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-korina-handle-clk-prepare-error-in-korina_probe.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-ll_temac-fix-error-checking-of-irq_of_parse_and_.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5-dr-fix-memory-leak-in-mlx5dr_cmd_create_ref.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5-fix-potential-memory-leak-in-mlx5e_init_rep.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5-fs_chains-fix-ft-prio-if-ignore_flow_level-.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5-fs_core-make-find_closest_ft-more-generic.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5-fs_core-skip-the-fts-in-the-same-fs_type_pr.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5-honor-user-input-for-migratable-port-fn-att.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5-unregister-devlink-params-in-case-interface.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5e-don-t-hold-encap-tbl-lock-if-there-is-no-e.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5e-fix-crash-moving-to-switchdev-mode-when-nt.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5e-fix-double-free-in-macsec_fs_tx_create_cry.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5e-fix-return-value-check-in-mlx5e_ipsec_remo.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5e-ktls-fix-protection-domain-in-use-syndrome.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5e-move-representor-neigh-cleanup-to-profile-.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5e-set-proper-ipsec-source-port-in-l4-selecto.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5e-xsk-fix-crash-on-regular-rq-reactivation.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-mlx5e-xsk-fix-invalid-buffer-access-for-legacy-r.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-move-gso-declarations-and-functions-to-their-own.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-netsec-ignore-phy-mode-on-synquacer-in-dt-mode.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-sched-cls_fw-no-longer-copy-tcf_result-on-update.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-sched-cls_route-no-longer-copy-tcf_result-on-upd.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-sched-cls_u32-fix-match-key-mis-addressing.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-sched-cls_u32-no-longer-copy-tcf_result-on-updat.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-sched-taprio-limit-tca_taprio_attr_sched_cycle_t.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-stmmac-tegra-properly-allocate-clock-bulk-data.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/net-usb-lan78xx-reorder-cleanup-operations-to-avoid-.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/octeon_ep-initialize-mbox-mutexes.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/perf-pmu-arm64-fix-reading-the-pmu-cpu-slots-in-sysf.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/perf-test-uprobe_from_different_cu-skip-if-there-is-.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/prestera-fix-fallback-to-previous-version-on-same-ma.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/qed-fix-scheduling-in-a-tasklet-while-getting-stats.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/rtnetlink-let-rtnl_bridge_setlink-checks-ifla_bridge.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/s390-qeth-don-t-call-dev_close-dev_open-down-up.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/s390-vmem-split-pages-when-debug-pagealloc-is-enable.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/selftest-net-assert-on-a-proper-value-in-so_incoming.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/series		patch \| blob \| blame \| history
queue-6.4/soc-imx-imx8mp-blk-ctrl-register-hsio-pll-clock-as-b.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_lock.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_net.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_stamp.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/tcp_metrics-annotate-data-races-around-tm-tcpm_vals.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/tcp_metrics-fix-addr_same-helper.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/tcp_metrics-fix-data-race-in-tcpm_suck_dst-vs-fastop.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/test-vsock-remove-vsock_perf-executable-on-make-clea.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/vxlan-fix-nexthop-hash-size.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/wifi-cfg80211-fix-return-value-in-scan-logic.patch	[new file with mode: 0644]	patch \| blob
queue-6.4/word-at-a-time-use-the-same-return-type-for-has_zero.patch	[new file with mode: 0644]	patch \| blob