From: Sasha Levin Date: Sun, 11 Apr 2021 18:23:33 +0000 (-0400) Subject: Fixes for 5.4 X-Git-Tag: v4.19.187~46 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=e1018b73a20af3cdce352fa8000b01ade2fe98fc;p=thirdparty%2Fkernel%2Fstable-queue.git Fixes for 5.4 Signed-off-by: Sasha Levin --- diff --git a/queue-5.4/net-dsa-lantiq_gswip-configure-all-remaining-gswip_m.patch b/queue-5.4/net-dsa-lantiq_gswip-configure-all-remaining-gswip_m.patch new file mode 100644 index 00000000000..0ae436f0257 --- /dev/null +++ b/queue-5.4/net-dsa-lantiq_gswip-configure-all-remaining-gswip_m.patch @@ -0,0 +1,102 @@ +From 24d4b996e6031700ec91aba70b5ac86cd24a1d60 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 11 Apr 2021 12:23:44 +0200 +Subject: net: dsa: lantiq_gswip: Configure all remaining GSWIP_MII_CFG bits + +From: Martin Blumenstingl + +commit 4b5923249b8fa427943b50b8f35265176472be38 upstream. + +There are a few more bits in the GSWIP_MII_CFG register for which we +did rely on the boot-loader (or the hardware defaults) to set them up +properly. + +For some external RMII PHYs we need to select the GSWIP_MII_CFG_RMII_CLK +bit and also we should un-set it for non-RMII PHYs. The +GSWIP_MII_CFG_RMII_CLK bit is ignored for other PHY connection modes. + +The GSWIP IP also supports in-band auto-negotiation for RGMII PHYs when +the GSWIP_MII_CFG_RGMII_IBS bit is set. Clear this bit always as there's +no known hardware which uses this (so it is not tested yet). + +Clear the xMII isolation bit when set at initialization time if it was +previously set by the bootloader. Not doing so could lead to no traffic +(neither RX nor TX) on a port with this bit set. + +While here, also add the GSWIP_MII_CFG_RESET bit. We don't need to +manage it because this bit is self-clearning when set. We still add it +here to get a better overview of the GSWIP_MII_CFG register. 
+ +Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") +Cc: stable@vger.kernel.org +Suggested-by: Hauke Mehrtens +Acked-by: Hauke Mehrtens +Signed-off-by: Martin Blumenstingl +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +[ Updated after the upstream commit 3e9005be87777 required some changes + for Linux 5.4 ] +Signed-off-by: Martin Blumenstingl +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/lantiq_gswip.c | 19 ++++++++++++++++--- + 1 file changed, 16 insertions(+), 3 deletions(-) + +diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c +index e0f5d406e6c0..dc75e798dbff 100644 +--- a/drivers/net/dsa/lantiq_gswip.c ++++ b/drivers/net/dsa/lantiq_gswip.c +@@ -93,8 +93,12 @@ + + /* GSWIP MII Registers */ + #define GSWIP_MII_CFGp(p) (0x2 * (p)) ++#define GSWIP_MII_CFG_RESET BIT(15) + #define GSWIP_MII_CFG_EN BIT(14) ++#define GSWIP_MII_CFG_ISOLATE BIT(13) + #define GSWIP_MII_CFG_LDCLKDIS BIT(12) ++#define GSWIP_MII_CFG_RGMII_IBS BIT(8) ++#define GSWIP_MII_CFG_RMII_CLK BIT(7) + #define GSWIP_MII_CFG_MODE_MIIP 0x0 + #define GSWIP_MII_CFG_MODE_MIIM 0x1 + #define GSWIP_MII_CFG_MODE_RMIIP 0x2 +@@ -817,9 +821,11 @@ static int gswip_setup(struct dsa_switch *ds) + /* Configure the MDIO Clock 2.5 MHz */ + gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1); + +- /* Disable the xMII link */ ++ /* Disable the xMII interface and clear it's isolation bit */ + for (i = 0; i < priv->hw_info->max_ports; i++) +- gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i); ++ gswip_mii_mask_cfg(priv, ++ GSWIP_MII_CFG_EN | GSWIP_MII_CFG_ISOLATE, ++ 0, i); + + /* enable special tag insertion on cpu port */ + gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, +@@ -1594,6 +1600,9 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port, + break; + case PHY_INTERFACE_MODE_RMII: + miicfg |= GSWIP_MII_CFG_MODE_RMIIM; ++ ++ /* Configure the RMII clock as output: */ ++ miicfg |= 
GSWIP_MII_CFG_RMII_CLK; + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: +@@ -1606,7 +1615,11 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port, + "Unsupported interface: %d\n", state->interface); + return; + } +- gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_MODE_MASK, miicfg, port); ++ ++ gswip_mii_mask_cfg(priv, ++ GSWIP_MII_CFG_MODE_MASK | GSWIP_MII_CFG_RMII_CLK | ++ GSWIP_MII_CFG_RGMII_IBS | GSWIP_MII_CFG_LDCLKDIS, ++ miicfg, port); + + gswip_port_set_speed(priv, port, state->speed, state->interface); + gswip_port_set_duplex(priv, port, state->duplex); +-- +2.30.2 + diff --git a/queue-5.4/net-dsa-lantiq_gswip-don-t-use-phy-auto-polling.patch b/queue-5.4/net-dsa-lantiq_gswip-don-t-use-phy-auto-polling.patch new file mode 100644 index 00000000000..99bd7dc5ae1 --- /dev/null +++ b/queue-5.4/net-dsa-lantiq_gswip-don-t-use-phy-auto-polling.patch @@ -0,0 +1,337 @@ +From 56dd98de3880dc64717b3d92dd15ba6160da89f5 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Sun, 11 Apr 2021 12:23:43 +0200 +Subject: net: dsa: lantiq_gswip: Don't use PHY auto polling + +From: Martin Blumenstingl + +commit 3e9005be87777afc902b9f5497495898202d335d upstream. + +PHY auto polling on the GSWIP hardware can be used so link changes +(speed, link up/down, etc.) can be detected automatically. Internally +GSWIP reads the PHY's registers for this functionality. Based on this +automatic detection GSWIP can also automatically re-configure it's port +settings. Unfortunately this auto polling (and configuration) mechanism +seems to cause various issues observed by different people on different +devices: +- FritzBox 7360v2: the two Gbit/s ports (connected to the two internal + PHY11G instances) are working fine but the two Fast Ethernet ports + (using an AR8030 RMII PHY) are completely dead (neither RX nor TX are + received). It turns out that the AR8030 PHY sets the BMSR_ESTATEN bit + as well as the ESTATUS_1000_TFULL and ESTATUS_1000_XFULL bits. 
This + makes the PHY auto polling state machine (rightfully?) think that the + established link speed (when the other side is Gbit/s capable) is + 1Gbit/s. +- None of the Ethernet ports on the Zyxel P-2812HNU-F1 (two are + connected to the internal PHY11G GPHYs while the other three are + external RGMII PHYs) are working. Neither RX nor TX traffic was + observed. It is not clear which part of the PHY auto polling state- + machine caused this. +- FritzBox 7412 (only one LAN port which is connected to one of the + internal GPHYs running in PHY22F / Fast Ethernet mode) was seeing + random disconnects (link down events could be seen). Sometimes all + traffic would stop after such disconnect. It is not clear which part + of the PHY auto polling state-machine caused this. +- TP-Link TD-W9980 (two ports are connected to the internal GPHYs + running in PHY11G / Gbit/s mode, the other two are external RGMII + PHYs) was affected by similar issues as the FritzBox 7412 just without + the "link down" events + +Switch to software based configuration instead of PHY auto polling (and +letting the GSWIP hardware configure the ports automatically) for the +following link parameters: +- link up/down +- link speed +- full/half duplex +- flow control (RX / TX pause) + +After a big round of manual testing by various people (who helped test +this on OpenWrt) it turns out that this fixes all reported issues. + +Additionally it can be considered more future proof because any +"quirk" which is implemented for a PHY on the driver side can now be +used with the GSWIP hardware as well because Linux is in control of the +link parameters. + +As a nice side-effect this also solves a problem where fixed-links were +not supported previously because we were relying on the PHY auto polling +mechanism, which cannot work for fixed-links as there's no PHY from +where it can read the registers. 
Configuring the link settings on the +GSWIP ports means that we now use the settings from device-tree also for +ports with fixed-links. + +Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200") +Fixes: 3e6fdeb28f4c33 ("net: dsa: lantiq_gswip: Let GSWIP automatically set the xMII clock") +Cc: stable@vger.kernel.org +Acked-by: Hauke Mehrtens +Reviewed-by: Andrew Lunn +Signed-off-by: Martin Blumenstingl +Reviewed-by: Florian Fainelli +Signed-off-by: David S. Miller +Signed-off-by: Greg Kroah-Hartman +[ Move gswip_port_set_{speed, duplex, pause} calls from + gswip_phylink_mac_link_up to gswip_phylink_mac_config because the + data required for these functions is not available inside + gswip_phylink_mac_link_up yet in Linux 5.4 (it was only added with + Linux 5.7). ] +Signed-off-by: Martin Blumenstingl +Signed-off-by: Sasha Levin +--- + drivers/net/dsa/lantiq_gswip.c | 186 ++++++++++++++++++++++++++++----- + 1 file changed, 160 insertions(+), 26 deletions(-) + +diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c +index 14019b3197f6..e0f5d406e6c0 100644 +--- a/drivers/net/dsa/lantiq_gswip.c ++++ b/drivers/net/dsa/lantiq_gswip.c +@@ -190,6 +190,23 @@ + #define GSWIP_PCE_DEFPVID(p) (0x486 + ((p) * 0xA)) + + #define GSWIP_MAC_FLEN 0x8C5 ++#define GSWIP_MAC_CTRL_0p(p) (0x903 + ((p) * 0xC)) ++#define GSWIP_MAC_CTRL_0_PADEN BIT(8) ++#define GSWIP_MAC_CTRL_0_FCS_EN BIT(7) ++#define GSWIP_MAC_CTRL_0_FCON_MASK 0x0070 ++#define GSWIP_MAC_CTRL_0_FCON_AUTO 0x0000 ++#define GSWIP_MAC_CTRL_0_FCON_RX 0x0010 ++#define GSWIP_MAC_CTRL_0_FCON_TX 0x0020 ++#define GSWIP_MAC_CTRL_0_FCON_RXTX 0x0030 ++#define GSWIP_MAC_CTRL_0_FCON_NONE 0x0040 ++#define GSWIP_MAC_CTRL_0_FDUP_MASK 0x000C ++#define GSWIP_MAC_CTRL_0_FDUP_AUTO 0x0000 ++#define GSWIP_MAC_CTRL_0_FDUP_EN 0x0004 ++#define GSWIP_MAC_CTRL_0_FDUP_DIS 0x000C ++#define GSWIP_MAC_CTRL_0_GMII_MASK 0x0003 ++#define GSWIP_MAC_CTRL_0_GMII_AUTO 0x0000 ++#define GSWIP_MAC_CTRL_0_GMII_MII 0x0001 
++#define GSWIP_MAC_CTRL_0_GMII_RGMII 0x0002 + #define GSWIP_MAC_CTRL_2p(p) (0x905 + ((p) * 0xC)) + #define GSWIP_MAC_CTRL_2_MLEN BIT(3) /* Maximum Untagged Frame Lnegth */ + +@@ -653,16 +670,13 @@ static int gswip_port_enable(struct dsa_switch *ds, int port, + GSWIP_SDMA_PCTRLp(port)); + + if (!dsa_is_cpu_port(ds, port)) { +- u32 macconf = GSWIP_MDIO_PHY_LINK_AUTO | +- GSWIP_MDIO_PHY_SPEED_AUTO | +- GSWIP_MDIO_PHY_FDUP_AUTO | +- GSWIP_MDIO_PHY_FCONTX_AUTO | +- GSWIP_MDIO_PHY_FCONRX_AUTO | +- (phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK); +- +- gswip_mdio_w(priv, macconf, GSWIP_MDIO_PHYp(port)); +- /* Activate MDIO auto polling */ +- gswip_mdio_mask(priv, 0, BIT(port), GSWIP_MDIO_MDC_CFG0); ++ u32 mdio_phy = 0; ++ ++ if (phydev) ++ mdio_phy = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK; ++ ++ gswip_mdio_mask(priv, GSWIP_MDIO_PHY_ADDR_MASK, mdio_phy, ++ GSWIP_MDIO_PHYp(port)); + } + + return 0; +@@ -675,14 +689,6 @@ static void gswip_port_disable(struct dsa_switch *ds, int port) + if (!dsa_is_user_port(ds, port)) + return; + +- if (!dsa_is_cpu_port(ds, port)) { +- gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_DOWN, +- GSWIP_MDIO_PHY_LINK_MASK, +- GSWIP_MDIO_PHYp(port)); +- /* Deactivate MDIO auto polling */ +- gswip_mdio_mask(priv, BIT(port), 0, GSWIP_MDIO_MDC_CFG0); +- } +- + gswip_switch_mask(priv, GSWIP_FDMA_PCTRL_EN, 0, + GSWIP_FDMA_PCTRLp(port)); + gswip_switch_mask(priv, GSWIP_SDMA_PCTRL_EN, 0, +@@ -790,20 +796,31 @@ static int gswip_setup(struct dsa_switch *ds) + gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP2); + gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP3); + +- /* disable PHY auto polling */ ++ /* Deactivate MDIO PHY auto polling. Some PHYs as the AR8030 have an ++ * interoperability problem with this auto polling mechanism because ++ * their status registers think that the link is in a different state ++ * than it actually is. For the AR8030 it has the BMSR_ESTATEN bit set ++ * as well as ESTATUS_1000_TFULL and ESTATUS_1000_XFULL. 
This makes the ++ * auto polling state machine consider the link being negotiated with ++ * 1Gbit/s. Since the PHY itself is a Fast Ethernet RMII PHY this leads ++ * to the switch port being completely dead (RX and TX are both not ++ * working). ++ * Also with various other PHY / port combinations (PHY11G GPHY, PHY22F ++ * GPHY, external RGMII PEF7071/7072) any traffic would stop. Sometimes ++ * it would work fine for a few minutes to hours and then stop, on ++ * other device it would no traffic could be sent or received at all. ++ * Testing shows that when PHY auto polling is disabled these problems ++ * go away. ++ */ + gswip_mdio_w(priv, 0x0, GSWIP_MDIO_MDC_CFG0); ++ + /* Configure the MDIO Clock 2.5 MHz */ + gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1); + +- for (i = 0; i < priv->hw_info->max_ports; i++) { +- /* Disable the xMII link */ ++ /* Disable the xMII link */ ++ for (i = 0; i < priv->hw_info->max_ports; i++) + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i); + +- /* Automatically select the xMII interface clock */ +- gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, +- GSWIP_MII_CFG_RATE_AUTO, i); +- } +- + /* enable special tag insertion on cpu port */ + gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN, + GSWIP_FDMA_PCTRLp(cpu_port)); +@@ -1452,6 +1469,112 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port, + return; + } + ++static void gswip_port_set_link(struct gswip_priv *priv, int port, bool link) ++{ ++ u32 mdio_phy; ++ ++ if (link) ++ mdio_phy = GSWIP_MDIO_PHY_LINK_UP; ++ else ++ mdio_phy = GSWIP_MDIO_PHY_LINK_DOWN; ++ ++ gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_MASK, mdio_phy, ++ GSWIP_MDIO_PHYp(port)); ++} ++ ++static void gswip_port_set_speed(struct gswip_priv *priv, int port, int speed, ++ phy_interface_t interface) ++{ ++ u32 mdio_phy = 0, mii_cfg = 0, mac_ctrl_0 = 0; ++ ++ switch (speed) { ++ case SPEED_10: ++ mdio_phy = GSWIP_MDIO_PHY_SPEED_M10; ++ ++ if (interface == PHY_INTERFACE_MODE_RMII) ++ mii_cfg = 
GSWIP_MII_CFG_RATE_M50; ++ else ++ mii_cfg = GSWIP_MII_CFG_RATE_M2P5; ++ ++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII; ++ break; ++ ++ case SPEED_100: ++ mdio_phy = GSWIP_MDIO_PHY_SPEED_M100; ++ ++ if (interface == PHY_INTERFACE_MODE_RMII) ++ mii_cfg = GSWIP_MII_CFG_RATE_M50; ++ else ++ mii_cfg = GSWIP_MII_CFG_RATE_M25; ++ ++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII; ++ break; ++ ++ case SPEED_1000: ++ mdio_phy = GSWIP_MDIO_PHY_SPEED_G1; ++ ++ mii_cfg = GSWIP_MII_CFG_RATE_M125; ++ ++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_RGMII; ++ break; ++ } ++ ++ gswip_mdio_mask(priv, GSWIP_MDIO_PHY_SPEED_MASK, mdio_phy, ++ GSWIP_MDIO_PHYp(port)); ++ gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, mii_cfg, port); ++ gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_GMII_MASK, mac_ctrl_0, ++ GSWIP_MAC_CTRL_0p(port)); ++} ++ ++static void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex) ++{ ++ u32 mac_ctrl_0, mdio_phy; ++ ++ if (duplex == DUPLEX_FULL) { ++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_EN; ++ mdio_phy = GSWIP_MDIO_PHY_FDUP_EN; ++ } else { ++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_DIS; ++ mdio_phy = GSWIP_MDIO_PHY_FDUP_DIS; ++ } ++ ++ gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FDUP_MASK, mac_ctrl_0, ++ GSWIP_MAC_CTRL_0p(port)); ++ gswip_mdio_mask(priv, GSWIP_MDIO_PHY_FDUP_MASK, mdio_phy, ++ GSWIP_MDIO_PHYp(port)); ++} ++ ++static void gswip_port_set_pause(struct gswip_priv *priv, int port, ++ bool tx_pause, bool rx_pause) ++{ ++ u32 mac_ctrl_0, mdio_phy; ++ ++ if (tx_pause && rx_pause) { ++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RXTX; ++ mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN | ++ GSWIP_MDIO_PHY_FCONRX_EN; ++ } else if (tx_pause) { ++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_TX; ++ mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN | ++ GSWIP_MDIO_PHY_FCONRX_DIS; ++ } else if (rx_pause) { ++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RX; ++ mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS | ++ GSWIP_MDIO_PHY_FCONRX_EN; ++ } else { ++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_NONE; ++ mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS | ++ 
GSWIP_MDIO_PHY_FCONRX_DIS; ++ } ++ ++ gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FCON_MASK, ++ mac_ctrl_0, GSWIP_MAC_CTRL_0p(port)); ++ gswip_mdio_mask(priv, ++ GSWIP_MDIO_PHY_FCONTX_MASK | ++ GSWIP_MDIO_PHY_FCONRX_MASK, ++ mdio_phy, GSWIP_MDIO_PHYp(port)); ++} ++ + static void gswip_phylink_mac_config(struct dsa_switch *ds, int port, + unsigned int mode, + const struct phylink_link_state *state) +@@ -1485,6 +1608,11 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port, + } + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_MODE_MASK, miicfg, port); + ++ gswip_port_set_speed(priv, port, state->speed, state->interface); ++ gswip_port_set_duplex(priv, port, state->duplex); ++ gswip_port_set_pause(priv, port, !!(state->pause & MLO_PAUSE_TX), ++ !!(state->pause & MLO_PAUSE_RX)); ++ + switch (state->interface) { + case PHY_INTERFACE_MODE_RGMII_ID: + gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_TXDLY_MASK | +@@ -1508,6 +1636,9 @@ static void gswip_phylink_mac_link_down(struct dsa_switch *ds, int port, + struct gswip_priv *priv = ds->priv; + + gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, port); ++ ++ if (!dsa_is_cpu_port(ds, port)) ++ gswip_port_set_link(priv, port, false); + } + + static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port, +@@ -1517,6 +1648,9 @@ static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port, + { + struct gswip_priv *priv = ds->priv; + ++ if (!dsa_is_cpu_port(ds, port)) ++ gswip_port_set_link(priv, port, true); ++ + gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port); + } + +-- +2.30.2 + diff --git a/queue-5.4/series b/queue-5.4/series index 9dcc313bae1..48effe1bebf 100644 --- a/queue-5.4/series +++ b/queue-5.4/series @@ -38,3 +38,7 @@ usbip-stub-dev-synchronize-sysfs-code-paths.patch usbip-vudc-synchronize-sysfs-code-paths.patch usbip-synchronize-event-handler-with-sysfs-code-paths.patch i2c-turn-recovery-error-on-init-to-debug.patch +virtio_net-add-xdp-meta-data-support.patch 
+virtio_net-do-not-pull-payload-in-skb-head.patch +net-dsa-lantiq_gswip-don-t-use-phy-auto-polling.patch +net-dsa-lantiq_gswip-configure-all-remaining-gswip_m.patch diff --git a/queue-5.4/virtio_net-add-xdp-meta-data-support.patch b/queue-5.4/virtio_net-add-xdp-meta-data-support.patch new file mode 100644 index 00000000000..28d720f3fd1 --- /dev/null +++ b/queue-5.4/virtio_net-add-xdp-meta-data-support.patch @@ -0,0 +1,187 @@ +From dda96b5d6bf024a4d5559998b666cdfbf4656cf8 Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Tue, 25 Feb 2020 12:32:12 +0900 +Subject: virtio_net: Add XDP meta data support + +From: Yuya Kusakabe + +[ Upstream commit 503d539a6e417b018616bf3060e0b5814fafce47 ] + +Implement support for transferring XDP meta data into skb for +virtio_net driver; before calling into the program, xdp.data_meta points +to xdp.data, where on program return with pass verdict, we call +into skb_metadata_set(). + +Tested with the script at +https://github.com/higebu/virtio_net-xdp-metadata-test. + +Signed-off-by: Yuya Kusakabe +Signed-off-by: Daniel Borkmann +Acked-by: Jason Wang +Acked-by: Michael S. 
Tsirkin +Link: https://lore.kernel.org/bpf/20200225033212.437563-2-yuya.kusakabe@gmail.com +Signed-off-by: Sasha Levin +--- + drivers/net/virtio_net.c | 52 ++++++++++++++++++++++++---------------- + 1 file changed, 32 insertions(+), 20 deletions(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index 0ef85819665c..b67460864b3c 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -376,7 +376,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, + struct receive_queue *rq, + struct page *page, unsigned int offset, + unsigned int len, unsigned int truesize, +- bool hdr_valid) ++ bool hdr_valid, unsigned int metasize) + { + struct sk_buff *skb; + struct virtio_net_hdr_mrg_rxbuf *hdr; +@@ -398,6 +398,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, + else + hdr_padded_len = sizeof(struct padded_vnet_hdr); + ++ /* hdr_valid means no XDP, so we can copy the vnet header */ + if (hdr_valid) + memcpy(hdr, p, hdr_len); + +@@ -410,6 +411,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, + copy = skb_tailroom(skb); + skb_put_data(skb, p, copy); + ++ if (metasize) { ++ __skb_pull(skb, metasize); ++ skb_metadata_set(skb, metasize); ++ } ++ + len -= copy; + offset += copy; + +@@ -455,10 +461,6 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi, + struct virtio_net_hdr_mrg_rxbuf *hdr; + int err; + +- /* virtqueue want to use data area in-front of packet */ +- if (unlikely(xdpf->metasize > 0)) +- return -EOPNOTSUPP; +- + if (unlikely(xdpf->headroom < vi->hdr_len)) + return -EOVERFLOW; + +@@ -649,6 +651,7 @@ static struct sk_buff *receive_small(struct net_device *dev, + unsigned int delta = 0; + struct page *xdp_page; + int err; ++ unsigned int metasize = 0; + + len -= vi->hdr_len; + stats->bytes += len; +@@ -688,8 +691,8 @@ static struct sk_buff *receive_small(struct net_device *dev, + + xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len; + xdp.data = xdp.data_hard_start + 
xdp_headroom; +- xdp_set_data_meta_invalid(&xdp); + xdp.data_end = xdp.data + len; ++ xdp.data_meta = xdp.data; + xdp.rxq = &rq->xdp_rxq; + orig_data = xdp.data; + act = bpf_prog_run_xdp(xdp_prog, &xdp); +@@ -700,6 +703,7 @@ static struct sk_buff *receive_small(struct net_device *dev, + /* Recalculate length in case bpf program changed it */ + delta = orig_data - xdp.data; + len = xdp.data_end - xdp.data; ++ metasize = xdp.data - xdp.data_meta; + break; + case XDP_TX: + stats->xdp_tx++; +@@ -745,6 +749,9 @@ static struct sk_buff *receive_small(struct net_device *dev, + memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len); + } /* keep zeroed vnet hdr since packet was changed by bpf */ + ++ if (metasize) ++ skb_metadata_set(skb, metasize); ++ + err: + return skb; + +@@ -765,8 +772,8 @@ static struct sk_buff *receive_big(struct net_device *dev, + struct virtnet_rq_stats *stats) + { + struct page *page = buf; +- struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len, +- PAGE_SIZE, true); ++ struct sk_buff *skb = ++ page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0); + + stats->bytes += len - vi->hdr_len; + if (unlikely(!skb)) +@@ -798,6 +805,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, + unsigned int truesize; + unsigned int headroom = mergeable_ctx_to_headroom(ctx); + int err; ++ unsigned int metasize = 0; + + head_skb = NULL; + stats->bytes += len - vi->hdr_len; +@@ -844,8 +852,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, + data = page_address(xdp_page) + offset; + xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len; + xdp.data = data + vi->hdr_len; +- xdp_set_data_meta_invalid(&xdp); + xdp.data_end = xdp.data + (len - vi->hdr_len); ++ xdp.data_meta = xdp.data; + xdp.rxq = &rq->xdp_rxq; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); +@@ -853,24 +861,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, + + switch (act) { + case XDP_PASS: ++ metasize = xdp.data - xdp.data_meta; ++ + /* recalculate 
offset to account for any header +- * adjustments. Note other cases do not build an +- * skb and avoid using offset ++ * adjustments and minus the metasize to copy the ++ * metadata in page_to_skb(). Note other cases do not ++ * build an skb and avoid using offset + */ +- offset = xdp.data - +- page_address(xdp_page) - vi->hdr_len; ++ offset = xdp.data - page_address(xdp_page) - ++ vi->hdr_len - metasize; + +- /* recalculate len if xdp.data or xdp.data_end were +- * adjusted ++ /* recalculate len if xdp.data, xdp.data_end or ++ * xdp.data_meta were adjusted + */ +- len = xdp.data_end - xdp.data + vi->hdr_len; ++ len = xdp.data_end - xdp.data + vi->hdr_len + metasize; + /* We can only create skb based on xdp_page. */ + if (unlikely(xdp_page != page)) { + rcu_read_unlock(); + put_page(page); +- head_skb = page_to_skb(vi, rq, xdp_page, +- offset, len, +- PAGE_SIZE, false); ++ head_skb = page_to_skb(vi, rq, xdp_page, offset, ++ len, PAGE_SIZE, false, ++ metasize); + return head_skb; + } + break; +@@ -926,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, + goto err_skb; + } + +- head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog); ++ head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog, ++ metasize); + curr_skb = head_skb; + + if (unlikely(!curr_skb)) +-- +2.30.2 + diff --git a/queue-5.4/virtio_net-do-not-pull-payload-in-skb-head.patch b/queue-5.4/virtio_net-do-not-pull-payload-in-skb-head.patch new file mode 100644 index 00000000000..bffbc5f46bb --- /dev/null +++ b/queue-5.4/virtio_net-do-not-pull-payload-in-skb-head.patch @@ -0,0 +1,119 @@ +From 5c71c936f47778846f8b084e2ca396f74230575d Mon Sep 17 00:00:00 2001 +From: Sasha Levin +Date: Fri, 2 Apr 2021 06:26:02 -0700 +Subject: virtio_net: Do not pull payload in skb->head + +From: Eric Dumazet + +[ Upstream commit 0f6925b3e8da0dbbb52447ca8a8b42b371aac7db ] + +Xuan Zhuo reported that commit 3226b158e67c ("net: avoid 32 x truesize +under-estimation for tiny 
skbs") brought a ~10% performance drop. + +The reason for the performance drop was that GRO was forced +to chain sk_buff (using skb_shinfo(skb)->frag_list), which +uses more memory but also causes packet consumers to go over +a lot of overhead handling all the tiny skbs. + +It turns out that virtio_net page_to_skb() has a wrong strategy: +It allocates skbs with GOOD_COPY_LEN (128) bytes in skb->head, then +copies 128 bytes from the page, before feeding the packet to GRO stack. + +This was suboptimal before commit 3226b158e67c ("net: avoid 32 x truesize +under-estimation for tiny skbs") because GRO was using 2 frags per MSS, +meaning we were not packing MSS with 100% efficiency. + +Fix is to pull only the ethernet header in page_to_skb() + +Then, we change virtio_net_hdr_to_skb() to pull the missing +headers, instead of assuming they were already pulled by callers. + +This fixes the performance regression, but could also allow virtio_net +to accept packets with more than 128 bytes of headers. + +Many thanks to Xuan Zhuo for his report, and his tests/help. + +Fixes: 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs") +Reported-by: Xuan Zhuo +Link: https://www.spinics.net/lists/netdev/msg731397.html +Co-Developed-by: Xuan Zhuo +Signed-off-by: Xuan Zhuo +Signed-off-by: Eric Dumazet +Cc: "Michael S. Tsirkin" +Cc: Jason Wang +Cc: virtualization@lists.linux-foundation.org +Acked-by: Jason Wang +Signed-off-by: David S. 
Miller +Signed-off-by: Sasha Levin +--- + drivers/net/virtio_net.c | 10 +++++++--- + include/linux/virtio_net.h | 14 +++++++++----- + 2 files changed, 16 insertions(+), 8 deletions(-) + +diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c +index b67460864b3c..d8ee001d8e8e 100644 +--- a/drivers/net/virtio_net.c ++++ b/drivers/net/virtio_net.c +@@ -406,9 +406,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi, + offset += hdr_padded_len; + p += hdr_padded_len; + +- copy = len; +- if (copy > skb_tailroom(skb)) +- copy = skb_tailroom(skb); ++ /* Copy all frame if it fits skb->head, otherwise ++ * we let virtio_net_hdr_to_skb() and GRO pull headers as needed. ++ */ ++ if (len <= skb_tailroom(skb)) ++ copy = len; ++ else ++ copy = ETH_HLEN + metasize; + skb_put_data(skb, p, copy); + + if (metasize) { +diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h +index 98775d7fa696..b465f8f3e554 100644 +--- a/include/linux/virtio_net.h ++++ b/include/linux/virtio_net.h +@@ -65,14 +65,18 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, + skb_reset_mac_header(skb); + + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { +- u16 start = __virtio16_to_cpu(little_endian, hdr->csum_start); +- u16 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); ++ u32 start = __virtio16_to_cpu(little_endian, hdr->csum_start); ++ u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); ++ u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); ++ ++ if (!pskb_may_pull(skb, needed)) ++ return -EINVAL; + + if (!skb_partial_csum_set(skb, start, off)) + return -EINVAL; + + p_off = skb_transport_offset(skb) + thlen; +- if (p_off > skb_headlen(skb)) ++ if (!pskb_may_pull(skb, p_off)) + return -EINVAL; + } else { + /* gso packets without NEEDS_CSUM do not set transport_offset. 
+@@ -102,14 +106,14 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, + } + + p_off = keys.control.thoff + thlen; +- if (p_off > skb_headlen(skb) || ++ if (!pskb_may_pull(skb, p_off) || + keys.basic.ip_proto != ip_proto) + return -EINVAL; + + skb_set_transport_header(skb, keys.control.thoff); + } else if (gso_type) { + p_off = thlen; +- if (p_off > skb_headlen(skb)) ++ if (!pskb_may_pull(skb, p_off)) + return -EINVAL; + } + } +-- +2.30.2 +