--- /dev/null
+From 24d4b996e6031700ec91aba70b5ac86cd24a1d60 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 11 Apr 2021 12:23:44 +0200
+Subject: net: dsa: lantiq_gswip: Configure all remaining GSWIP_MII_CFG bits
+
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+commit 4b5923249b8fa427943b50b8f35265176472be38 upstream.
+
+There are a few more bits in the GSWIP_MII_CFG register that we relied
+on the boot-loader (or the hardware defaults) to set up properly.
+
+For some external RMII PHYs we need to set the GSWIP_MII_CFG_RMII_CLK
+bit, and we should clear it for non-RMII PHYs. The GSWIP_MII_CFG_RMII_CLK
+bit is ignored in other PHY connection modes.
+
+The GSWIP IP also supports in-band auto-negotiation for RGMII PHYs when
+the GSWIP_MII_CFG_RGMII_IBS bit is set. Always clear this bit as there
+is no known hardware which uses it (so it is untested).
+
+Clear the xMII isolation bit at initialization time if it was previously
+set by the bootloader. Not doing so could lead to no traffic (neither RX
+nor TX) on a port with this bit set.
+
+While here, also add the GSWIP_MII_CFG_RESET bit. We don't need to
+manage it because this bit is self-clearing when set. We still add it
+here to get a better overview of the GSWIP_MII_CFG register.
+
+Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200")
+Cc: stable@vger.kernel.org
+Suggested-by: Hauke Mehrtens <hauke@hauke-m.de>
+Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[ Updated after the upstream commit 3e9005be87777 required some changes
+ for Linux 5.4 ]
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/lantiq_gswip.c | 19 ++++++++++++++++---
+ 1 file changed, 16 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
+index e0f5d406e6c0..dc75e798dbff 100644
+--- a/drivers/net/dsa/lantiq_gswip.c
++++ b/drivers/net/dsa/lantiq_gswip.c
+@@ -93,8 +93,12 @@
+
+ /* GSWIP MII Registers */
+ #define GSWIP_MII_CFGp(p) (0x2 * (p))
++#define GSWIP_MII_CFG_RESET BIT(15)
+ #define GSWIP_MII_CFG_EN BIT(14)
++#define GSWIP_MII_CFG_ISOLATE BIT(13)
+ #define GSWIP_MII_CFG_LDCLKDIS BIT(12)
++#define GSWIP_MII_CFG_RGMII_IBS BIT(8)
++#define GSWIP_MII_CFG_RMII_CLK BIT(7)
+ #define GSWIP_MII_CFG_MODE_MIIP 0x0
+ #define GSWIP_MII_CFG_MODE_MIIM 0x1
+ #define GSWIP_MII_CFG_MODE_RMIIP 0x2
+@@ -817,9 +821,11 @@ static int gswip_setup(struct dsa_switch *ds)
+ /* Configure the MDIO Clock 2.5 MHz */
+ gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1);
+
+- /* Disable the xMII link */
++ /* Disable the xMII interface and clear its isolation bit */
+ for (i = 0; i < priv->hw_info->max_ports; i++)
+- gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i);
++ gswip_mii_mask_cfg(priv,
++ GSWIP_MII_CFG_EN | GSWIP_MII_CFG_ISOLATE,
++ 0, i);
+
+ /* enable special tag insertion on cpu port */
+ gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN,
+@@ -1594,6 +1600,9 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
+ break;
+ case PHY_INTERFACE_MODE_RMII:
+ miicfg |= GSWIP_MII_CFG_MODE_RMIIM;
++
++ /* Configure the RMII clock as output: */
++ miicfg |= GSWIP_MII_CFG_RMII_CLK;
+ break;
+ case PHY_INTERFACE_MODE_RGMII:
+ case PHY_INTERFACE_MODE_RGMII_ID:
+@@ -1606,7 +1615,11 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
+ "Unsupported interface: %d\n", state->interface);
+ return;
+ }
+- gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_MODE_MASK, miicfg, port);
++
++ gswip_mii_mask_cfg(priv,
++ GSWIP_MII_CFG_MODE_MASK | GSWIP_MII_CFG_RMII_CLK |
++ GSWIP_MII_CFG_RGMII_IBS | GSWIP_MII_CFG_LDCLKDIS,
++ miicfg, port);
+
+ gswip_port_set_speed(priv, port, state->speed, state->interface);
+ gswip_port_set_duplex(priv, port, state->duplex);
+--
+2.30.2
+
--- /dev/null
+From 56dd98de3880dc64717b3d92dd15ba6160da89f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 11 Apr 2021 12:23:43 +0200
+Subject: net: dsa: lantiq_gswip: Don't use PHY auto polling
+
+From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+commit 3e9005be87777afc902b9f5497495898202d335d upstream.
+
+PHY auto polling on the GSWIP hardware can be used so link changes
+(speed, link up/down, etc.) can be detected automatically. Internally
+GSWIP reads the PHY's registers for this functionality. Based on this
+automatic detection GSWIP can also automatically re-configure its port
+settings. Unfortunately this auto polling (and configuration) mechanism
+seems to cause various issues observed by different people on different
+devices:
+- FritzBox 7360v2: the two Gbit/s ports (connected to the two internal
+ PHY11G instances) are working fine but the two Fast Ethernet ports
+  (using an AR8030 RMII PHY) are completely dead (neither RX nor TX
+  works). It turns out that the AR8030 PHY sets the BMSR_ESTATEN bit
+ as well as the ESTATUS_1000_TFULL and ESTATUS_1000_XFULL bits. This
+ makes the PHY auto polling state machine (rightfully?) think that the
+ established link speed (when the other side is Gbit/s capable) is
+ 1Gbit/s.
+- None of the Ethernet ports on the Zyxel P-2812HNU-F1 (two are
+ connected to the internal PHY11G GPHYs while the other three are
+ external RGMII PHYs) are working. Neither RX nor TX traffic was
+ observed. It is not clear which part of the PHY auto polling state-
+ machine caused this.
+- FritzBox 7412 (only one LAN port which is connected to one of the
+ internal GPHYs running in PHY22F / Fast Ethernet mode) was seeing
+ random disconnects (link down events could be seen). Sometimes all
+  traffic would stop after such a disconnect. It is not clear which part
+  of the PHY auto polling state-machine caused this.
+- TP-Link TD-W9980 (two ports are connected to the internal GPHYs
+ running in PHY11G / Gbit/s mode, the other two are external RGMII
+  PHYs) was affected by similar issues to the FritzBox 7412, just without
+  the "link down" events.
+
+Switch to software based configuration instead of PHY auto polling (and
+letting the GSWIP hardware configure the ports automatically) for the
+following link parameters:
+- link up/down
+- link speed
+- full/half duplex
+- flow control (RX / TX pause)
+
+After a big round of manual testing by various people (who helped test
+this on OpenWrt) it turns out that this fixes all reported issues.
+
+Additionally, this can be considered more future-proof because any
+"quirk" implemented for a PHY on the driver side can now be used with
+the GSWIP hardware as well, since Linux is in control of the link
+parameters.
+
+As a nice side-effect this also solves a problem where fixed-links were
+not supported previously: we were relying on the PHY auto polling
+mechanism, which cannot work for fixed-links as there is no PHY from
+which it could read the registers. Configuring the link settings on the
+GSWIP ports means that the settings from the device tree are now also
+used for ports with fixed-links.
+
+Fixes: 14fceff4771e51 ("net: dsa: Add Lantiq / Intel DSA driver for vrx200")
+Fixes: 3e6fdeb28f4c33 ("net: dsa: lantiq_gswip: Let GSWIP automatically set the xMII clock")
+Cc: stable@vger.kernel.org
+Acked-by: Hauke Mehrtens <hauke@hauke-m.de>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+[ Move gswip_port_set_{speed, duplex, pause} calls from
+ gswip_phylink_mac_link_up to gswip_phylink_mac_config because the
+ data required for these functions is not available inside
+ gswip_phylink_mac_link_up yet in Linux 5.4 (it was only added with
+ Linux 5.7). ]
+Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/lantiq_gswip.c | 186 ++++++++++++++++++++++++++++-----
+ 1 file changed, 160 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/net/dsa/lantiq_gswip.c b/drivers/net/dsa/lantiq_gswip.c
+index 14019b3197f6..e0f5d406e6c0 100644
+--- a/drivers/net/dsa/lantiq_gswip.c
++++ b/drivers/net/dsa/lantiq_gswip.c
+@@ -190,6 +190,23 @@
+ #define GSWIP_PCE_DEFPVID(p) (0x486 + ((p) * 0xA))
+
+ #define GSWIP_MAC_FLEN 0x8C5
++#define GSWIP_MAC_CTRL_0p(p) (0x903 + ((p) * 0xC))
++#define GSWIP_MAC_CTRL_0_PADEN BIT(8)
++#define GSWIP_MAC_CTRL_0_FCS_EN BIT(7)
++#define GSWIP_MAC_CTRL_0_FCON_MASK 0x0070
++#define GSWIP_MAC_CTRL_0_FCON_AUTO 0x0000
++#define GSWIP_MAC_CTRL_0_FCON_RX 0x0010
++#define GSWIP_MAC_CTRL_0_FCON_TX 0x0020
++#define GSWIP_MAC_CTRL_0_FCON_RXTX 0x0030
++#define GSWIP_MAC_CTRL_0_FCON_NONE 0x0040
++#define GSWIP_MAC_CTRL_0_FDUP_MASK 0x000C
++#define GSWIP_MAC_CTRL_0_FDUP_AUTO 0x0000
++#define GSWIP_MAC_CTRL_0_FDUP_EN 0x0004
++#define GSWIP_MAC_CTRL_0_FDUP_DIS 0x000C
++#define GSWIP_MAC_CTRL_0_GMII_MASK 0x0003
++#define GSWIP_MAC_CTRL_0_GMII_AUTO 0x0000
++#define GSWIP_MAC_CTRL_0_GMII_MII 0x0001
++#define GSWIP_MAC_CTRL_0_GMII_RGMII 0x0002
+ #define GSWIP_MAC_CTRL_2p(p) (0x905 + ((p) * 0xC))
+ #define GSWIP_MAC_CTRL_2_MLEN BIT(3) /* Maximum Untagged Frame Lnegth */
+
+@@ -653,16 +670,13 @@ static int gswip_port_enable(struct dsa_switch *ds, int port,
+ GSWIP_SDMA_PCTRLp(port));
+
+ if (!dsa_is_cpu_port(ds, port)) {
+- u32 macconf = GSWIP_MDIO_PHY_LINK_AUTO |
+- GSWIP_MDIO_PHY_SPEED_AUTO |
+- GSWIP_MDIO_PHY_FDUP_AUTO |
+- GSWIP_MDIO_PHY_FCONTX_AUTO |
+- GSWIP_MDIO_PHY_FCONRX_AUTO |
+- (phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK);
+-
+- gswip_mdio_w(priv, macconf, GSWIP_MDIO_PHYp(port));
+- /* Activate MDIO auto polling */
+- gswip_mdio_mask(priv, 0, BIT(port), GSWIP_MDIO_MDC_CFG0);
++ u32 mdio_phy = 0;
++
++ if (phydev)
++ mdio_phy = phydev->mdio.addr & GSWIP_MDIO_PHY_ADDR_MASK;
++
++ gswip_mdio_mask(priv, GSWIP_MDIO_PHY_ADDR_MASK, mdio_phy,
++ GSWIP_MDIO_PHYp(port));
+ }
+
+ return 0;
+@@ -675,14 +689,6 @@ static void gswip_port_disable(struct dsa_switch *ds, int port)
+ if (!dsa_is_user_port(ds, port))
+ return;
+
+- if (!dsa_is_cpu_port(ds, port)) {
+- gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_DOWN,
+- GSWIP_MDIO_PHY_LINK_MASK,
+- GSWIP_MDIO_PHYp(port));
+- /* Deactivate MDIO auto polling */
+- gswip_mdio_mask(priv, BIT(port), 0, GSWIP_MDIO_MDC_CFG0);
+- }
+-
+ gswip_switch_mask(priv, GSWIP_FDMA_PCTRL_EN, 0,
+ GSWIP_FDMA_PCTRLp(port));
+ gswip_switch_mask(priv, GSWIP_SDMA_PCTRL_EN, 0,
+@@ -790,20 +796,31 @@ static int gswip_setup(struct dsa_switch *ds)
+ gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP2);
+ gswip_switch_w(priv, BIT(cpu_port), GSWIP_PCE_PMAP3);
+
+- /* disable PHY auto polling */
++ /* Deactivate MDIO PHY auto polling. Some PHYs such as the AR8030 have an
++ * interoperability problem with this auto polling mechanism because
++ * their status registers think that the link is in a different state
++ * than it actually is. For the AR8030 it has the BMSR_ESTATEN bit set
++ * as well as ESTATUS_1000_TFULL and ESTATUS_1000_XFULL. This makes the
++ * auto polling state machine consider the link being negotiated with
++ * 1Gbit/s. Since the PHY itself is a Fast Ethernet RMII PHY this leads
++ * to the switch port being completely dead (RX and TX are both not
++ * working).
++ * Also with various other PHY / port combinations (PHY11G GPHY, PHY22F
++ * GPHY, external RGMII PEF7071/7072) any traffic would stop. Sometimes
++ * it would work fine for a few minutes to hours and then stop; on
++ * other devices no traffic could be sent or received at all.
++ * Testing shows that when PHY auto polling is disabled these problems
++ * go away.
++ */
+ gswip_mdio_w(priv, 0x0, GSWIP_MDIO_MDC_CFG0);
++
+ /* Configure the MDIO Clock 2.5 MHz */
+ gswip_mdio_mask(priv, 0xff, 0x09, GSWIP_MDIO_MDC_CFG1);
+
+- for (i = 0; i < priv->hw_info->max_ports; i++) {
+- /* Disable the xMII link */
++ /* Disable the xMII link */
++ for (i = 0; i < priv->hw_info->max_ports; i++)
+ gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, i);
+
+- /* Automatically select the xMII interface clock */
+- gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK,
+- GSWIP_MII_CFG_RATE_AUTO, i);
+- }
+-
+ /* enable special tag insertion on cpu port */
+ gswip_switch_mask(priv, 0, GSWIP_FDMA_PCTRL_STEN,
+ GSWIP_FDMA_PCTRLp(cpu_port));
+@@ -1452,6 +1469,112 @@ static void gswip_phylink_validate(struct dsa_switch *ds, int port,
+ return;
+ }
+
++static void gswip_port_set_link(struct gswip_priv *priv, int port, bool link)
++{
++ u32 mdio_phy;
++
++ if (link)
++ mdio_phy = GSWIP_MDIO_PHY_LINK_UP;
++ else
++ mdio_phy = GSWIP_MDIO_PHY_LINK_DOWN;
++
++ gswip_mdio_mask(priv, GSWIP_MDIO_PHY_LINK_MASK, mdio_phy,
++ GSWIP_MDIO_PHYp(port));
++}
++
++static void gswip_port_set_speed(struct gswip_priv *priv, int port, int speed,
++ phy_interface_t interface)
++{
++ u32 mdio_phy = 0, mii_cfg = 0, mac_ctrl_0 = 0;
++
++ switch (speed) {
++ case SPEED_10:
++ mdio_phy = GSWIP_MDIO_PHY_SPEED_M10;
++
++ if (interface == PHY_INTERFACE_MODE_RMII)
++ mii_cfg = GSWIP_MII_CFG_RATE_M50;
++ else
++ mii_cfg = GSWIP_MII_CFG_RATE_M2P5;
++
++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII;
++ break;
++
++ case SPEED_100:
++ mdio_phy = GSWIP_MDIO_PHY_SPEED_M100;
++
++ if (interface == PHY_INTERFACE_MODE_RMII)
++ mii_cfg = GSWIP_MII_CFG_RATE_M50;
++ else
++ mii_cfg = GSWIP_MII_CFG_RATE_M25;
++
++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_MII;
++ break;
++
++ case SPEED_1000:
++ mdio_phy = GSWIP_MDIO_PHY_SPEED_G1;
++
++ mii_cfg = GSWIP_MII_CFG_RATE_M125;
++
++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_GMII_RGMII;
++ break;
++ }
++
++ gswip_mdio_mask(priv, GSWIP_MDIO_PHY_SPEED_MASK, mdio_phy,
++ GSWIP_MDIO_PHYp(port));
++ gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_RATE_MASK, mii_cfg, port);
++ gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_GMII_MASK, mac_ctrl_0,
++ GSWIP_MAC_CTRL_0p(port));
++}
++
++static void gswip_port_set_duplex(struct gswip_priv *priv, int port, int duplex)
++{
++ u32 mac_ctrl_0, mdio_phy;
++
++ if (duplex == DUPLEX_FULL) {
++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_EN;
++ mdio_phy = GSWIP_MDIO_PHY_FDUP_EN;
++ } else {
++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FDUP_DIS;
++ mdio_phy = GSWIP_MDIO_PHY_FDUP_DIS;
++ }
++
++ gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FDUP_MASK, mac_ctrl_0,
++ GSWIP_MAC_CTRL_0p(port));
++ gswip_mdio_mask(priv, GSWIP_MDIO_PHY_FDUP_MASK, mdio_phy,
++ GSWIP_MDIO_PHYp(port));
++}
++
++static void gswip_port_set_pause(struct gswip_priv *priv, int port,
++ bool tx_pause, bool rx_pause)
++{
++ u32 mac_ctrl_0, mdio_phy;
++
++ if (tx_pause && rx_pause) {
++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RXTX;
++ mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN |
++ GSWIP_MDIO_PHY_FCONRX_EN;
++ } else if (tx_pause) {
++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_TX;
++ mdio_phy = GSWIP_MDIO_PHY_FCONTX_EN |
++ GSWIP_MDIO_PHY_FCONRX_DIS;
++ } else if (rx_pause) {
++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_RX;
++ mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS |
++ GSWIP_MDIO_PHY_FCONRX_EN;
++ } else {
++ mac_ctrl_0 = GSWIP_MAC_CTRL_0_FCON_NONE;
++ mdio_phy = GSWIP_MDIO_PHY_FCONTX_DIS |
++ GSWIP_MDIO_PHY_FCONRX_DIS;
++ }
++
++ gswip_switch_mask(priv, GSWIP_MAC_CTRL_0_FCON_MASK,
++ mac_ctrl_0, GSWIP_MAC_CTRL_0p(port));
++ gswip_mdio_mask(priv,
++ GSWIP_MDIO_PHY_FCONTX_MASK |
++ GSWIP_MDIO_PHY_FCONRX_MASK,
++ mdio_phy, GSWIP_MDIO_PHYp(port));
++}
++
+ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
+ unsigned int mode,
+ const struct phylink_link_state *state)
+@@ -1485,6 +1608,11 @@ static void gswip_phylink_mac_config(struct dsa_switch *ds, int port,
+ }
+ gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_MODE_MASK, miicfg, port);
+
++ gswip_port_set_speed(priv, port, state->speed, state->interface);
++ gswip_port_set_duplex(priv, port, state->duplex);
++ gswip_port_set_pause(priv, port, !!(state->pause & MLO_PAUSE_TX),
++ !!(state->pause & MLO_PAUSE_RX));
++
+ switch (state->interface) {
+ case PHY_INTERFACE_MODE_RGMII_ID:
+ gswip_mii_mask_pcdu(priv, GSWIP_MII_PCDU_TXDLY_MASK |
+@@ -1508,6 +1636,9 @@ static void gswip_phylink_mac_link_down(struct dsa_switch *ds, int port,
+ struct gswip_priv *priv = ds->priv;
+
+ gswip_mii_mask_cfg(priv, GSWIP_MII_CFG_EN, 0, port);
++
++ if (!dsa_is_cpu_port(ds, port))
++ gswip_port_set_link(priv, port, false);
+ }
+
+ static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port,
+@@ -1517,6 +1648,9 @@ static void gswip_phylink_mac_link_up(struct dsa_switch *ds, int port,
+ {
+ struct gswip_priv *priv = ds->priv;
+
++ if (!dsa_is_cpu_port(ds, port))
++ gswip_port_set_link(priv, port, true);
++
+ gswip_mii_mask_cfg(priv, 0, GSWIP_MII_CFG_EN, port);
+ }
+
+--
+2.30.2
+
usbip-vudc-synchronize-sysfs-code-paths.patch
usbip-synchronize-event-handler-with-sysfs-code-paths.patch
i2c-turn-recovery-error-on-init-to-debug.patch
+virtio_net-add-xdp-meta-data-support.patch
+virtio_net-do-not-pull-payload-in-skb-head.patch
+net-dsa-lantiq_gswip-don-t-use-phy-auto-polling.patch
+net-dsa-lantiq_gswip-configure-all-remaining-gswip_m.patch
--- /dev/null
+From dda96b5d6bf024a4d5559998b666cdfbf4656cf8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 25 Feb 2020 12:32:12 +0900
+Subject: virtio_net: Add XDP meta data support
+
+From: Yuya Kusakabe <yuya.kusakabe@gmail.com>
+
+[ Upstream commit 503d539a6e417b018616bf3060e0b5814fafce47 ]
+
+Implement support for transferring XDP meta data into the skb for the
+virtio_net driver: before calling into the program, xdp.data_meta points
+to xdp.data; on program return with a pass verdict, we call
+skb_metadata_set().
+
+Tested with the script at
+https://github.com/higebu/virtio_net-xdp-metadata-test.
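+
+As an aside (not part of the upstream change), a minimal XDP program
+that produces such metadata could look like the hypothetical sketch
+below; the rx_meta struct and its field are made up for illustration,
+and the only helper assumed is the existing bpf_xdp_adjust_meta().
+With this patch, the bytes written between data_meta and data are
+attached to the resulting skb via skb_metadata_set(), e.g. for a later
+tc BPF program to read.
+
+  #include <linux/bpf.h>
+  #include <bpf/bpf_helpers.h>
+
+  /* Illustrative metadata layout; not defined by the kernel. */
+  struct rx_meta {
+          __u32 mark;
+  };
+
+  SEC("xdp")
+  int xdp_store_meta(struct xdp_md *ctx)
+  {
+          struct rx_meta *meta;
+          void *data;
+
+          /* Reserve sizeof(*meta) bytes in front of the packet data. */
+          if (bpf_xdp_adjust_meta(ctx, -(int)sizeof(*meta)))
+                  return XDP_PASS;
+
+          data = (void *)(long)ctx->data;
+          meta = (void *)(long)ctx->data_meta;
+          if ((void *)(meta + 1) > data)  /* bounds check for the verifier */
+                  return XDP_PASS;
+
+          meta->mark = 0x42;  /* carried into the skb by skb_metadata_set() */
+          return XDP_PASS;
+  }
+
+  char _license[] SEC("license") = "GPL";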
+
+Signed-off-by: Yuya Kusakabe <yuya.kusakabe@gmail.com>
+Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Link: https://lore.kernel.org/bpf/20200225033212.437563-2-yuya.kusakabe@gmail.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 52 ++++++++++++++++++++++++----------------
+ 1 file changed, 32 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index 0ef85819665c..b67460864b3c 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -376,7 +376,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+ struct receive_queue *rq,
+ struct page *page, unsigned int offset,
+ unsigned int len, unsigned int truesize,
+- bool hdr_valid)
++ bool hdr_valid, unsigned int metasize)
+ {
+ struct sk_buff *skb;
+ struct virtio_net_hdr_mrg_rxbuf *hdr;
+@@ -398,6 +398,7 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+ else
+ hdr_padded_len = sizeof(struct padded_vnet_hdr);
+
++ /* hdr_valid means no XDP, so we can copy the vnet header */
+ if (hdr_valid)
+ memcpy(hdr, p, hdr_len);
+
+@@ -410,6 +411,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+ copy = skb_tailroom(skb);
+ skb_put_data(skb, p, copy);
+
++ if (metasize) {
++ __skb_pull(skb, metasize);
++ skb_metadata_set(skb, metasize);
++ }
++
+ len -= copy;
+ offset += copy;
+
+@@ -455,10 +461,6 @@ static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
+ struct virtio_net_hdr_mrg_rxbuf *hdr;
+ int err;
+
+- /* virtqueue want to use data area in-front of packet */
+- if (unlikely(xdpf->metasize > 0))
+- return -EOPNOTSUPP;
+-
+ if (unlikely(xdpf->headroom < vi->hdr_len))
+ return -EOVERFLOW;
+
+@@ -649,6 +651,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
+ unsigned int delta = 0;
+ struct page *xdp_page;
+ int err;
++ unsigned int metasize = 0;
+
+ len -= vi->hdr_len;
+ stats->bytes += len;
+@@ -688,8 +691,8 @@ static struct sk_buff *receive_small(struct net_device *dev,
+
+ xdp.data_hard_start = buf + VIRTNET_RX_PAD + vi->hdr_len;
+ xdp.data = xdp.data_hard_start + xdp_headroom;
+- xdp_set_data_meta_invalid(&xdp);
+ xdp.data_end = xdp.data + len;
++ xdp.data_meta = xdp.data;
+ xdp.rxq = &rq->xdp_rxq;
+ orig_data = xdp.data;
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+@@ -700,6 +703,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
+ /* Recalculate length in case bpf program changed it */
+ delta = orig_data - xdp.data;
+ len = xdp.data_end - xdp.data;
++ metasize = xdp.data - xdp.data_meta;
+ break;
+ case XDP_TX:
+ stats->xdp_tx++;
+@@ -745,6 +749,9 @@ static struct sk_buff *receive_small(struct net_device *dev,
+ memcpy(skb_vnet_hdr(skb), buf, vi->hdr_len);
+ } /* keep zeroed vnet hdr since packet was changed by bpf */
+
++ if (metasize)
++ skb_metadata_set(skb, metasize);
++
+ err:
+ return skb;
+
+@@ -765,8 +772,8 @@ static struct sk_buff *receive_big(struct net_device *dev,
+ struct virtnet_rq_stats *stats)
+ {
+ struct page *page = buf;
+- struct sk_buff *skb = page_to_skb(vi, rq, page, 0, len,
+- PAGE_SIZE, true);
++ struct sk_buff *skb =
++ page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, true, 0);
+
+ stats->bytes += len - vi->hdr_len;
+ if (unlikely(!skb))
+@@ -798,6 +805,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
+ unsigned int truesize;
+ unsigned int headroom = mergeable_ctx_to_headroom(ctx);
+ int err;
++ unsigned int metasize = 0;
+
+ head_skb = NULL;
+ stats->bytes += len - vi->hdr_len;
+@@ -844,8 +852,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
+ data = page_address(xdp_page) + offset;
+ xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;
+ xdp.data = data + vi->hdr_len;
+- xdp_set_data_meta_invalid(&xdp);
+ xdp.data_end = xdp.data + (len - vi->hdr_len);
++ xdp.data_meta = xdp.data;
+ xdp.rxq = &rq->xdp_rxq;
+
+ act = bpf_prog_run_xdp(xdp_prog, &xdp);
+@@ -853,24 +861,27 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
+
+ switch (act) {
+ case XDP_PASS:
++ metasize = xdp.data - xdp.data_meta;
++
+ /* recalculate offset to account for any header
+- * adjustments. Note other cases do not build an
+- * skb and avoid using offset
++ * adjustments and minus the metasize to copy the
++ * metadata in page_to_skb(). Note other cases do not
++ * build an skb and avoid using offset
+ */
+- offset = xdp.data -
+- page_address(xdp_page) - vi->hdr_len;
++ offset = xdp.data - page_address(xdp_page) -
++ vi->hdr_len - metasize;
+
+- /* recalculate len if xdp.data or xdp.data_end were
+- * adjusted
++ /* recalculate len if xdp.data, xdp.data_end or
++ * xdp.data_meta were adjusted
+ */
+- len = xdp.data_end - xdp.data + vi->hdr_len;
++ len = xdp.data_end - xdp.data + vi->hdr_len + metasize;
+ /* We can only create skb based on xdp_page. */
+ if (unlikely(xdp_page != page)) {
+ rcu_read_unlock();
+ put_page(page);
+- head_skb = page_to_skb(vi, rq, xdp_page,
+- offset, len,
+- PAGE_SIZE, false);
++ head_skb = page_to_skb(vi, rq, xdp_page, offset,
++ len, PAGE_SIZE, false,
++ metasize);
+ return head_skb;
+ }
+ break;
+@@ -926,7 +937,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
+ goto err_skb;
+ }
+
+- head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog);
++ head_skb = page_to_skb(vi, rq, page, offset, len, truesize, !xdp_prog,
++ metasize);
+ curr_skb = head_skb;
+
+ if (unlikely(!curr_skb))
+--
+2.30.2
+
--- /dev/null
+From 5c71c936f47778846f8b084e2ca396f74230575d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 2 Apr 2021 06:26:02 -0700
+Subject: virtio_net: Do not pull payload in skb->head
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 0f6925b3e8da0dbbb52447ca8a8b42b371aac7db ]
+
+Xuan Zhuo reported that commit 3226b158e67c ("net: avoid 32 x truesize
+under-estimation for tiny skbs") brought a ~10% performance drop.
+
+The reason for the performance drop was that GRO was forced
+to chain sk_buffs (using skb_shinfo(skb)->frag_list), which
+uses more memory and also causes packet consumers to incur
+a lot of overhead handling all the tiny skbs.
+
+It turns out that virtio_net page_to_skb() has a wrong strategy:
+it allocates skbs with GOOD_COPY_LEN (128) bytes in skb->head, then
+copies 128 bytes from the page, before feeding the packet to the GRO
+stack.
+
+This was suboptimal before commit 3226b158e67c ("net: avoid 32 x truesize
+under-estimation for tiny skbs") because GRO was using 2 frags per MSS,
+meaning we were not packing MSS with 100% efficiency.
+
+The fix is to pull only the Ethernet header in page_to_skb().
+
+Then, we change virtio_net_hdr_to_skb() to pull the missing
+headers, instead of assuming they were already pulled by callers.
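+
+(As a rough sketch, not part of this patch: the property relied on here
+is that pskb_may_pull() copies the requested bytes from the page
+fragments into skb->head on demand, whereas a plain skb_headlen() check
+only looks at what is already linear and would reject such packets.)
+
+  #include <linux/skbuff.h>
+
+  /* Illustrative helper: ensure 'needed' header bytes sit in the linear
+   * area before dereferencing them; pskb_may_pull() pulls data from the
+   * fragments if required and returns false when the packet is too
+   * short (or memory allocation fails).
+   */
+  static bool headers_in_linear_area(struct sk_buff *skb, unsigned int needed)
+  {
+          return pskb_may_pull(skb, needed);
+  }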
+
+This fixes the performance regression, but could also allow virtio_net
+to accept packets with more than 128 bytes of headers.
+
+Many thanks to Xuan Zhuo for his report, and his tests/help.
+
+Fixes: 3226b158e67c ("net: avoid 32 x truesize under-estimation for tiny skbs")
+Reported-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Link: https://www.spinics.net/lists/netdev/msg731397.html
+Co-Developed-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Signed-off-by: Xuan Zhuo <xuanzhuo@linux.alibaba.com>
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Cc: virtualization@lists.linux-foundation.org
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/virtio_net.c | 10 +++++++---
+ include/linux/virtio_net.h | 14 +++++++++-----
+ 2 files changed, 16 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
+index b67460864b3c..d8ee001d8e8e 100644
+--- a/drivers/net/virtio_net.c
++++ b/drivers/net/virtio_net.c
+@@ -406,9 +406,13 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
+ offset += hdr_padded_len;
+ p += hdr_padded_len;
+
+- copy = len;
+- if (copy > skb_tailroom(skb))
+- copy = skb_tailroom(skb);
++ /* Copy the whole frame if it fits in skb->head, otherwise
++ * we let virtio_net_hdr_to_skb() and GRO pull headers as needed.
++ */
++ if (len <= skb_tailroom(skb))
++ copy = len;
++ else
++ copy = ETH_HLEN + metasize;
+ skb_put_data(skb, p, copy);
+
+ if (metasize) {
+diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
+index 98775d7fa696..b465f8f3e554 100644
+--- a/include/linux/virtio_net.h
++++ b/include/linux/virtio_net.h
+@@ -65,14 +65,18 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
+ skb_reset_mac_header(skb);
+
+ if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
+- u16 start = __virtio16_to_cpu(little_endian, hdr->csum_start);
+- u16 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
++ u32 start = __virtio16_to_cpu(little_endian, hdr->csum_start);
++ u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset);
++ u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16));
++
++ if (!pskb_may_pull(skb, needed))
++ return -EINVAL;
+
+ if (!skb_partial_csum_set(skb, start, off))
+ return -EINVAL;
+
+ p_off = skb_transport_offset(skb) + thlen;
+- if (p_off > skb_headlen(skb))
++ if (!pskb_may_pull(skb, p_off))
+ return -EINVAL;
+ } else {
+ /* gso packets without NEEDS_CSUM do not set transport_offset.
+@@ -102,14 +106,14 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
+ }
+
+ p_off = keys.control.thoff + thlen;
+- if (p_off > skb_headlen(skb) ||
++ if (!pskb_may_pull(skb, p_off) ||
+ keys.basic.ip_proto != ip_proto)
+ return -EINVAL;
+
+ skb_set_transport_header(skb, keys.control.thoff);
+ } else if (gso_type) {
+ p_off = thlen;
+- if (p_off > skb_headlen(skb))
++ if (!pskb_may_pull(skb, p_off))
+ return -EINVAL;
+ }
+ }
+--
+2.30.2
+