]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net: enetc: add LSO support for i.MX95 ENETC PF
authorWei Fang <wei.fang@nxp.com>
Thu, 19 Dec 2024 05:47:54 +0000 (13:47 +0800)
committerJakub Kicinski <kuba@kernel.org>
Mon, 23 Dec 2024 17:54:33 +0000 (09:54 -0800)
ENETC rev 4.1 supports large send offload (LSO), segmenting large TCP
and UDP transmit units into multiple Ethernet frames. To support LSO,
software needs to fill some auxiliary information in Tx BD, such as LSO
header length, frame length, LSO maximum segment size, etc.

At 1Gbps link rate, TCP segmentation was tested using iperf3, and the
CPU performance before and after applying the patch was compared through
the top command. It can be seen that LSO saves a significant amount of
CPU cycles compared to software TSO.

Before applying the patch:
%Cpu(s):  0.1 us,  4.1 sy,  0.0 ni, 85.7 id,  0.0 wa,  0.5 hi,  9.7 si

After applying the patch:
%Cpu(s):  0.1 us,  2.3 sy,  0.0 ni, 94.5 id,  0.0 wa,  0.4 hi,  2.6 si

Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Frank Li <Frank.Li@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Link: https://patch.msgid.link/20241219054755.1615626-4-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/freescale/enetc/enetc.c
drivers/net/ethernet/freescale/enetc/enetc.h
drivers/net/ethernet/freescale/enetc/enetc4_hw.h
drivers/net/ethernet/freescale/enetc/enetc_hw.h
drivers/net/ethernet/freescale/enetc/enetc_pf_common.c

index 76c33506991b237a66ec67401df76f8d94a512fa..6a6fc819dfdeeb13e8ba03f7ec9cba6c91a2afaf 100644 (file)
@@ -532,6 +532,230 @@ static void enetc_tso_complete_csum(struct enetc_bdr *tx_ring, struct tso_t *tso
        }
 }
 
+static int enetc_lso_count_descs(const struct sk_buff *skb)
+{
+       /* 4 BDs: 1 BD for LSO header + 1 BD for extended BD + 1 BD
+        * for linear area data but not include LSO header, namely
+        * skb_headlen(skb) - lso_hdr_len (it may be 0, but that's
+        * okay, we only need to consider the worst case). And 1 BD
+        * for gap.
+        */
+       return skb_shinfo(skb)->nr_frags + 4;
+}
+
+static int enetc_lso_get_hdr_len(const struct sk_buff *skb)
+{
+       int hdr_len, tlen;
+
+       tlen = skb_is_gso_tcp(skb) ? tcp_hdrlen(skb) : sizeof(struct udphdr);
+       hdr_len = skb_transport_offset(skb) + tlen;
+
+       return hdr_len;
+}
+
+static void enetc_lso_start(struct sk_buff *skb, struct enetc_lso_t *lso)
+{
+       lso->lso_seg_size = skb_shinfo(skb)->gso_size;
+       lso->ipv6 = enetc_skb_is_ipv6(skb);
+       lso->tcp = skb_is_gso_tcp(skb);
+       lso->l3_hdr_len = skb_network_header_len(skb);
+       lso->l3_start = skb_network_offset(skb);
+       lso->hdr_len = enetc_lso_get_hdr_len(skb);
+       lso->total_len = skb->len - lso->hdr_len;
+}
+
+static void enetc_lso_map_hdr(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+                             int *i, struct enetc_lso_t *lso)
+{
+       union enetc_tx_bd txbd_tmp, *txbd;
+       struct enetc_tx_swbd *tx_swbd;
+       u16 frm_len, frm_len_ext;
+       u8 flags, e_flags = 0;
+       dma_addr_t addr;
+       char *hdr;
+
+       /* Get the first BD of the LSO BDs chain */
+       txbd = ENETC_TXBD(*tx_ring, *i);
+       tx_swbd = &tx_ring->tx_swbd[*i];
+       prefetchw(txbd);
+
+       /* Prepare LSO header: MAC + IP + TCP/UDP */
+       hdr = tx_ring->tso_headers + *i * TSO_HEADER_SIZE;
+       memcpy(hdr, skb->data, lso->hdr_len);
+       addr = tx_ring->tso_headers_dma + *i * TSO_HEADER_SIZE;
+
+       /* {frm_len_ext, frm_len} indicates the total length of
+        * large transmit data unit. frm_len contains the 16 least
+        * significant bits and frm_len_ext contains the 4 most
+        * significant bits.
+        */
+       frm_len = lso->total_len & 0xffff;
+       frm_len_ext = (lso->total_len >> 16) & 0xf;
+
+       /* Set the flags of the first BD */
+       flags = ENETC_TXBD_FLAGS_EX | ENETC_TXBD_FLAGS_CSUM_LSO |
+               ENETC_TXBD_FLAGS_LSO | ENETC_TXBD_FLAGS_L4CS;
+
+       enetc_clear_tx_bd(&txbd_tmp);
+       txbd_tmp.addr = cpu_to_le64(addr);
+       txbd_tmp.hdr_len = cpu_to_le16(lso->hdr_len);
+
+       /* first BD needs frm_len and offload flags set */
+       txbd_tmp.frm_len = cpu_to_le16(frm_len);
+       txbd_tmp.flags = flags;
+
+       txbd_tmp.l3_aux0 = FIELD_PREP(ENETC_TX_BD_L3_START, lso->l3_start);
+       /* l3_hdr_size in 32-bits (4 bytes) */
+       txbd_tmp.l3_aux1 = FIELD_PREP(ENETC_TX_BD_L3_HDR_LEN,
+                                     lso->l3_hdr_len / 4);
+       if (lso->ipv6)
+               txbd_tmp.l3_aux1 |= ENETC_TX_BD_L3T;
+       else
+               txbd_tmp.l3_aux0 |= ENETC_TX_BD_IPCS;
+
+       txbd_tmp.l4_aux = FIELD_PREP(ENETC_TX_BD_L4T, lso->tcp ?
+                                    ENETC_TXBD_L4T_TCP : ENETC_TXBD_L4T_UDP);
+
+       /* For the LSO header we do not set the dma address since
+        * we do not want it unmapped when we do cleanup. We still
+        * set len so that we count the bytes sent.
+        */
+       tx_swbd->len = lso->hdr_len;
+       tx_swbd->do_twostep_tstamp = false;
+       tx_swbd->check_wb = false;
+
+       /* Actually write the header in the BD */
+       *txbd = txbd_tmp;
+
+       /* Get the next BD, and the next BD is extended BD */
+       enetc_bdr_idx_inc(tx_ring, i);
+       txbd = ENETC_TXBD(*tx_ring, *i);
+       tx_swbd = &tx_ring->tx_swbd[*i];
+       prefetchw(txbd);
+
+       enetc_clear_tx_bd(&txbd_tmp);
+       if (skb_vlan_tag_present(skb)) {
+               /* Setup the VLAN fields */
+               txbd_tmp.ext.vid = cpu_to_le16(skb_vlan_tag_get(skb));
+               txbd_tmp.ext.tpid = ENETC_TPID_8021Q;
+               e_flags = ENETC_TXBD_E_FLAGS_VLAN_INS;
+       }
+
+       /* Write the BD */
+       txbd_tmp.ext.e_flags = e_flags;
+       txbd_tmp.ext.lso_sg_size = cpu_to_le16(lso->lso_seg_size);
+       txbd_tmp.ext.frm_len_ext = cpu_to_le16(frm_len_ext);
+       *txbd = txbd_tmp;
+}
+
+static int enetc_lso_map_data(struct enetc_bdr *tx_ring, struct sk_buff *skb,
+                             int *i, struct enetc_lso_t *lso, int *count)
+{
+       union enetc_tx_bd txbd_tmp, *txbd = NULL;
+       struct enetc_tx_swbd *tx_swbd;
+       skb_frag_t *frag;
+       dma_addr_t dma;
+       u8 flags = 0;
+       int len, f;
+
+       len = skb_headlen(skb) - lso->hdr_len;
+       if (len > 0) {
+               dma = dma_map_single(tx_ring->dev, skb->data + lso->hdr_len,
+                                    len, DMA_TO_DEVICE);
+               if (dma_mapping_error(tx_ring->dev, dma))
+                       return -ENOMEM;
+
+               enetc_bdr_idx_inc(tx_ring, i);
+               txbd = ENETC_TXBD(*tx_ring, *i);
+               tx_swbd = &tx_ring->tx_swbd[*i];
+               prefetchw(txbd);
+               *count += 1;
+
+               enetc_clear_tx_bd(&txbd_tmp);
+               txbd_tmp.addr = cpu_to_le64(dma);
+               txbd_tmp.buf_len = cpu_to_le16(len);
+
+               tx_swbd->dma = dma;
+               tx_swbd->len = len;
+               tx_swbd->is_dma_page = 0;
+               tx_swbd->dir = DMA_TO_DEVICE;
+       }
+
+       frag = &skb_shinfo(skb)->frags[0];
+       for (f = 0; f < skb_shinfo(skb)->nr_frags; f++, frag++) {
+               if (txbd)
+                       *txbd = txbd_tmp;
+
+               len = skb_frag_size(frag);
+               dma = skb_frag_dma_map(tx_ring->dev, frag);
+               if (dma_mapping_error(tx_ring->dev, dma))
+                       return -ENOMEM;
+
+               /* Get the next BD */
+               enetc_bdr_idx_inc(tx_ring, i);
+               txbd = ENETC_TXBD(*tx_ring, *i);
+               tx_swbd = &tx_ring->tx_swbd[*i];
+               prefetchw(txbd);
+               *count += 1;
+
+               enetc_clear_tx_bd(&txbd_tmp);
+               txbd_tmp.addr = cpu_to_le64(dma);
+               txbd_tmp.buf_len = cpu_to_le16(len);
+
+               tx_swbd->dma = dma;
+               tx_swbd->len = len;
+               tx_swbd->is_dma_page = 1;
+               tx_swbd->dir = DMA_TO_DEVICE;
+       }
+
+       /* Last BD needs 'F' bit set */
+       flags |= ENETC_TXBD_FLAGS_F;
+       txbd_tmp.flags = flags;
+       *txbd = txbd_tmp;
+
+       tx_swbd->is_eof = 1;
+       tx_swbd->skb = skb;
+
+       return 0;
+}
+
+static int enetc_lso_hw_offload(struct enetc_bdr *tx_ring, struct sk_buff *skb)
+{
+       struct enetc_tx_swbd *tx_swbd;
+       struct enetc_lso_t lso = {0};
+       int err, i, count = 0;
+
+       /* Initialize the LSO handler */
+       enetc_lso_start(skb, &lso);
+       i = tx_ring->next_to_use;
+
+       enetc_lso_map_hdr(tx_ring, skb, &i, &lso);
+       /* First BD and an extend BD */
+       count += 2;
+
+       err = enetc_lso_map_data(tx_ring, skb, &i, &lso, &count);
+       if (err)
+               goto dma_err;
+
+       /* Go to the next BD */
+       enetc_bdr_idx_inc(tx_ring, &i);
+       tx_ring->next_to_use = i;
+       enetc_update_tx_ring_tail(tx_ring);
+
+       return count;
+
+dma_err:
+       do {
+               tx_swbd = &tx_ring->tx_swbd[i];
+               enetc_free_tx_frame(tx_ring, tx_swbd);
+               if (i == 0)
+                       i = tx_ring->bd_count;
+               i--;
+       } while (--count);
+
+       return 0;
+}
+
 static int enetc_map_tx_tso_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb)
 {
        struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev);
@@ -652,14 +876,26 @@ static netdev_tx_t enetc_start_xmit(struct sk_buff *skb,
        tx_ring = priv->tx_ring[skb->queue_mapping];
 
        if (skb_is_gso(skb)) {
-               if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) {
-                       netif_stop_subqueue(ndev, tx_ring->index);
-                       return NETDEV_TX_BUSY;
-               }
+               /* LSO data unit lengths of up to 256KB are supported */
+               if (priv->active_offloads & ENETC_F_LSO &&
+                   (skb->len - enetc_lso_get_hdr_len(skb)) <=
+                   ENETC_LSO_MAX_DATA_LEN) {
+                       if (enetc_bd_unused(tx_ring) < enetc_lso_count_descs(skb)) {
+                               netif_stop_subqueue(ndev, tx_ring->index);
+                               return NETDEV_TX_BUSY;
+                       }
 
-               enetc_lock_mdio();
-               count = enetc_map_tx_tso_buffs(tx_ring, skb);
-               enetc_unlock_mdio();
+                       count = enetc_lso_hw_offload(tx_ring, skb);
+               } else {
+                       if (enetc_bd_unused(tx_ring) < tso_count_descs(skb)) {
+                               netif_stop_subqueue(ndev, tx_ring->index);
+                               return NETDEV_TX_BUSY;
+                       }
+
+                       enetc_lock_mdio();
+                       count = enetc_map_tx_tso_buffs(tx_ring, skb);
+                       enetc_unlock_mdio();
+               }
        } else {
                if (unlikely(skb_shinfo(skb)->nr_frags > priv->max_frags))
                        if (unlikely(skb_linearize(skb)))
@@ -1799,6 +2035,9 @@ void enetc_get_si_caps(struct enetc_si *si)
                rss = enetc_rd(hw, ENETC_SIRSSCAPR);
                si->num_rss = ENETC_SIRSSCAPR_GET_NUM_RSS(rss);
        }
+
+       if (val & ENETC_SIPCAPR0_LSO)
+               si->hw_features |= ENETC_SI_F_LSO;
 }
 EXPORT_SYMBOL_GPL(enetc_get_si_caps);
 
@@ -2095,6 +2334,14 @@ static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups)
        return 0;
 }
 
+static void enetc_set_lso_flags_mask(struct enetc_hw *hw)
+{
+       enetc_wr(hw, ENETC4_SILSOSFMR0,
+                SILSOSFMR0_VAL_SET(ENETC4_TCP_NL_SEG_FLAGS_DMASK,
+                                   ENETC4_TCP_NL_SEG_FLAGS_DMASK));
+       enetc_wr(hw, ENETC4_SILSOSFMR1, 0);
+}
+
 int enetc_configure_si(struct enetc_ndev_priv *priv)
 {
        struct enetc_si *si = priv->si;
@@ -2108,6 +2355,9 @@ int enetc_configure_si(struct enetc_ndev_priv *priv)
        /* enable SI */
        enetc_wr(hw, ENETC_SIMR, ENETC_SIMR_EN);
 
+       if (si->hw_features & ENETC_SI_F_LSO)
+               enetc_set_lso_flags_mask(hw);
+
        /* TODO: RSS support for i.MX95 will be supported later, and the
         * is_enetc_rev1() condition will be removed
         */
index 1e680f0f512364ad4a1a09e5d2a2aa713c0d5367..4ad4eb5c5a747d937ac5cad5b77b960af9335920 100644 (file)
@@ -41,6 +41,18 @@ struct enetc_tx_swbd {
        u8 qbv_en:1;
 };
 
+struct enetc_lso_t {
+       bool    ipv6;
+       bool    tcp;
+       u8      l3_hdr_len;
+       u8      hdr_len; /* LSO header length */
+       u8      l3_start;
+       u16     lso_seg_size;
+       int     total_len; /* total data length, not include LSO header */
+};
+
+#define ENETC_LSO_MAX_DATA_LEN         SZ_256K
+
 #define ENETC_RX_MAXFRM_SIZE   ENETC_MAC_MAXFRM_SIZE
 #define ENETC_RXB_TRUESIZE     2048 /* PAGE_SIZE >> 1 */
 #define ENETC_RXB_PAD          NET_SKB_PAD /* add extra space if needed */
@@ -238,6 +250,7 @@ enum enetc_errata {
 #define ENETC_SI_F_PSFP BIT(0)
 #define ENETC_SI_F_QBV  BIT(1)
 #define ENETC_SI_F_QBU  BIT(2)
+#define ENETC_SI_F_LSO BIT(3)
 
 struct enetc_drvdata {
        u32 pmac_offset; /* Only valid for PSI which supports 802.1Qbu */
@@ -351,6 +364,7 @@ enum enetc_active_offloads {
        ENETC_F_QCI                     = BIT(10),
        ENETC_F_QBU                     = BIT(11),
        ENETC_F_TXCSUM                  = BIT(12),
+       ENETC_F_LSO                     = BIT(13),
 };
 
 enum enetc_flags_bit {
index 26b220677448190ab79bab6dba77d7a81d2dc668..695cb07c74bcf15081eef7fff07f4fc61679ccb5 100644 (file)
 #define NXP_ENETC_VENDOR_ID            0x1131
 #define NXP_ENETC_PF_DEV_ID            0xe101
 
+/**********************Station interface registers************************/
+/* Station interface LSO segmentation flag mask register 0/1 */
+#define ENETC4_SILSOSFMR0              0x1300
+#define  SILSOSFMR0_TCP_MID_SEG                GENMASK(27, 16)
+#define  SILSOSFMR0_TCP_1ST_SEG                GENMASK(11, 0)
+#define  SILSOSFMR0_VAL_SET(first, mid)        (FIELD_PREP(SILSOSFMR0_TCP_MID_SEG, mid) | \
+                                        FIELD_PREP(SILSOSFMR0_TCP_1ST_SEG, first))
+
+#define ENETC4_SILSOSFMR1              0x1304
+#define  SILSOSFMR1_TCP_LAST_SEG       GENMASK(11, 0)
+#define   ENETC4_TCP_FLAGS_FIN         BIT(0)
+#define   ENETC4_TCP_FLAGS_SYN         BIT(1)
+#define   ENETC4_TCP_FLAGS_RST         BIT(2)
+#define   ENETC4_TCP_FLAGS_PSH         BIT(3)
+#define   ENETC4_TCP_FLAGS_ACK         BIT(4)
+#define   ENETC4_TCP_FLAGS_URG         BIT(5)
+#define   ENETC4_TCP_FLAGS_ECE         BIT(6)
+#define   ENETC4_TCP_FLAGS_CWR         BIT(7)
+#define   ENETC4_TCP_FLAGS_NS          BIT(8)
+/* According to tso_build_hdr(), clear all special flags for not last packet. */
+#define ENETC4_TCP_NL_SEG_FLAGS_DMASK  (ENETC4_TCP_FLAGS_FIN | \
+                                        ENETC4_TCP_FLAGS_RST | ENETC4_TCP_FLAGS_PSH)
+
 /***************************ENETC port registers**************************/
 #define ENETC4_ECAPR0                  0x0
 #define  ECAPR0_RFS                    BIT(2)
index 0e259baf36ee6ff2c4ed0006af2ce90b2306b9e2..4098f01479bc0afc39c4dc8cc9938f5dbc8ac997 100644 (file)
@@ -25,6 +25,7 @@
 #define ENETC_SIPCAPR0 0x20
 #define ENETC_SIPCAPR0_RSS     BIT(8)
 #define ENETC_SIPCAPR0_RFS     BIT(2)
+#define ENETC_SIPCAPR0_LSO     BIT(1)
 #define ENETC_SIPCAPR1 0x24
 #define ENETC_SITGTGR  0x30
 #define ENETC_SIRBGCR  0x38
@@ -554,7 +555,10 @@ static inline u64 _enetc_rd_reg64_wa(void __iomem *reg)
 union enetc_tx_bd {
        struct {
                __le64 addr;
-               __le16 buf_len;
+               union {
+                       __le16 buf_len;
+                       __le16 hdr_len; /* For LSO, ENETC 4.1 and later */
+               };
                __le16 frm_len;
                union {
                        struct {
@@ -578,13 +582,16 @@ union enetc_tx_bd {
                __le32 tstamp;
                __le16 tpid;
                __le16 vid;
-               u8 reserved[6];
+               __le16 lso_sg_size; /* For ENETC 4.1 and later */
+               __le16 frm_len_ext; /* For ENETC 4.1 and later */
+               u8 reserved[2];
                u8 e_flags;
                u8 flags;
        } ext; /* Tx BD extension */
        struct {
                __le32 tstamp;
-               u8 reserved[10];
+               u8 reserved[8];
+               __le16 lso_err_count; /* For ENETC 4.1 and later */
                u8 status;
                u8 flags;
        } wb; /* writeback descriptor */
@@ -593,6 +600,7 @@ union enetc_tx_bd {
 enum enetc_txbd_flags {
        ENETC_TXBD_FLAGS_L4CS = BIT(0), /* For ENETC 4.1 and later */
        ENETC_TXBD_FLAGS_TSE = BIT(1),
+       ENETC_TXBD_FLAGS_LSO = BIT(1), /* For ENETC 4.1 and later */
        ENETC_TXBD_FLAGS_W = BIT(2),
        ENETC_TXBD_FLAGS_CSUM_LSO = BIT(3), /* For ENETC 4.1 and later */
        ENETC_TXBD_FLAGS_TXSTART = BIT(4),
@@ -663,6 +671,8 @@ union enetc_rx_bd {
 #define ENETC_CBD_FLAGS_SF     BIT(7) /* short format */
 #define ENETC_CBD_STATUS_MASK  0xf
 
+#define ENETC_TPID_8021Q       0
+
 struct enetc_cmd_rfse {
        u8 smac_h[6];
        u8 smac_m[6];
index 00b73a9487466dd74182e1ee1fc1c38a065bfd2a..31dedc665a16594aea61d0ec9136fab9f9afe8f0 100644 (file)
@@ -123,6 +123,9 @@ void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
        if (si->drvdata->tx_csum)
                priv->active_offloads |= ENETC_F_TXCSUM;
 
+       if (si->hw_features & ENETC_SI_F_LSO)
+               priv->active_offloads |= ENETC_F_LSO;
+
        /* TODO: currently, i.MX95 ENETC driver does not support advanced features */
        if (!is_enetc_rev1(si)) {
                ndev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK);