net: hns3: add handling for xmit skb with recursive fraglist
author     Yunsheng Lin <linyunsheng@huawei.com>
           Mon, 29 Mar 2021 03:57:50 +0000 (11:57 +0800)
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>
           Wed, 19 May 2021 08:29:29 +0000 (10:29 +0200)
[ Upstream commit d5d5e0193ee8f88efbbc7f1471087255657bc19a ]

Currently the hns3 driver only handles xmit skbs with one level of
fraglist skb. Add handling for multiple levels by calling
hns3_tx_bd_num() recursively when calculating the bd num and calling
hns3_fill_skb_to_desc() recursively when filling the tx desc.
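
In sketch form, the recursion this adds looks as follows (an
illustrative helper with a made-up name, tx_bd_num_sketch(); the real
hns3_tx_bd_num() in the diff below also fills the bd_size array, keeps
the single-level fast path and bails out once bd_num exceeds
HNS3_MAX_TSO_BD_NUM):

static unsigned int tx_bd_num_sketch(struct sk_buff *skb,
				     unsigned int bd_num,
				     unsigned int recursion_level)
{
	struct sk_buff *frag_skb;

	/* Refuse to walk deeper than 24 fraglist levels; the caller
	 * treats UINT_MAX as "drop this skb".
	 */
	if (unlikely(recursion_level >= 24))
		return UINT_MAX;

	/* One bd for the linear head plus one per page frag. */
	bd_num += skb_shinfo(skb)->nr_frags + 1U;

	/* The old code walked a single level with hns3_skb_bd_num();
	 * now each fraglist skb is walked recursively.
	 */
	skb_walk_frags(skb, frag_skb) {
		bd_num = tx_bd_num_sketch(frag_skb, bd_num,
					  recursion_level + 1);
		if (bd_num == UINT_MAX)
			return bd_num;
	}

	return bd_num;
}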

When an skb reaches a fraglist recursion level of 24, it is simply
dropped and stats.over_max_recursion is incremented to record the
error. Move the error stat handling from hns3_nic_net_xmit() to
hns3_nic_maybe_stop_tx() so that the different errors can be counted
separately, and add the 'over_max_recursion' and 'hw_limitation'
stats.
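
On the caller side, hns3_nic_net_xmit()'s error path then shrinks to
roughly the following (a paraphrase of the hunks below, not a verbatim
excerpt):

	ret = hns3_nic_maybe_stop_tx(ring, netdev, skb);
	if (unlikely(ret <= 0)) {
		if (ret == -EBUSY) {
			/* tx_busy was already counted in the helper */
			hns3_tx_doorbell(ring, 0, true);
			return NETDEV_TX_BUSY;
		}

		/* -ENOMEM and friends: the specific counter
		 * (over_max_recursion, hw_limitation or sw_err_cnt)
		 * was bumped where the failure was detected, so only
		 * rate-limited logging is left here before dropping.
		 */
		hns3_rl_err(netdev, "xmit error: %d!\n", ret);
	}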

Note that the max recursion level of 24 was chosen according to
commit 48a1df65334b ("skbuff: return -EMSGSIZE in skb_to_sgvec to
prevent overflow").

As we were not able to find a testcase that exercises the recursive
fraglist case, no Fixes tag is provided.

Reported-by: Barry Song <song.bao.hua@hisilicon.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index c8a43a725ebcc0c435236257cfe0d7d4bbe41c88..e9b8b9aa3b95ba929ee093ca8379cc3c5d4344db 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1277,23 +1277,21 @@ static unsigned int hns3_skb_bd_num(struct sk_buff *skb, unsigned int *bd_size,
 }
 
 static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size,
-                                  u8 max_non_tso_bd_num)
+                                  u8 max_non_tso_bd_num, unsigned int bd_num,
+                                  unsigned int recursion_level)
 {
+#define HNS3_MAX_RECURSION_LEVEL       24
+
        struct sk_buff *frag_skb;
-       unsigned int bd_num = 0;
 
        /* If the total len is within the max bd limit */
-       if (likely(skb->len <= HNS3_MAX_BD_SIZE && !skb_has_frag_list(skb) &&
+       if (likely(skb->len <= HNS3_MAX_BD_SIZE && !recursion_level &&
+                  !skb_has_frag_list(skb) &&
                   skb_shinfo(skb)->nr_frags < max_non_tso_bd_num))
                return skb_shinfo(skb)->nr_frags + 1U;
 
-       /* The below case will always be linearized, return
-        * HNS3_MAX_BD_NUM_TSO + 1U to make sure it is linearized.
-        */
-       if (unlikely(skb->len > HNS3_MAX_TSO_SIZE ||
-                    (!skb_is_gso(skb) && skb->len >
-                     HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num))))
-               return HNS3_MAX_TSO_BD_NUM + 1U;
+       if (unlikely(recursion_level >= HNS3_MAX_RECURSION_LEVEL))
+               return UINT_MAX;
 
        bd_num = hns3_skb_bd_num(skb, bd_size, bd_num);
 
@@ -1301,7 +1299,8 @@ static unsigned int hns3_tx_bd_num(struct sk_buff *skb, unsigned int *bd_size,
                return bd_num;
 
        skb_walk_frags(skb, frag_skb) {
-               bd_num = hns3_skb_bd_num(frag_skb, bd_size, bd_num);
+               bd_num = hns3_tx_bd_num(frag_skb, bd_size, max_non_tso_bd_num,
+                                       bd_num, recursion_level + 1);
                if (bd_num > HNS3_MAX_TSO_BD_NUM)
                        return bd_num;
        }
@@ -1361,6 +1360,43 @@ void hns3_shinfo_pack(struct skb_shared_info *shinfo, __u32 *size)
                size[i] = skb_frag_size(&shinfo->frags[i]);
 }
 
+static int hns3_skb_linearize(struct hns3_enet_ring *ring,
+                             struct sk_buff *skb,
+                             u8 max_non_tso_bd_num,
+                             unsigned int bd_num)
+{
+       /* 'bd_num == UINT_MAX' means the skb' fraglist has a
+        * recursion level of over HNS3_MAX_RECURSION_LEVEL.
+        */
+       if (bd_num == UINT_MAX) {
+               u64_stats_update_begin(&ring->syncp);
+               ring->stats.over_max_recursion++;
+               u64_stats_update_end(&ring->syncp);
+               return -ENOMEM;
+       }
+
+       /* The skb->len has exceeded the hw limitation, linearization
+        * will not help.
+        */
+       if (skb->len > HNS3_MAX_TSO_SIZE ||
+           (!skb_is_gso(skb) && skb->len >
+            HNS3_MAX_NON_TSO_SIZE(max_non_tso_bd_num))) {
+               u64_stats_update_begin(&ring->syncp);
+               ring->stats.hw_limitation++;
+               u64_stats_update_end(&ring->syncp);
+               return -ENOMEM;
+       }
+
+       if (__skb_linearize(skb)) {
+               u64_stats_update_begin(&ring->syncp);
+               ring->stats.sw_err_cnt++;
+               u64_stats_update_end(&ring->syncp);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
 static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
                                  struct net_device *netdev,
                                  struct sk_buff *skb)
@@ -1370,7 +1406,7 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
        unsigned int bd_size[HNS3_MAX_TSO_BD_NUM + 1U];
        unsigned int bd_num;
 
-       bd_num = hns3_tx_bd_num(skb, bd_size, max_non_tso_bd_num);
+       bd_num = hns3_tx_bd_num(skb, bd_size, max_non_tso_bd_num, 0, 0);
        if (unlikely(bd_num > max_non_tso_bd_num)) {
                if (bd_num <= HNS3_MAX_TSO_BD_NUM && skb_is_gso(skb) &&
                    !hns3_skb_need_linearized(skb, bd_size, bd_num,
@@ -1379,16 +1415,11 @@ static int hns3_nic_maybe_stop_tx(struct hns3_enet_ring *ring,
                        goto out;
                }
 
-               if (__skb_linearize(skb))
+               if (hns3_skb_linearize(ring, skb, max_non_tso_bd_num,
+                                      bd_num))
                        return -ENOMEM;
 
                bd_num = hns3_tx_bd_count(skb->len);
-               if ((skb_is_gso(skb) && bd_num > HNS3_MAX_TSO_BD_NUM) ||
-                   (!skb_is_gso(skb) &&
-                    bd_num > max_non_tso_bd_num)) {
-                       trace_hns3_over_max_bd(skb);
-                       return -ENOMEM;
-               }
 
                u64_stats_update_begin(&ring->syncp);
                ring->stats.tx_copy++;
@@ -1412,6 +1443,10 @@ out:
                return bd_num;
        }
 
+       u64_stats_update_begin(&ring->syncp);
+       ring->stats.tx_busy++;
+       u64_stats_update_end(&ring->syncp);
+
        return -EBUSY;
 }
 
@@ -1459,6 +1494,7 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
                                 struct sk_buff *skb, enum hns_desc_type type)
 {
        unsigned int size = skb_headlen(skb);
+       struct sk_buff *frag_skb;
        int i, ret, bd_num = 0;
 
        if (size) {
@@ -1483,6 +1519,15 @@ static int hns3_fill_skb_to_desc(struct hns3_enet_ring *ring,
                bd_num += ret;
        }
 
+       skb_walk_frags(skb, frag_skb) {
+               ret = hns3_fill_skb_to_desc(ring, frag_skb,
+                                           DESC_TYPE_FRAGLIST_SKB);
+               if (unlikely(ret < 0))
+                       return ret;
+
+               bd_num += ret;
+       }
+
        return bd_num;
 }
 
@@ -1513,8 +1558,6 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
        struct hns3_enet_ring *ring = &priv->ring[skb->queue_mapping];
        struct netdev_queue *dev_queue;
        int pre_ntu, next_to_use_head;
-       struct sk_buff *frag_skb;
-       int bd_num = 0;
        bool doorbell;
        int ret;
 
@@ -1530,15 +1573,8 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
        ret = hns3_nic_maybe_stop_tx(ring, netdev, skb);
        if (unlikely(ret <= 0)) {
                if (ret == -EBUSY) {
-                       u64_stats_update_begin(&ring->syncp);
-                       ring->stats.tx_busy++;
-                       u64_stats_update_end(&ring->syncp);
                        hns3_tx_doorbell(ring, 0, true);
                        return NETDEV_TX_BUSY;
-               } else if (ret == -ENOMEM) {
-                       u64_stats_update_begin(&ring->syncp);
-                       ring->stats.sw_err_cnt++;
-                       u64_stats_update_end(&ring->syncp);
                }
 
                hns3_rl_err(netdev, "xmit error: %d!\n", ret);
@@ -1551,21 +1587,14 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
        if (unlikely(ret < 0))
                goto fill_err;
 
+       /* 'ret < 0' means filling error, 'ret == 0' means skb->len is
+        * zero, which is unlikely, and 'ret > 0' means how many tx desc
+        * need to be notified to the hw.
+        */
        ret = hns3_fill_skb_to_desc(ring, skb, DESC_TYPE_SKB);
-       if (unlikely(ret < 0))
+       if (unlikely(ret <= 0))
                goto fill_err;
 
-       bd_num += ret;
-
-       skb_walk_frags(skb, frag_skb) {
-               ret = hns3_fill_skb_to_desc(ring, frag_skb,
-                                           DESC_TYPE_FRAGLIST_SKB);
-               if (unlikely(ret < 0))
-                       goto fill_err;
-
-               bd_num += ret;
-       }
-
        pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) :
                                        (ring->desc_num - 1);
        ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |=
@@ -1576,7 +1605,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
        dev_queue = netdev_get_tx_queue(netdev, ring->queue_index);
        doorbell = __netdev_tx_sent_queue(dev_queue, skb->len,
                                          netdev_xmit_more());
-       hns3_tx_doorbell(ring, bd_num, doorbell);
+       hns3_tx_doorbell(ring, ret, doorbell);
 
        return NETDEV_TX_OK;
 
@@ -1748,11 +1777,15 @@ static void hns3_nic_get_stats64(struct net_device *netdev,
                        tx_drop += ring->stats.tx_l4_proto_err;
                        tx_drop += ring->stats.tx_l2l3l4_err;
                        tx_drop += ring->stats.tx_tso_err;
+                       tx_drop += ring->stats.over_max_recursion;
+                       tx_drop += ring->stats.hw_limitation;
                        tx_errors += ring->stats.sw_err_cnt;
                        tx_errors += ring->stats.tx_vlan_err;
                        tx_errors += ring->stats.tx_l4_proto_err;
                        tx_errors += ring->stats.tx_l2l3l4_err;
                        tx_errors += ring->stats.tx_tso_err;
+                       tx_errors += ring->stats.over_max_recursion;
+                       tx_errors += ring->stats.hw_limitation;
                } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 
                /* fetch the rx stats */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 0a7b606e7c9384086ec5938d8d63fa37968190d3..0b531e107e26498bf6e8bfd1e2ec0d10e699c812 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -377,6 +377,8 @@ struct ring_stats {
                        u64 tx_l4_proto_err;
                        u64 tx_l2l3l4_err;
                        u64 tx_tso_err;
+                       u64 over_max_recursion;
+                       u64 hw_limitation;
                };
                struct {
                        u64 rx_pkts;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index e2fc443fe92ca1bcc0e3696567492bd5e0a0c844..7276cfaa8c3b866e1408643968f218a0106a0409 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -44,6 +44,8 @@ static const struct hns3_stats hns3_txq_stats[] = {
        HNS3_TQP_STAT("l4_proto_err", tx_l4_proto_err),
        HNS3_TQP_STAT("l2l3l4_err", tx_l2l3l4_err),
        HNS3_TQP_STAT("tso_err", tx_tso_err),
+       HNS3_TQP_STAT("over_max_recursion", over_max_recursion),
+       HNS3_TQP_STAT("hw_limitation", hw_limitation),
 };
 
 #define HNS3_TXQ_STATS_COUNT ARRAY_SIZE(hns3_txq_stats)
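
With the two HNS3_TQP_STAT entries above in place, the new
over_max_recursion and hw_limitation counters show up per tx queue in
'ethtool -S <dev>' output next to the existing tso_err and friends
(each name carries the driver's usual per-queue stat prefix).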