]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
idpf: fix a race in txq wakeup
author: Brian Vazquez <brianvv@google.com>
Thu, 1 May 2025 17:06:17 +0000 (17:06 +0000)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 19 Jun 2025 13:32:16 +0000 (15:32 +0200)
[ Upstream commit 7292af042bcf22e2c18b96ed250f78498a5b28ab ]

Add a helper function to correctly handle the lockless
synchronization when the sender needs to block. The paradigm is

        if (no_resources()) {
                stop_queue();
                barrier();
                if (!no_resources())
                        restart_queue();
        }

netif_subqueue_maybe_stop already handles the paradigm correctly, but
the code split the check for resources into three parts: the first one
(descriptors) followed the protocol, while the other two (completions
and tx_buf) only did the first step (stopping the queue, with no
re-check afterwards) and so were race prone.

Luckily netif_subqueue_maybe_stop macro already allows you to use a
function to evaluate the start/stop conditions so the fix only requires
the right helper function to evaluate all the conditions at once.

The patch removes idpf_tx_maybe_stop_common since it's no longer needed
and instead adjusts the conditions for singleq and splitq separately.

Note that idpf_tx_buf_hw_update doesn't need to check for resources
since that will be covered in idpf_tx_splitq_frame.

To reproduce:

Reduce the threshold for pending completions to increase the chances of
hitting this pause by changing your kernel:

drivers/net/ethernet/intel/idpf/idpf_txrx.h

-#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq)   ((txcq)->desc_count >> 1)
+#define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq)   ((txcq)->desc_count >> 4)

Use pktgen to force the host to push small pkts very aggressively:

./pktgen_sample02_multiqueue.sh -i eth1 -s 100 -6 -d $IP -m $MAC \
  -p 10000-10000 -t 16 -n 0 -v -x -c 64

Fixes: 6818c4d5b3c2 ("idpf: add splitq start_xmit")
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
Signed-off-by: Josh Hay <joshua.a.hay@intel.com>
Signed-off-by: Brian Vazquez <brianvv@google.com>
Signed-off-by: Luigi Rizzo <lrizzo@google.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Tested-by: Samuel Salin <Samuel.salin@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c
drivers/net/ethernet/intel/idpf/idpf_txrx.c
drivers/net/ethernet/intel/idpf/idpf_txrx.h

index dfd7cf1d9aa0ad54a1da83248f10200e2b5ec19c..a986dd57255592a4329596814a22319b2e6740cb 100644 (file)
@@ -362,17 +362,18 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb,
 {
        struct idpf_tx_offload_params offload = { };
        struct idpf_tx_buf *first;
+       int csum, tso, needed;
        unsigned int count;
        __be16 protocol;
-       int csum, tso;
 
        count = idpf_tx_desc_count_required(tx_q, skb);
        if (unlikely(!count))
                return idpf_tx_drop_skb(tx_q, skb);
 
-       if (idpf_tx_maybe_stop_common(tx_q,
-                                     count + IDPF_TX_DESCS_PER_CACHE_LINE +
-                                     IDPF_TX_DESCS_FOR_CTX)) {
+       needed = count + IDPF_TX_DESCS_PER_CACHE_LINE + IDPF_TX_DESCS_FOR_CTX;
+       if (!netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
+                                      IDPF_DESC_UNUSED(tx_q),
+                                      needed, needed)) {
                idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false);
 
                u64_stats_update_begin(&tx_q->stats_sync);
index 623bf17f87f9c016e950b8bcffac0018fa217c39..c6c36de58b9d125495b11ab88d63d510d9cf4f52 100644 (file)
@@ -2132,6 +2132,19 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
        desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag);
 }
 
+/* Global conditions to tell whether the txq (and related resources)
+ * has room to allow the use of "size" descriptors.
+ */
+static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size)
+{
+       if (IDPF_DESC_UNUSED(tx_q) < size ||
+           IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
+               IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) ||
+           IDPF_TX_BUF_RSV_LOW(tx_q))
+               return 0;
+       return 1;
+}
+
 /**
  * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions
  * @tx_q: the queue to be checked
@@ -2142,29 +2155,11 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc,
 static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q,
                                     unsigned int descs_needed)
 {
-       if (idpf_tx_maybe_stop_common(tx_q, descs_needed))
-               goto out;
-
-       /* If there are too many outstanding completions expected on the
-        * completion queue, stop the TX queue to give the device some time to
-        * catch up
-        */
-       if (unlikely(IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) >
-                    IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq)))
-               goto splitq_stop;
-
-       /* Also check for available book keeping buffers; if we are low, stop
-        * the queue to wait for more completions
-        */
-       if (unlikely(IDPF_TX_BUF_RSV_LOW(tx_q)))
-               goto splitq_stop;
-
-       return 0;
-
-splitq_stop:
-       netif_stop_subqueue(tx_q->netdev, tx_q->idx);
+       if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
+                                     idpf_txq_has_room(tx_q, descs_needed),
+                                     1, 1))
+               return 0;
 
-out:
        u64_stats_update_begin(&tx_q->stats_sync);
        u64_stats_inc(&tx_q->q_stats.q_busy);
        u64_stats_update_end(&tx_q->stats_sync);
@@ -2190,12 +2185,6 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val,
        nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx);
        tx_q->next_to_use = val;
 
-       if (idpf_tx_maybe_stop_common(tx_q, IDPF_TX_DESC_NEEDED)) {
-               u64_stats_update_begin(&tx_q->stats_sync);
-               u64_stats_inc(&tx_q->q_stats.q_busy);
-               u64_stats_update_end(&tx_q->stats_sync);
-       }
-
        /* Force memory writes to complete before letting h/w
         * know there are new descriptors to fetch.  (Only
         * applicable for weak-ordered memory model archs,
index 9c1fe84108ed2e33870245a8888a2e9b26b06f4b..ffeeaede6cf8f41387c283eb2e98b9ca864eca9b 100644 (file)
@@ -1052,12 +1052,4 @@ bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq,
                                      u16 cleaned_count);
 int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off);
 
-static inline bool idpf_tx_maybe_stop_common(struct idpf_tx_queue *tx_q,
-                                            u32 needed)
-{
-       return !netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx,
-                                         IDPF_DESC_UNUSED(tx_q),
-                                         needed, needed);
-}
-
 #endif /* !_IDPF_TXRX_H_ */