]> git.ipfire.org Git - thirdparty/kernel/stable.git/commitdiff
gve: Fix stuck TX queue for DQ queue format
author Praveen Kaligineedi <pkaligineedi@google.com>
Thu, 17 Jul 2025 19:20:24 +0000 (19:20 +0000)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 28 Aug 2025 14:22:30 +0000 (16:22 +0200)
commit b03f15c0192b184078206760c839054ae6eb4eaa upstream.

gve_tx_timeout was calculating missed completions in a way that is only
relevant in the GQ queue format. Additionally, it was attempting to
disable device interrupts, which is not needed in either GQ or DQ queue
formats.

As a result, TX timeouts with the DQ queue format likely would have
triggered early resets without kicking the queue at all.

This patch drops the check for pending work altogether and always kicks
the queue after validating the queue has not seen a TX timeout too
recently.

Cc: stable@vger.kernel.org
Fixes: 87a7f321bb6a ("gve: Recover from queue stall due to missed IRQ")
Co-developed-by: Tim Hostetler <thostet@google.com>
Signed-off-by: Tim Hostetler <thostet@google.com>
Signed-off-by: Praveen Kaligineedi <pkaligineedi@google.com>
Signed-off-by: Harshitha Ramamurthy <hramamurthy@google.com>
Link: https://patch.msgid.link/20250717192024.1820931-1-hramamurthy@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/net/ethernet/google/gve/gve_main.c

index f458a97dd7910c7d4105d89e2a9346aa246936ba..c409e46e3cfd8dbda4480c5447de17e2484fd11f 100644 (file)
@@ -944,49 +944,56 @@ static void gve_turnup(struct gve_priv *priv)
        gve_set_napi_enabled(priv);
 }
 
-static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
+static struct gve_notify_block *gve_get_tx_notify_block(struct gve_priv *priv,
+                                                       unsigned int txqueue)
 {
-       struct gve_notify_block *block;
-       struct gve_tx_ring *tx = NULL;
-       struct gve_priv *priv;
-       u32 last_nic_done;
-       u32 current_time;
        u32 ntfy_idx;
 
-       netdev_info(dev, "Timeout on tx queue, %d", txqueue);
-       priv = netdev_priv(dev);
        if (txqueue > priv->tx_cfg.num_queues)
-               goto reset;
+               return NULL;
 
        ntfy_idx = gve_tx_idx_to_ntfy(priv, txqueue);
        if (ntfy_idx >= priv->num_ntfy_blks)
-               goto reset;
+               return NULL;
+
+       return &priv->ntfy_blocks[ntfy_idx];
+}
+
+static bool gve_tx_timeout_try_q_kick(struct gve_priv *priv,
+                                     unsigned int txqueue)
+{
+       struct gve_notify_block *block;
+       u32 current_time;
 
-       block = &priv->ntfy_blocks[ntfy_idx];
-       tx = block->tx;
+       block = gve_get_tx_notify_block(priv, txqueue);
+
+       if (!block)
+               return false;
 
        current_time = jiffies_to_msecs(jiffies);
-       if (tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
-               goto reset;
+       if (block->tx->last_kick_msec + MIN_TX_TIMEOUT_GAP > current_time)
+               return false;
 
-       /* Check to see if there are missed completions, which will allow us to
-        * kick the queue.
-        */
-       last_nic_done = gve_tx_load_event_counter(priv, tx);
-       if (last_nic_done - tx->done) {
-               netdev_info(dev, "Kicking queue %d", txqueue);
-               iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block));
-               napi_schedule(&block->napi);
-               tx->last_kick_msec = current_time;
-               goto out;
-       } // Else reset.
+       netdev_info(priv->dev, "Kicking queue %d", txqueue);
+       napi_schedule(&block->napi);
+       block->tx->last_kick_msec = current_time;
+       return true;
+}
 
-reset:
-       gve_schedule_reset(priv);
+static void gve_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+       struct gve_notify_block *block;
+       struct gve_priv *priv;
+
+       netdev_info(dev, "Timeout on tx queue, %d", txqueue);
+       priv = netdev_priv(dev);
+
+       if (!gve_tx_timeout_try_q_kick(priv, txqueue))
+               gve_schedule_reset(priv);
 
-out:
-       if (tx)
-               tx->queue_timeout++;
+       block = gve_get_tx_notify_block(priv, txqueue);
+       if (block)
+               block->tx->queue_timeout++;
        priv->tx_timeo_cnt++;
 }