]> git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net/mlx5e: Recover SQ on excessive PTP TX timestamp delta
author Carolina Jubran <cjubran@nvidia.com>
Mon, 17 Nov 2025 21:42:06 +0000 (23:42 +0200)
committer Jakub Kicinski <kuba@kernel.org>
Wed, 19 Nov 2025 02:53:33 +0000 (18:53 -0800)
Extend the TX timestamp handler to recover the SQ when the difference
between the port and CQE TX timestamps is abnormally large.

The current logic aborts timestamp delivery if the delta exceeds
1/128 seconds, which matches the maximum expected packet interval in
ptp4l. A larger delta makes the timestamps unreliable.

This change adds recovery if the delta exceeds 0.5 seconds. Such a
large gap should not occur in normal operation and indicates that
firmware is stuck or metadata tracking is out of sync, leading to stale
or mismatched timestamps. Recovering the SQ ensures forward progress
and avoids silently dropping invalid timestamps.

The timestamp handler now takes mlx5e_ptpsq directly to access both CQ
stats and the recovery state.

Signed-off-by: Carolina Jubran <cjubran@nvidia.com>
Reviewed-by: Shahar Shitrit <shshitrit@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/1763415729-1238421-3-git-send-email-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c
drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h
drivers/net/ethernet/mellanox/mlx5/core/en_tx.c

index 12e10feb30f0688a6eb83443bf91691027839a70..424f8a2728a3efcf1a817ebcdb1242933ad9200a 100644 (file)
@@ -82,7 +82,7 @@ static struct mlx5e_skb_cb_hwtstamp *mlx5e_skb_cb_get_hwts(struct sk_buff *skb)
 }
 
 static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
-                                    struct mlx5e_ptp_cq_stats *cq_stats)
+                                    struct mlx5e_ptpsq *ptpsq)
 {
        struct skb_shared_hwtstamps hwts = {};
        ktime_t diff;
@@ -92,8 +92,17 @@ static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
 
        /* Maximal allowed diff is 1 / 128 second */
        if (diff > (NSEC_PER_SEC >> 7)) {
-               cq_stats->abort++;
-               cq_stats->abort_abs_diff_ns += diff;
+               struct mlx5e_txqsq *sq = &ptpsq->txqsq;
+
+               ptpsq->cq_stats->abort++;
+               ptpsq->cq_stats->abort_abs_diff_ns += diff;
+               if (diff > (NSEC_PER_SEC >> 1) &&
+                   !test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state)) {
+                       netdev_warn(sq->channel->netdev,
+                                   "PTP TX timestamp difference between CQE and port exceeds threshold: %lld ns, recovering SQ %u\n",
+                                   (s64)diff, sq->sqn);
+                       queue_work(sq->priv->wq, &ptpsq->report_unhealthy_work);
+               }
                return;
        }
 
@@ -103,7 +112,7 @@ static void mlx5e_skb_cb_hwtstamp_tx(struct sk_buff *skb,
 
 void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
                                   ktime_t hwtstamp,
-                                  struct mlx5e_ptp_cq_stats *cq_stats)
+                                  struct mlx5e_ptpsq *ptpsq)
 {
        switch (hwtstamp_type) {
        case (MLX5E_SKB_CB_CQE_HWTSTAMP):
@@ -121,7 +130,7 @@ void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
            !mlx5e_skb_cb_get_hwts(skb)->port_hwtstamp)
                return;
 
-       mlx5e_skb_cb_hwtstamp_tx(skb, cq_stats);
+       mlx5e_skb_cb_hwtstamp_tx(skb, ptpsq);
        memset(skb->cb, 0, sizeof(struct mlx5e_skb_cb_hwtstamp));
 }
 
@@ -209,7 +218,7 @@ static void mlx5e_ptp_handle_ts_cqe(struct mlx5e_ptpsq *ptpsq,
 
        hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, get_cqe_ts(cqe));
        mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_PORT_HWTSTAMP,
-                                     hwtstamp, ptpsq->cq_stats);
+                                     hwtstamp, ptpsq);
        ptpsq->cq_stats->cqe++;
 
        mlx5e_ptpsq_mark_ts_cqes_undelivered(ptpsq, hwtstamp);
index 1c0e0a86a9ac85523bc66987d8823592c8c1ff2d..2a457a2ed7079e62068732f06573d3fc474fea29 100644 (file)
@@ -147,7 +147,7 @@ enum {
 
 void mlx5e_skb_cb_hwtstamp_handler(struct sk_buff *skb, int hwtstamp_type,
                                   ktime_t hwtstamp,
-                                  struct mlx5e_ptp_cq_stats *cq_stats);
+                                  struct mlx5e_ptpsq *ptpsq);
 
 void mlx5e_skb_cb_hwtstamp_init(struct sk_buff *skb);
 #endif /* __MLX5_EN_PTP_H__ */
index 2702b3885f062b1bf5dc91b08e2067de320c027d..14884b9ea7f396069c17b778d2a65cb05608ff0e 100644 (file)
@@ -755,7 +755,7 @@ static void mlx5e_consume_skb(struct mlx5e_txqsq *sq, struct sk_buff *skb,
                hwts.hwtstamp = mlx5e_cqe_ts_to_ns(sq->ptp_cyc2time, sq->clock, ts);
                if (sq->ptpsq) {
                        mlx5e_skb_cb_hwtstamp_handler(skb, MLX5E_SKB_CB_CQE_HWTSTAMP,
-                                                     hwts.hwtstamp, sq->ptpsq->cq_stats);
+                                                     hwts.hwtstamp, sq->ptpsq);
                } else {
                        skb_tstamp_tx(skb, &hwts);
                        sq->stats->timestamps++;