gve: implement PTP gettimex64

author Jordan Rhee <jordanrhee@google.com>

Thu, 14 May 2026 22:58:42 +0000 (22:58 +0000)

committer Jakub Kicinski <kuba@kernel.org>

Wed, 20 May 2026 01:17:28 +0000 (18:17 -0700)
author Jordan Rhee <jordanrhee@google.com>
Thu, 14 May 2026 22:58:42 +0000 (22:58 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Wed, 20 May 2026 01:17:28 +0000 (18:17 -0700)
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h

index 7b69d0cfc0d5ccb6737ba5ceec0bd5743286913c..4de3ce60060e0e1ae8dbb4403e02d68ff28b017e 100644 (file)
--- a/drivers/net/ethernet/google/gve/gve.h
+++ b/drivers/net/ethernet/google/gve/gve.h
@@ -880,6 +880,14 @@ struct gve_priv {
         u32 stats_report_trigger_cnt; /* count of device-requested stats-reports since last reset */
         u32 suspend_cnt; /* count of times suspended */
         u32 resume_cnt; /* count of times resumed */
+       /* count of cross-timestamps attempted using system timestamps
+        * from the AQ command
+        */
+       u32 ptp_precise_xtstamps;
+       /* count of cross-timestamps attempted using system timestamps sampled
+        * by the driver
+        */
+       u32 ptp_fallback_xtstamps;
         struct workqueue_struct *gve_wq;
         struct work_struct service_task;
         struct work_struct stats_report_task;
diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h

index 22a74b6aa17ea6e3019fee685b02b33149f33440..e6dcf6da9091d27512ff7aad117340affa668f8e 100644 (file)
--- a/drivers/net/ethernet/google/gve/gve_adminq.h
+++ b/drivers/net/ethernet/google/gve/gve_adminq.h
@@ -411,8 +411,8 @@ static_assert(sizeof(struct gve_adminq_report_nic_ts) == 16);
  
+/*
+ * NIC timestamp report. All fields are big-endian; the driver converts
+ * the ones it reads with be64_to_cpu().
+ */
  struct gve_nic_ts_report {
         __be64 nic_timestamp; /* NIC clock in nanoseconds */
-       __be64 reserved1;
-       __be64 reserved2;
+       __be64 pre_cycles; /* System cycle counter before NIC clock read */
+       __be64 post_cycles; /* System cycle counter after NIC clock read */
         __be64 reserved3;
         __be64 reserved4;
  };
diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c

index 4fd7e8a442c5e8018226dc846d1407b20372a921..8a088dcc3603ec44a50568ef42243385e36788b0 100644 (file)
--- a/drivers/net/ethernet/google/gve/gve_ethtool.c
+++ b/drivers/net/ethernet/google/gve/gve_ethtool.c
@@ -46,6 +46,7 @@ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = {
         "rx_hsplit_unsplit_pkt",
         "interface_up_cnt", "interface_down_cnt", "reset_cnt",
         "page_alloc_fail", "dma_mapping_error", "stats_report_trigger_cnt",
+       "ptp_precise_xtstamps", "ptp_fallback_xtstamps",
  };
  
  static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = {
@@ -269,6 +270,8 @@ gve_get_ethtool_stats(struct net_device *netdev,
         data[i++] = priv->page_alloc_fail;
         data[i++] = priv->dma_mapping_error;
         data[i++] = priv->stats_report_trigger_cnt;
+       data[i++] = priv->ptp_precise_xtstamps;
+       data[i++] = priv->ptp_fallback_xtstamps;
         i = GVE_MAIN_STATS_LEN;
  
         rx_base_stats_idx = 0;
diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c

index ad15f1209a83e1a07d0f9eec1d7f5f47e4c1319c..bc230e68eb1d3a95cb0e80179e80a3c624c8aaa5 100644 (file)
--- a/drivers/net/ethernet/google/gve/gve_ptp.c
+++ b/drivers/net/ethernet/google/gve/gve_ptp.c
@@ -10,28 +10,261 @@
  /* Interval to schedule a nic timestamp calibration, 250ms. */
  #define GVE_NIC_TS_SYNC_INTERVAL_MS 250
  
+/*
+ * Stores cycle counter samples in get_cycles() units from a
+ * sandwiched NIC clock read. The nic_* values are copied from the NIC
+ * timestamp report and the host_* values are sampled by the driver around
+ * the AQ command. A usable sample is expected to satisfy
+ * host_pre_cycles <= nic_pre_cycles <= nic_post_cycles <= host_post_cycles.
+ */
+struct gve_sysclock_sample {
+       /* Cycle counter from NIC before clock read (report's pre_cycles) */
+       u64 nic_pre_cycles;
+       /* Cycle counter from NIC after clock read (report's post_cycles) */
+       u64 nic_post_cycles;
+       /* Cycle counter from host before issuing AQ command */
+       cycles_t host_pre_cycles;
+       /* Cycle counter from host after AQ command returns */
+       cycles_t host_post_cycles;
+};
+
+/*
+ * Read NIC clock by issuing the AQ command. The command is subject to
+ * rate limiting and may need to be retried. Requires nic_ts_read_lock
+ * to be held. May sleep between retries (calls fsleep()).
+ *
+ * @ptp: PTP state; the command is issued against ptp->nic_ts_report_bus.
+ * @pre_cycles: set to get_cycles() sampled immediately before the most
+ *              recent AQ command attempt.
+ * @post_cycles: set to get_cycles() sampled immediately after the most
+ *               recent AQ command attempt.
+ *
+ * An attempt that returns -EAGAIN is retried after a sleep that starts at
+ * 1000 us and doubles on every retry, for as long as the 100 ms deadline
+ * computed on entry has not passed.
+ *
+ * Return: the result of gve_adminq_report_nic_ts() when it is anything
+ * other than -EAGAIN, or -ETIMEDOUT when the deadline passes while the
+ * command is still returning -EAGAIN.
+ */
+static int gve_ptp_read_timestamp(struct gve_ptp *ptp, cycles_t *pre_cycles,
+                                 cycles_t *post_cycles)
+{
+       unsigned long deadline = jiffies + msecs_to_jiffies(100);
+       unsigned long delay_us = 1000;
+       int err;
+
+       lockdep_assert_held(&ptp->nic_ts_read_lock);
+
+       do {
+               /* Host-side cycle samples bracket each individual attempt */
+               *pre_cycles = get_cycles();
+               err = gve_adminq_report_nic_ts(ptp->priv,
+                                              ptp->nic_ts_report_bus);
+
+               /* Prevent get_cycles() from being speculatively executed
+                * before the AdminQ command
+                */
+               rmb();
+               *post_cycles = get_cycles();
+               if (likely(err != -EAGAIN))
+                       return err;
+
+               fsleep(delay_us);
+
+               /* Exponential backoff */
+               delay_us *= 2;
+       } while (time_before(jiffies, deadline));
+
+       return -ETIMEDOUT;
+}
+
  /* Read the nic timestamp from hardware via the admin queue. */
-static int gve_clock_nic_ts_read(struct gve_ptp *ptp, u64 *nic_raw)
+static int gve_clock_nic_ts_read(struct gve_ptp *ptp, u64 *nic_raw,
+                                struct gve_sysclock_sample *sysclock)
  {
+       cycles_t host_pre_cycles, host_post_cycles;
+       struct gve_nic_ts_report *ts_report;
         int err;
  
         mutex_lock(&ptp->nic_ts_read_lock);
-       err = gve_adminq_report_nic_ts(ptp->priv, ptp->nic_ts_report_bus);
-       if (err)
+       err = gve_ptp_read_timestamp(ptp, &host_pre_cycles, &host_post_cycles);
+       if (err) {
+               dev_err_ratelimited(&ptp->priv->pdev->dev,
+                                   "AdminQ timestamp read failed: %d\n", err);
                 goto out;
+       }
+
+       /* The report is parsed while nic_ts_read_lock is still held.
+        * Its fields are big-endian and are converted to CPU order here.
+        */
+       ts_report = ptp->nic_ts_report;
+       *nic_raw = be64_to_cpu(ts_report->nic_timestamp);
  
-       *nic_raw = be64_to_cpu(ptp->nic_ts_report->nic_timestamp);
+       /* sysclock is optional: callers that only need the NIC time pass
+        * NULL and the cycle counter samples are discarded.
+        */
+       if (sysclock) {
+               sysclock->nic_pre_cycles = be64_to_cpu(ts_report->pre_cycles);
+               sysclock->nic_post_cycles = be64_to_cpu(ts_report->post_cycles);
+               sysclock->host_pre_cycles = host_pre_cycles;
+               sysclock->host_post_cycles = host_post_cycles;
+       }
  
  out:
         mutex_unlock(&ptp->nic_ts_read_lock);
         return err;
  }
  
+/*
+ * Context passed through get_device_system_crosststamp() to
+ * gve_cycles_to_clock_fn().
+ */
+struct gve_cycles_to_clock_callback_ctx {
+       /* Raw cycle count to report as the system counter value */
+       u64 cycles;
+};
+
+/*
+ * Callback for get_device_system_crosststamp(). Reports the cycle count
+ * carried in @ctx as the system counter value and tags it with the
+ * clocksource ID selected for the build architecture. No device time is
+ * supplied, so *device_time is set to zero.
+ *
+ * Return: 0 on x86 and arm64 builds, -EOPNOTSUPP otherwise.
+ */
+static int gve_cycles_to_clock_fn(ktime_t *device_time,
+                                 struct system_counterval_t *system_counterval,
+                                 void *ctx)
+{
+       const struct gve_cycles_to_clock_callback_ctx *cb_ctx = ctx;
+
+       *device_time = 0;
+       system_counterval->use_nsecs = false;
+       system_counterval->cycles = cb_ctx->cycles;
+
+       /* Only architectures with a known counter clocksource are handled */
+       if (!IS_ENABLED(CONFIG_X86) && !IS_ENABLED(CONFIG_ARM64))
+               return -EOPNOTSUPP;
+
+       system_counterval->cs_id = IS_ENABLED(CONFIG_X86) ?
+                                  CSID_X86_TSC : CSID_ARM_ARCH_COUNTER;
+
+       return 0;
+}
+
+/*
+ * Translate a raw cycle count (e.g. a get_cycles() value) into a timespec64
+ * on the system clock selected by @clockid. @snap must have been captured
+ * before the cycle counter was sampled.
+ *
+ * Only CLOCK_REALTIME and CLOCK_MONOTONIC_RAW are handled.
+ *
+ * Return: 0 on success, the error from get_device_system_crosststamp() if
+ * the conversion fails, or -EOPNOTSUPP for any other @clockid.
+ */
+static int gve_cycles_to_timespec64(struct gve_priv *priv, clockid_t clockid,
+                                   struct system_time_snapshot *snap,
+                                   u64 cycles, struct timespec64 *ts)
+{
+       struct gve_cycles_to_clock_callback_ctx cb_ctx = { .cycles = cycles };
+       struct system_device_crosststamp xts;
+       int ret;
+
+       ret = get_device_system_crosststamp(gve_cycles_to_clock_fn, &cb_ctx,
+                                           snap, &xts);
+       if (ret) {
+               dev_err_ratelimited(&priv->pdev->dev,
+                                   "get_device_system_crosststamp() failed to convert %llu cycles to system time: %d\n",
+                                   cycles, ret);
+               return ret;
+       }
+
+       if (clockid == CLOCK_REALTIME) {
+               *ts = ktime_to_timespec64(xts.sys_realtime);
+               return 0;
+       }
+
+       if (clockid == CLOCK_MONOTONIC_RAW) {
+               *ts = ktime_to_timespec64(xts.sys_monoraw);
+               return 0;
+       }
+
+       dev_err_ratelimited(&priv->pdev->dev,
+                           "Cycle count conversion to clockid %d not supported\n",
+                           clockid);
+       return -EOPNOTSUPP;
+}
+
+/*
+ * Decide whether the system cycle counts returned by the AdminQ command
+ * can be used for a cross-timestamp on @clockid.
+ *
+ * That is only possible for CLOCK_REALTIME and CLOCK_MONOTONIC_RAW, and
+ * only when the current system clocksource is the same counter the
+ * AdminQ command reports. In every other case the caller has to fall back
+ * to sampling system time on the host, which is less accurate.
+ */
+static bool
+gve_can_use_system_ts_from_device(enum clocksource_ids system_clock_source,
+                                 clockid_t clockid)
+{
+       switch (clockid) {
+       case CLOCK_REALTIME:
+       case CLOCK_MONOTONIC_RAW:
+               break;
+       default:
+               return false;
+       }
+
+       if (IS_ENABLED(CONFIG_X86))
+               return system_clock_source == CSID_X86_TSC;
+
+       if (IS_ENABLED(CONFIG_ARM64))
+               return system_clock_source == CSID_ARM_ARCH_COUNTER;
+
+       return false;
+}
+
+/* Frequency adjustment is not implemented; always returns -EOPNOTSUPP. */
+static int gve_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+       return -EOPNOTSUPP;
+}
+
+/* Time offset adjustment is not implemented; always returns -EOPNOTSUPP. */
+static int gve_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+       return -EOPNOTSUPP;
+}
+
+/*
+ * Read the NIC clock through the AdminQ and, when @sts is supplied,
+ * bracket the read with system timestamps.
+ *
+ * @info: clock info embedded in struct gve_ptp.
+ * @ts: receives the NIC clock value converted with ns_to_timespec64().
+ * @sts: optional; when non-NULL its pre_ts/post_ts receive system time
+ *       from before/after the NIC clock read, on the clock sts->clockid.
+ *
+ * If gve_can_use_system_ts_from_device() accepts the current clocksource
+ * and sts->clockid, pre_ts/post_ts are derived from the cycle counts
+ * reported by the device. Otherwise they are sampled by the driver with
+ * ptp_read_system_prets()/ptp_read_system_postts().
+ *
+ * Return: 0 on success, -EBADMSG if the host and device cycle counts are
+ * not in the expected order, or the error from the NIC clock read or the
+ * cycle count conversion.
+ */
  static int gve_ptp_gettimex64(struct ptp_clock_info *info,
                               struct timespec64 *ts,
                               struct ptp_system_timestamp *sts)
  {
-       return -EOPNOTSUPP;
+       struct gve_ptp *ptp = container_of(info, struct gve_ptp, info);
+       struct gve_sysclock_sample sysclock = {0};
+       bool use_system_ts_from_device = false;
+       struct gve_priv *priv = ptp->priv;
+       struct system_time_snapshot snap;
+       u64 nic_ts;
+       int err;
+
+       if (sts) {
+               /* This snapshot is used both to query the current system
+                * clocksource and to convert the cycle counts returned
+                * by the AdminQ command to ktime. It does not need to be
+                * taken inside the retry loop because retries and lock
+                * contention are expected to be extremely rare.
+                *
+                * If the system clock source changes between here and
+                * when get_device_system_crosststamp() is called,
+                * get_device_system_crosststamp() will fail which will
+                * cause one failed sample, and the next one will succeed.
+                */
+               ktime_get_snapshot(&snap);
+               use_system_ts_from_device =
+                       gve_can_use_system_ts_from_device(snap.cs_id,
+                                                         sts->clockid);
+               /* Counted per attempt, before the NIC clock read, so
+                * reads that later fail are included.
+                */
+               if (use_system_ts_from_device)
+                       priv->ptp_precise_xtstamps++;
+               else
+                       priv->ptp_fallback_xtstamps++;
+       }
+
+       /* Fallback path; also reached when sts is NULL.
+        * NOTE(review): relies on the ptp_read_system_*ts() helpers
+        * accepting a NULL sts - confirm.
+        */
+       if (unlikely(!use_system_ts_from_device))
+               ptp_read_system_prets(sts);
+
+       err = gve_clock_nic_ts_read(ptp, &nic_ts, sts ? &sysclock : NULL);
+       if (err)
+               return err;
+
+       if (unlikely(!use_system_ts_from_device))
+               ptp_read_system_postts(sts);
+
+       if (sts && likely(use_system_ts_from_device)) {
+               /* Reject samples with out of order system clock values.
+                * Firmware must return valid non-zero cycle counts.
+                */
+               if (!(sysclock.host_pre_cycles <= sysclock.nic_pre_cycles &&
+                     sysclock.nic_pre_cycles  <= sysclock.nic_post_cycles &&
+                     sysclock.nic_post_cycles <= sysclock.host_post_cycles)) {
+                       dev_err_ratelimited(&priv->pdev->dev,
+                                           "AdminQ system clock cycle counts out of order. Expecting %llu <= %llu <= %llu <= %llu\n",
+                                           (u64)sysclock.host_pre_cycles,
+                                           sysclock.nic_pre_cycles,
+                                           sysclock.nic_post_cycles,
+                                           (u64)sysclock.host_post_cycles);
+                       return -EBADMSG;
+               }
+
+               /* Both conversions use the snapshot taken before the read */
+               err = gve_cycles_to_timespec64(priv, sts->clockid, &snap,
+                                              sysclock.nic_pre_cycles,
+                                              &sts->pre_ts);
+               if (err)
+                       return err;
+
+               err = gve_cycles_to_timespec64(priv, sts->clockid, &snap,
+                                              sysclock.nic_post_cycles,
+                                              &sts->post_ts);
+               if (err)
+                       return err;
+       }
+
+       *ts = ns_to_timespec64(nic_ts);
+
+       return 0;
  }
  
  static int gve_ptp_settime64(struct ptp_clock_info *info,
@@ -50,7 +283,7 @@ static long gve_ptp_do_aux_work(struct ptp_clock_info *info)
         if (gve_get_reset_in_progress(priv) || !gve_get_admin_queue_ok(priv))
                 goto out;
  
-       err = gve_clock_nic_ts_read(ptp, &nic_raw);
+       err = gve_clock_nic_ts_read(ptp, &nic_raw, NULL);
         if (err) {
                 dev_err_ratelimited(&priv->pdev->dev, "%s read err %d\n",
                                     __func__, err);
@@ -65,6 +298,8 @@ out:
  static const struct ptp_clock_info gve_ptp_caps = {
         .owner          = THIS_MODULE,
         .name           = "gve clock",
+       .adjfine        = gve_ptp_adjfine,
+       .adjtime        = gve_ptp_adjtime,
         .gettimex64     = gve_ptp_gettimex64,
         .settime64      = gve_ptp_settime64,
         .do_aux_work    = gve_ptp_do_aux_work,
@@ -93,7 +328,7 @@ int gve_init_clock(struct gve_priv *priv)
                 goto free_ptp;
         }
  
-       err = gve_clock_nic_ts_read(ptp, &nic_raw);
+       err = gve_clock_nic_ts_read(ptp, &nic_raw, NULL);
         if (err) {
                 dev_err(&priv->pdev->dev, "failed to read NIC clock %d\n", err);
                 goto free_dma_mem;
author	Jordan Rhee <jordanrhee@google.com>
	Thu, 14 May 2026 22:58:42 +0000 (22:58 +0000)
committer	Jakub Kicinski <kuba@kernel.org>
	Wed, 20 May 2026 01:17:28 +0000 (18:17 -0700)
drivers/net/ethernet/google/gve/gve.h		patch \| blob \| blame \| history
drivers/net/ethernet/google/gve/gve_adminq.h		patch \| blob \| blame \| history
drivers/net/ethernet/google/gve/gve_ethtool.c		patch \| blob \| blame \| history
drivers/net/ethernet/google/gve/gve_ptp.c		patch \| blob \| blame \| history