From: Jakub Kicinski Date: Wed, 20 May 2026 22:11:51 +0000 (-0700) Subject: Revert "Merge branch 'gve-add-support-for-ptp-gettimex64'" X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0ae361f7e43ca820e1f6219759358b4e71dc26dc;p=thirdparty%2Flinux.git Revert "Merge branch 'gve-add-support-for-ptp-gettimex64'" This reverts commit 9587ed8137fb83d93f84b858337412f4500b21e9, reversing changes made to bcdfd9fb109e0c9d76c345b2346b6b75ed1f476d. Per tglx's objections: https://lore.kernel.org/87mrxtwzz9.ffs@tglx Signed-off-by: Jakub Kicinski --- diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 4de3ce60060e0..1d66d3834f7e6 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -792,9 +792,6 @@ struct gve_ptp { struct ptp_clock_info info; struct ptp_clock *clock; struct gve_priv *priv; - struct mutex nic_ts_read_lock; /* Protects nic_ts_report */ - struct gve_nic_ts_report *nic_ts_report; - dma_addr_t nic_ts_report_bus; }; struct gve_priv { @@ -880,14 +877,6 @@ struct gve_priv { u32 stats_report_trigger_cnt; /* count of device-requested stats-reports since last reset */ u32 suspend_cnt; /* count of times suspended */ u32 resume_cnt; /* count of times resumed */ - /* count of cross-timestamps attempted using system timestamps - * from the AQ command - */ - u32 ptp_precise_xtstamps; - /* count of cross-timestamps attempted using system timestamps sampled - * by the driver - */ - u32 ptp_fallback_xtstamps; struct workqueue_struct *gve_wq; struct work_struct service_task; struct work_struct stats_report_task; @@ -934,6 +923,8 @@ struct gve_priv { bool nic_timestamp_supported; struct gve_ptp *ptp; struct kernel_hwtstamp_config ts_config; + struct gve_nic_ts_report *nic_ts_report; + dma_addr_t nic_ts_report_bus; u64 last_sync_nic_counter; /* Clock counter from last NIC TS report */ }; @@ -1210,7 +1201,7 @@ static inline bool gve_supports_xdp_xmit(struct gve_priv *priv) static inline bool gve_is_clock_enabled(struct gve_priv *priv) { - return priv->ptp; + return priv->nic_ts_report; } /* gqi napi handler defined in gve_main.c */ @@ -1330,9 +1321,14 @@ int gve_flow_rules_reset(struct gve_priv *priv); int gve_init_rss_config(struct gve_priv *priv, u16 num_queues); /* PTP and timestamping */ #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) +int gve_clock_nic_ts_read(struct gve_priv *priv); int gve_init_clock(struct gve_priv *priv); void gve_teardown_clock(struct gve_priv *priv); #else /* CONFIG_PTP_1588_CLOCK */ +static inline int gve_clock_nic_ts_read(struct gve_priv *priv) +{ + return -EOPNOTSUPP; +} static inline int gve_init_clock(struct gve_priv *priv) { diff --git a/drivers/net/ethernet/google/gve/gve_adminq.c b/drivers/net/ethernet/google/gve/gve_adminq.c index 0d5a67523cbab..08587bf40ed4a 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.c +++ b/drivers/net/ethernet/google/gve/gve_adminq.c @@ -416,10 +416,16 @@ static bool gve_adminq_wait_for_cmd(struct gve_priv *priv, u32 prod_cnt) static int gve_adminq_parse_err(struct gve_priv *priv, u32 status) { + if (status != GVE_ADMINQ_COMMAND_PASSED && + status != GVE_ADMINQ_COMMAND_UNSET) { + dev_err(&priv->pdev->dev, "AQ command failed with status %d\n", status); + priv->adminq_cmd_fail++; + } switch (status) { case GVE_ADMINQ_COMMAND_PASSED: return 0; case GVE_ADMINQ_COMMAND_UNSET: + dev_err(&priv->pdev->dev, "parse_aq_err: err and status both unset, this should not be possible.\n"); return -EINVAL; case GVE_ADMINQ_COMMAND_ERROR_ABORTED: case GVE_ADMINQ_COMMAND_ERROR_CANCELLED: @@ -449,27 +455,6 @@ static int gve_adminq_parse_err(struct gve_priv *priv, u32 status) } } -static bool gve_adminq_is_retryable(enum gve_adminq_opcodes opcode) -{ - switch (opcode) { - case GVE_ADMINQ_REPORT_NIC_TIMESTAMP: - return true; - default: - return false; - } -} - -static enum gve_adminq_opcodes gve_extract_opcode(union gve_adminq_command *cmd) -{ - u32 opcode; - - opcode = be32_to_cpu(READ_ONCE(cmd->opcode)); - if (opcode == GVE_ADMINQ_EXTENDED_COMMAND) - opcode = be32_to_cpu(cmd->extended_command.inner_opcode); - - return opcode; -} - /* Flushes all AQ commands currently queued and waits for them to complete. * If there are failures, it will return the first error. */ @@ -492,24 +477,14 @@ static int gve_adminq_kick_and_wait(struct gve_priv *priv) for (i = tail; i < head; i++) { union gve_adminq_command *cmd; - u32 status; - int err; + u32 status, err; cmd = &priv->adminq[i & priv->adminq_mask]; status = be32_to_cpu(READ_ONCE(cmd->status)); err = gve_adminq_parse_err(priv, status); - if (err) { - enum gve_adminq_opcodes opcode = gve_extract_opcode(cmd); - - priv->adminq_cmd_fail++; - if (!gve_adminq_is_retryable(opcode) || err != -EAGAIN) - dev_err_ratelimited(&priv->pdev->dev, - "AQ command %d failed with status %d\n", - opcode, status); - + if (err) // Return the first error if we failed. return err; - } } return 0; diff --git a/drivers/net/ethernet/google/gve/gve_adminq.h b/drivers/net/ethernet/google/gve/gve_adminq.h index e6dcf6da9091d..22a74b6aa17ea 100644 --- a/drivers/net/ethernet/google/gve/gve_adminq.h +++ b/drivers/net/ethernet/google/gve/gve_adminq.h @@ -411,8 +411,8 @@ static_assert(sizeof(struct gve_adminq_report_nic_ts) == 16); struct gve_nic_ts_report { __be64 nic_timestamp; /* NIC clock in nanoseconds */ - __be64 pre_cycles; /* System cycle counter before NIC clock read */ - __be64 post_cycles; /* System cycle counter after NIC clock read */ + __be64 reserved1; + __be64 reserved2; __be64 reserved3; __be64 reserved4; }; diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 8a088dcc3603e..dc2213b5ce246 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -46,7 +46,6 @@ static const char gve_gstrings_main_stats[][ETH_GSTRING_LEN] = { "rx_hsplit_unsplit_pkt", "interface_up_cnt", "interface_down_cnt", "reset_cnt", "page_alloc_fail", "dma_mapping_error", "stats_report_trigger_cnt", - "ptp_precise_xtstamps", "ptp_fallback_xtstamps", }; static const char gve_gstrings_rx_stats[][ETH_GSTRING_LEN] = { @@ -270,8 +269,6 @@ gve_get_ethtool_stats(struct net_device *netdev, data[i++] = priv->page_alloc_fail; data[i++] = priv->dma_mapping_error; data[i++] = priv->stats_report_trigger_cnt; - data[i++] = priv->ptp_precise_xtstamps; - data[i++] = priv->ptp_fallback_xtstamps; i = GVE_MAIN_STATS_LEN; rx_base_stats_idx = 0; @@ -975,7 +972,8 @@ static int gve_get_ts_info(struct net_device *netdev, info->rx_filters |= BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL); - info->phc_index = ptp_clock_index(priv->ptp->clock); + if (priv->ptp) + info->phc_index = ptp_clock_index(priv->ptp->clock); } return 0; diff --git a/drivers/net/ethernet/google/gve/gve_ptp.c b/drivers/net/ethernet/google/gve/gve_ptp.c index bc230e68eb1d3..06b1cf4a5efcc 100644 --- a/drivers/net/ethernet/google/gve/gve_ptp.c +++ b/drivers/net/ethernet/google/gve/gve_ptp.c @@ -10,261 +10,27 @@ /* Interval to schedule a nic timestamp calibration, 250ms. */ #define GVE_NIC_TS_SYNC_INTERVAL_MS 250 -/* - * Stores cycle counter samples in get_cycles() units from a - * sandwiched NIC clock read - */ -struct gve_sysclock_sample { - /* Cycle counter from NIC before clock read */ - u64 nic_pre_cycles; - /* Cycle counter from NIC after clock read */ - u64 nic_post_cycles; - /* Cycle counter from host before issuing AQ command */ - cycles_t host_pre_cycles; - /* Cycle counter from host after AQ command returns */ - cycles_t host_post_cycles; -}; - -/* - * Read NIC clock by issuing the AQ command. The command is subject to - * rate limiting and may need to be retried. Requires nic_ts_read_lock - * to be held. - */ -static int gve_ptp_read_timestamp(struct gve_ptp *ptp, cycles_t *pre_cycles, - cycles_t *post_cycles) -{ - unsigned long deadline = jiffies + msecs_to_jiffies(100); - unsigned long delay_us = 1000; - int err; - - lockdep_assert_held(&ptp->nic_ts_read_lock); - - do { - *pre_cycles = get_cycles(); - err = gve_adminq_report_nic_ts(ptp->priv, - ptp->nic_ts_report_bus); - - /* Prevent get_cycles() from being speculatively executed - * before the AdminQ command - */ - rmb(); - *post_cycles = get_cycles(); - if (likely(err != -EAGAIN)) - return err; - - fsleep(delay_us); - - /* Exponential backoff */ - delay_us *= 2; - } while (time_before(jiffies, deadline)); - - return -ETIMEDOUT; -} - /* Read the nic timestamp from hardware via the admin queue. */ -static int gve_clock_nic_ts_read(struct gve_ptp *ptp, u64 *nic_raw, - struct gve_sysclock_sample *sysclock) -{ - cycles_t host_pre_cycles, host_post_cycles; - struct gve_nic_ts_report *ts_report; - int err; - - mutex_lock(&ptp->nic_ts_read_lock); - err = gve_ptp_read_timestamp(ptp, &host_pre_cycles, &host_post_cycles); - if (err) { - dev_err_ratelimited(&ptp->priv->pdev->dev, - "AdminQ timestamp read failed: %d\n", err); - goto out; - } - - ts_report = ptp->nic_ts_report; - *nic_raw = be64_to_cpu(ts_report->nic_timestamp); - - if (sysclock) { - sysclock->nic_pre_cycles = be64_to_cpu(ts_report->pre_cycles); - sysclock->nic_post_cycles = be64_to_cpu(ts_report->post_cycles); - sysclock->host_pre_cycles = host_pre_cycles; - sysclock->host_post_cycles = host_post_cycles; - } - -out: - mutex_unlock(&ptp->nic_ts_read_lock); - return err; -} - -struct gve_cycles_to_clock_callback_ctx { - u64 cycles; -}; - -static int gve_cycles_to_clock_fn(ktime_t *device_time, - struct system_counterval_t *system_counterval, - void *ctx) +int gve_clock_nic_ts_read(struct gve_priv *priv) { - struct gve_cycles_to_clock_callback_ctx *context = ctx; - - *device_time = 0; - - system_counterval->cycles = context->cycles; - system_counterval->use_nsecs = false; - - if (IS_ENABLED(CONFIG_X86)) - system_counterval->cs_id = CSID_X86_TSC; - else if (IS_ENABLED(CONFIG_ARM64)) - system_counterval->cs_id = CSID_ARM_ARCH_COUNTER; - else - return -EOPNOTSUPP; - - return 0; -} - -/* - * Convert a raw cycle count (e.g. from get_cycles()) to the system clock - * type specified by clockid. The system_time_snapshot must be taken before - * the cycle counter is sampled. - */ -static int gve_cycles_to_timespec64(struct gve_priv *priv, clockid_t clockid, - struct system_time_snapshot *snap, - u64 cycles, struct timespec64 *ts) -{ - struct gve_cycles_to_clock_callback_ctx ctx = {0}; - struct system_device_crosststamp xtstamp; + u64 nic_raw; int err; - ctx.cycles = cycles; - err = get_device_system_crosststamp(gve_cycles_to_clock_fn, &ctx, snap, - &xtstamp); - if (err) { - dev_err_ratelimited(&priv->pdev->dev, - "get_device_system_crosststamp() failed to convert %llu cycles to system time: %d\n", - cycles, - err); + err = gve_adminq_report_nic_ts(priv, priv->nic_ts_report_bus); + if (err) return err; - } - switch (clockid) { - case CLOCK_REALTIME: - *ts = ktime_to_timespec64(xtstamp.sys_realtime); - break; - case CLOCK_MONOTONIC_RAW: - *ts = ktime_to_timespec64(xtstamp.sys_monoraw); - break; - default: - dev_err_ratelimited(&priv->pdev->dev, - "Cycle count conversion to clockid %d not supported\n", - clockid); - return -EOPNOTSUPP; - } + nic_raw = be64_to_cpu(priv->nic_ts_report->nic_timestamp); + WRITE_ONCE(priv->last_sync_nic_counter, nic_raw); return 0; } -static bool -gve_can_use_system_ts_from_device(enum clocksource_ids system_clock_source, - clockid_t clockid) -{ - if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC_RAW) - return false; - - /* If the system clock source matches the system clock - * returned by the AdminQ command, we can use the system - * timestamps returned by the device, otherwise we have to - * fall back to sampling system time from the host which - * is less accurate. - */ - if (IS_ENABLED(CONFIG_X86)) - return system_clock_source == CSID_X86_TSC; - else if (IS_ENABLED(CONFIG_ARM64)) - return system_clock_source == CSID_ARM_ARCH_COUNTER; - - return false; -} - -static int gve_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) -{ - return -EOPNOTSUPP; -} - -static int gve_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) -{ - return -EOPNOTSUPP; -} - static int gve_ptp_gettimex64(struct ptp_clock_info *info, struct timespec64 *ts, struct ptp_system_timestamp *sts) { - struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); - struct gve_sysclock_sample sysclock = {0}; - bool use_system_ts_from_device = false; - struct gve_priv *priv = ptp->priv; - struct system_time_snapshot snap; - u64 nic_ts; - int err; - - if (sts) { - /* This snapshot is used both to query the current system - * clocksource and to convert the cycle counts returned - * by the AdminQ command to ktime. It does not need to be - * taken inside the retry loop because retries and lock - * contention are expected to be extremely rare. - * - * If the system clock source changes between here and - * when get_device_system_crosststamp() is called, - * get_device_system_crosststamp() will fail which will - * cause one failed sample, and the next one will succeed. - */ - ktime_get_snapshot(&snap); - use_system_ts_from_device = - gve_can_use_system_ts_from_device(snap.cs_id, - sts->clockid); - if (use_system_ts_from_device) - priv->ptp_precise_xtstamps++; - else - priv->ptp_fallback_xtstamps++; - } - - if (unlikely(!use_system_ts_from_device)) - ptp_read_system_prets(sts); - - err = gve_clock_nic_ts_read(ptp, &nic_ts, sts ? &sysclock : NULL); - if (err) - return err; - - if (unlikely(!use_system_ts_from_device)) - ptp_read_system_postts(sts); - - if (sts && likely(use_system_ts_from_device)) { - /* Reject samples with out of order system clock values. - * Firmware must return valid non-zero cycle counts. - */ - if (!(sysclock.host_pre_cycles <= sysclock.nic_pre_cycles && - sysclock.nic_pre_cycles <= sysclock.nic_post_cycles && - sysclock.nic_post_cycles <= sysclock.host_post_cycles)) { - dev_err_ratelimited(&priv->pdev->dev, - "AdminQ system clock cycle counts out of order. Expecting %llu <= %llu <= %llu <= %llu\n", - (u64)sysclock.host_pre_cycles, - sysclock.nic_pre_cycles, - sysclock.nic_post_cycles, - (u64)sysclock.host_post_cycles); - return -EBADMSG; - } - - err = gve_cycles_to_timespec64(priv, sts->clockid, &snap, - sysclock.nic_pre_cycles, - &sts->pre_ts); - if (err) - return err; - - err = gve_cycles_to_timespec64(priv, sts->clockid, &snap, - sysclock.nic_post_cycles, - &sts->post_ts); - if (err) - return err; - } - - *ts = ns_to_timespec64(nic_ts); - - return 0; + return -EOPNOTSUPP; } static int gve_ptp_settime64(struct ptp_clock_info *info, @@ -275,21 +41,17 @@ static int gve_ptp_settime64(struct ptp_clock_info *info, static long gve_ptp_do_aux_work(struct ptp_clock_info *info) { - struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); + const struct gve_ptp *ptp = container_of(info, struct gve_ptp, info); struct gve_priv *priv = ptp->priv; - u64 nic_raw; int err; if (gve_get_reset_in_progress(priv) || !gve_get_admin_queue_ok(priv)) goto out; - err = gve_clock_nic_ts_read(ptp, &nic_raw, NULL); - if (err) { - dev_err_ratelimited(&priv->pdev->dev, "%s read err %d\n", - __func__, err); - goto out; - } - WRITE_ONCE(priv->last_sync_nic_counter, nic_raw); + err = gve_clock_nic_ts_read(priv); + if (err && net_ratelimit()) + dev_err(&priv->pdev->dev, + "%s read err %d\n", __func__, err); out: return msecs_to_jiffies(GVE_NIC_TS_SYNC_INTERVAL_MS); @@ -298,78 +60,99 @@ out: static const struct ptp_clock_info gve_ptp_caps = { .owner = THIS_MODULE, .name = "gve clock", - .adjfine = gve_ptp_adjfine, - .adjtime = gve_ptp_adjtime, .gettimex64 = gve_ptp_gettimex64, .settime64 = gve_ptp_settime64, .do_aux_work = gve_ptp_do_aux_work, }; -int gve_init_clock(struct gve_priv *priv) +static int gve_ptp_init(struct gve_priv *priv) { struct gve_ptp *ptp; - u64 nic_raw; int err; - ptp = kzalloc_obj(*priv->ptp); - if (!ptp) + priv->ptp = kzalloc_obj(*priv->ptp); + if (!priv->ptp) return -ENOMEM; + ptp = priv->ptp; ptp->info = gve_ptp_caps; - ptp->priv = priv; - mutex_init(&ptp->nic_ts_read_lock); - ptp->nic_ts_report = - dma_alloc_coherent(&priv->pdev->dev, - sizeof(struct gve_nic_ts_report), - &ptp->nic_ts_report_bus, GFP_KERNEL); - if (!ptp->nic_ts_report) { - dev_err(&priv->pdev->dev, "%s dma alloc error\n", __func__); - err = -ENOMEM; - goto free_ptp; - } - - err = gve_clock_nic_ts_read(ptp, &nic_raw, NULL); - if (err) { - dev_err(&priv->pdev->dev, "failed to read NIC clock %d\n", err); - goto free_dma_mem; - } - WRITE_ONCE(priv->last_sync_nic_counter, nic_raw); - ptp->clock = ptp_clock_register(&ptp->info, &priv->pdev->dev); + if (IS_ERR(ptp->clock)) { dev_err(&priv->pdev->dev, "PTP clock registration failed\n"); - err = PTR_ERR(ptp->clock); - goto free_dma_mem; + err = PTR_ERR(ptp->clock); + goto free_ptp; } - priv->ptp = ptp; - ptp_schedule_worker(ptp->clock, - msecs_to_jiffies(GVE_NIC_TS_SYNC_INTERVAL_MS)); - + ptp->priv = priv; return 0; -free_dma_mem: - dma_free_coherent(&priv->pdev->dev, sizeof(struct gve_nic_ts_report), - ptp->nic_ts_report, ptp->nic_ts_report_bus); - ptp->nic_ts_report = NULL; free_ptp: - mutex_destroy(&ptp->nic_ts_read_lock); kfree(ptp); + priv->ptp = NULL; return err; } -void gve_teardown_clock(struct gve_priv *priv) +static void gve_ptp_release(struct gve_priv *priv) { struct gve_ptp *ptp = priv->ptp; if (!ptp) return; - priv->ptp = NULL; - ptp_clock_unregister(ptp->clock); - dma_free_coherent(&priv->pdev->dev, sizeof(struct gve_nic_ts_report), - ptp->nic_ts_report, ptp->nic_ts_report_bus); - ptp->nic_ts_report = NULL; - mutex_destroy(&ptp->nic_ts_read_lock); + if (ptp->clock) + ptp_clock_unregister(ptp->clock); + kfree(ptp); + priv->ptp = NULL; +} + +int gve_init_clock(struct gve_priv *priv) +{ + int err; + + err = gve_ptp_init(priv); + if (err) + return err; + + priv->nic_ts_report = + dma_alloc_coherent(&priv->pdev->dev, + sizeof(struct gve_nic_ts_report), + &priv->nic_ts_report_bus, + GFP_KERNEL); + if (!priv->nic_ts_report) { + dev_err(&priv->pdev->dev, "%s dma alloc error\n", __func__); + err = -ENOMEM; + goto release_ptp; + } + err = gve_clock_nic_ts_read(priv); + if (err) { + dev_err(&priv->pdev->dev, "failed to read NIC clock %d\n", err); + goto release_nic_ts_report; + } + ptp_schedule_worker(priv->ptp->clock, + msecs_to_jiffies(GVE_NIC_TS_SYNC_INTERVAL_MS)); + + return 0; + +release_nic_ts_report: + dma_free_coherent(&priv->pdev->dev, + sizeof(struct gve_nic_ts_report), + priv->nic_ts_report, priv->nic_ts_report_bus); + priv->nic_ts_report = NULL; +release_ptp: + gve_ptp_release(priv); + return err; +} + +void gve_teardown_clock(struct gve_priv *priv) +{ + gve_ptp_release(priv); + + if (priv->nic_ts_report) { + dma_free_coherent(&priv->pdev->dev, + sizeof(struct gve_nic_ts_report), + priv->nic_ts_report, priv->nic_ts_report_bus); + priv->nic_ts_report = NULL; + } }