From d2e1e591ab957955794584aed911f701922cd2c7 Mon Sep 17 00:00:00 2001
From: Michael Brown
Date: Thu, 9 Oct 2025 17:12:20 +0100
Subject: [PATCH] [gve] Use dummy interrupt to trigger completion writeback in DQO mode

When operating in the DQO operating mode, the device will defer
writing transmit and receive completions until an entire internal
cacheline (128 bytes) is full, or until an associated interrupt is
asserted.

Since each receive descriptor is 32 bytes, this will cause received
packets to be effectively delayed until up to three further packets
have arrived.  When network traffic volumes are very low (such as
during DHCP, DNS lookups, or TCP handshakes), this typically induces
delays of up to 30 seconds and results in a very poor user experience.

Work around this hardware problem in the same way as for the Intel
40GbE and 100GbE NICs: by enabling dummy MSI-X interrupts to trick the
hardware into believing that it needs to write out completions to host
memory.

There is no documentation around the interrupt rearming mechanism.
The value written to the interrupt doorbell does not include a
consumer counter value, and so must be relying on some undocumented
ordering constraints.  Comments in the Linux driver source suggest
that the authors believe that the device will automatically and
atomically mask an MSI-X interrupt at the point of asserting it, that
any further interrupts arriving before the doorbell is written will be
recorded in the pending bit array, and that writing the doorbell will
therefore immediately assert a new interrupt if needed.

In the absence of any documentation, choose to rearm the interrupt
once per observed completion.  This is overkill, but is less impactful
than the alternative of rearming the interrupt unconditionally on
every poll.

Signed-off-by: Michael Brown --- src/drivers/net/gve.c | 30 +++++++++++++++++++++++++++++- src/drivers/net/gve.h | 10 ++++++++-- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/src/drivers/net/gve.c b/src/drivers/net/gve.c index 1696c249d..66e529abc 100644 --- a/src/drivers/net/gve.c +++ b/src/drivers/net/gve.c @@ -591,6 +591,7 @@ static int gve_configure ( struct gve_nic *gve ) { struct gve_events *events = &gve->events; struct gve_irqs *irqs = &gve->irqs; union gve_admin_command *cmd; + uint32_t doorbell; unsigned int db_off; unsigned int i; int rc; @@ -612,12 +613,14 @@ static int gve_configure ( struct gve_nic *gve ) { return rc; /* Disable all interrupts */ + doorbell = ( ( gve->mode & GVE_MODE_DQO ) ? + 0 : bswap_32 ( GVE_GQI_IRQ_DISABLE ) ); for ( i = 0 ; i < GVE_IRQ_COUNT ; i++ ) { db_off = ( be32_to_cpu ( irqs->irq[i].db_idx ) * sizeof ( uint32_t ) ); DBGC ( gve, "GVE %p IRQ %d doorbell +%#04x\n", gve, i, db_off ); irqs->db[i] = ( gve->db + db_off ); - writel ( bswap_32 ( GVE_IRQ_DISABLE ), irqs->db[i] ); + writel ( doorbell, irqs->db[i] ); } return 0; @@ -810,6 +813,13 @@ static int gve_create_queue ( struct gve_nic *gve, struct gve_queue *queue ) { queue->event = &gve->events.event[evt_idx]; assert ( queue->event->count == 0 ); + /* Unmask dummy interrupt */ + pci_msix_unmask ( &gve->msix, type->irq ); + + /* Rearm queue interrupt if applicable */ + if ( gve->mode & GVE_MODE_DQO ) + writel ( GVE_DQO_IRQ_REARM, gve->irqs.db[type->irq] ); + return 0; } @@ -824,6 +834,9 @@ static int gve_destroy_queue ( struct gve_nic *gve, struct gve_queue *queue ) { const struct gve_queue_type *type = queue->type; int rc; + /* Mask dummy interrupt */ + pci_msix_mask ( &gve->msix, type->irq ); + /* Issue command */ if ( ( rc = gve_admin_simple ( gve, type->destroy, 0 ) ) != 0 ) return rc; @@ -1496,6 +1509,9 @@ static void gve_poll_tx ( struct net_device *netdev ) { rmb(); tx->done++; + /* Re-arm interrupt */ + writel ( GVE_DQO_IRQ_REARM, 
gve->irqs.db[GVE_TX_IRQ] ); + /* Ignore non-packet completions */ if ( ( ! ( dqo->flags & GVE_DQO_TXF_PKT ) ) || ( dqo->tag.count < 0 ) ) { @@ -1586,6 +1602,9 @@ static void gve_poll_rx ( struct net_device *netdev ) { break; rmb(); + /* Re-arm interrupt */ + writel ( GVE_DQO_IRQ_REARM, gve->irqs.db[GVE_RX_IRQ] ); + /* Parse completion */ len = ( le16_to_cpu ( dqo->len ) & ( GVE_BUF_SIZE - 1 ) ); @@ -1917,6 +1936,10 @@ static int gve_probe ( struct pci_device *pci ) { dma_set_mask_64bit ( gve->dma ); assert ( netdev->dma == NULL ); + /* Configure dummy MSI-X interrupt */ + if ( ( rc = pci_msix_enable ( pci, &gve->msix ) ) != 0 ) + goto err_msix; + /* Allocate admin queue */ if ( ( rc = gve_admin_alloc ( gve ) ) != 0 ) goto err_admin; @@ -1937,6 +1960,8 @@ static int gve_probe ( struct pci_device *pci ) { gve_reset ( gve ); gve_admin_free ( gve ); err_admin: + pci_msix_disable ( pci, &gve->msix ); + err_msix: iounmap ( gve->db ); err_db: iounmap ( gve->cfg ); @@ -1965,6 +1990,9 @@ static void gve_remove ( struct pci_device *pci ) { /* Free admin queue */ gve_admin_free ( gve ); + /* Disable dummy MSI-X interrupt */ + pci_msix_disable ( pci, &gve->msix ); + /* Unmap registers */ iounmap ( gve->db ); iounmap ( gve->cfg ); diff --git a/src/drivers/net/gve.h b/src/drivers/net/gve.h index f185aca4a..29928a34a 100644 --- a/src/drivers/net/gve.h +++ b/src/drivers/net/gve.h @@ -16,6 +16,7 @@ FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL ); #include #include #include +#include #include #include #include @@ -443,8 +444,11 @@ struct gve_irqs { volatile uint32_t *db[GVE_IRQ_COUNT]; }; -/** Disable interrupts */ -#define GVE_IRQ_DISABLE 0x40000000UL +/** Disable in-order queue interrupt */ +#define GVE_GQI_IRQ_DISABLE 0x40000000UL + +/** Rearm out-of-order queue interrupt */ +#define GVE_DQO_IRQ_REARM 0x00000019UL /** * Queue resources @@ -856,6 +860,8 @@ struct gve_nic { struct net_device *netdev; /** DMA device */ struct dma_device *dma; + /** Dummy MSI-X interrupt */ + struct pci_msix 
msix; /** Admin queue */ struct gve_admin admin; -- 2.47.3