[gve] Add support for out-of-order queues
author    Michael Brown <mcb30@ipxe.org>  Mon, 6 Oct 2025 13:04:11 +0000 (14:04 +0100)
committer Michael Brown <mcb30@ipxe.org>  Mon, 6 Oct 2025 13:04:12 +0000 (14:04 +0100)
Add support for the "DQO" out-of-order transmit and receive queue
formats.  These are almost entirely different in format and usage (and
even endianness) from the original "GQI" in-order transmit and receive
queues, and arguably should belong to a completely different device
with a different PCI ID.  However, Google chose to essentially crowbar
two unrelated device models into the same virtual hardware, and so we
must handle both of these device models within the same driver.
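
The endianness split reaches even the doorbell registers: a GQI
doorbell takes the full producer counter byte-swapped to big-endian,
whereas a DQO doorbell takes the producer index masked to the ring
size, in native byte order.  A minimal sketch of that computation
(the helper and its parameters are invented here for illustration;
the patch open-codes the same logic in gve_transmit() and
gve_refill_rx()):

#include <stdint.h>

/* Compute the value to write to a transmit or receive doorbell.
 * 'count' is the (power-of-two) ring size.  __builtin_bswap32() is
 * used in place of iPXE's bswap_32() to keep the sketch freestanding.
 */
static uint32_t gve_doorbell ( int dqo_mode, uint32_t prod,
			       uint32_t count ) {
	if ( dqo_mode ) {
		/* DQO: native-endian producer index within the ring */
		return ( prod & ( count - 1 ) );
	}
	/* GQI: big-endian absolute producer counter */
	return __builtin_bswap32 ( prod );
}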

Most of the new code exists solely to handle the differences in
descriptor sizes and formats.  Out-of-order completions are handled
via a buffer ID ring (as with other devices supporting out-of-order
completions, such as the Xen, Hyper-V, and Amazon virtual NICs).  A
slight twist is that on the transmit datapath (but not the receive
datapath) the Google NIC provides only one completion per packet
instead of one completion per descriptor, and so we must record the
list of chained buffer IDs in a separate array at the time of
transmission.
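
Since the completion reports only the final buffer ID and a
descriptor count, the driver itself must be able to recover the
intermediate buffer IDs.  A minimal freestanding sketch of this
bookkeeping, assuming a simplified free-tag ring in place of the
driver's queue structures:

#include <stdint.h>
#include <stdio.h>

#define FILL 16			/* transmit fill level */

static uint8_t ring[FILL];	/* ring of free buffer IDs ("tags") */
static uint8_t chain[FILL];	/* predecessor of each in-flight tag */
static unsigned int prod, cons;

/* Claim 'count' buffers for one packet, recording the chain of
 * predecessors at transmission time; return the final tag.
 */
static uint8_t tx_claim ( unsigned int count ) {
	uint8_t tag = 0;
	uint8_t prev = 0;

	while ( count-- ) {
		prev = tag;
		tag = ring[ prod++ % FILL ];
		chain[tag] = prev;
	}
	return tag;
}

/* Handle the single per-packet completion: unwind the recorded
 * chain, returning each buffer ID to the free ring.
 */
static void tx_complete ( uint8_t tag, unsigned int count ) {
	while ( count-- ) {
		ring[ cons++ % FILL ] = tag;
		tag = chain[tag];
	}
}

int main ( void ) {
	unsigned int i;
	uint8_t last;

	for ( i = 0 ; i < FILL ; i++ )
		ring[i] = i;
	last = tx_claim ( 3 );		/* claims tags 0, 1, 2 */
	tx_complete ( last, 3 );	/* returns 2, 1, 0 to the ring */
	printf ( "packet ended in tag %d\n", last );
	return 0;
}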

Signed-off-by: Michael Brown <mcb30@ipxe.org>
src/drivers/net/gve.c
src/drivers/net/gve.h
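
Both new completion rings (transmit and receive) are consumed via a
generation-bit convention rather than an event counter.  Because the
ring size is a power of two, the expression ( done & count ) toggles
on every pass around the ring, and an entry is new only while its
generation flag differs from the value left behind on the previous
pass.  A freestanding sketch of the convention, with names simplified
from gve_poll_tx() in the diff below:

#include <stdint.h>
#include <stddef.h>

#define COUNT 64	/* ring size (power of two) */
#define GEN 0x80	/* generation flag, as in GVE_DQO_TXF_GEN */

struct completion {
	uint8_t flags;
	/* ... remaining completion fields ... */
};

static struct completion ring[COUNT];
static uint32_t done;	/* completion counter */

/* Return the next new completion, or NULL if none has arrived yet */
static struct completion * poll ( void ) {
	unsigned int gen = ( done & COUNT );
	unsigned int index = ( done & ( COUNT - 1 ) );
	struct completion *cmplt = &ring[index];

	/* A stale entry still carries the previous pass's generation */
	if ( ( !! ( cmplt->flags & GEN ) ) == ( !! gen ) )
		return NULL;
	done++;
	return cmplt;
}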

diff --git a/src/drivers/net/gve.c b/src/drivers/net/gve.c
index fa8a15407ac2663de49cb4166ff054d565391a19..8759d71a949d5a78921480a7bcfba6288b0b3346 100644
--- a/src/drivers/net/gve.c
+++ b/src/drivers/net/gve.c
@@ -759,17 +759,20 @@ static int gve_create_queue ( struct gve_nic *gve, struct gve_queue *queue ) {
        /* Reset queue */
        queue->prod = 0;
        queue->cons = 0;
+       queue->done = 0;
        memset ( queue->desc.raw, 0, ( queue->count * stride->desc ) );
        memset ( queue->cmplt.raw, 0, ( queue->count * stride->cmplt ) );
        for ( i = 0 ; i < queue->fill ; i++ )
                queue->tag[i] = i;
 
-       /* Pre-populate descriptor offsets */
-       buf = ( queue->desc.raw + stride->desc - sizeof ( *buf ) );
-       for ( i = 0 ; i < queue->count ; i++ ) {
-               tag = ( i & ( queue->fill - 1 ) );
-               buf->addr = cpu_to_be64 ( gve_address ( queue, tag ) );
-               buf = ( ( ( void * ) buf ) + stride->desc );
+       /* Pre-populate descriptor offsets for in-order queues */
+       if ( ! ( gve->mode & GVE_MODE_DQO ) ) {
+               buf = ( queue->desc.raw + stride->desc - sizeof ( *buf ) );
+               for ( i = 0 ; i < queue->count ; i++ ) {
+                       tag = ( i & ( queue->fill - 1 ) );
+                       buf->addr = cpu_to_be64 ( gve_address ( queue, tag ) );
+                       buf = ( ( ( void * ) buf ) + stride->desc );
+               }
        }
 
        /* Construct request */
@@ -981,7 +984,8 @@ static int gve_alloc_queue ( struct gve_nic *gve, struct gve_queue *queue ) {
        }
 
        /* Set queue strides and calculate total lengths */
-       *stride = type->stride.gqi;
+       *stride = ( ( gve->mode & GVE_MODE_DQO ) ?
+                   type->stride.dqo : type->stride.gqi );
        desc_len = ( queue->count * stride->desc );
        cmplt_len = ( queue->count * stride->cmplt );
        res_len = sizeof ( *queue->res );
@@ -1340,11 +1344,15 @@ static int gve_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
        struct gve_nic *gve = netdev->priv;
        struct gve_queue *tx = &gve->tx;
        struct gve_gqi_tx_descriptor *gqi;
+       struct gve_dqo_tx_descriptor *dqo;
        unsigned int count;
        unsigned int index;
        unsigned int tag;
+       unsigned int chain;
+       uint32_t doorbell;
        size_t frag_len;
        size_t offset;
+       size_t next;
        size_t len;
 
        /* Do nothing if queues are not yet set up */
@@ -1360,8 +1368,7 @@ static int gve_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
        }
 
        /* Copy packet to queue pages and populate descriptors */
-       offset = 0;
-       while ( 1 ) {
+       for ( offset = 0, chain = 0 ; ; offset = next, chain = tag ) {
 
                /* Identify next available buffer */
                index = ( tx->prod++ & ( tx->count - 1 ) );
@@ -1376,38 +1383,66 @@ static int gve_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
                        frag_len = GVE_BUF_SIZE;
                memcpy ( gve_buffer ( tx, tag ),
                         ( iobuf->data + offset ), frag_len );
+               next = ( offset + frag_len );
 
                /* Populate descriptor */
-               gqi = &tx->desc.tx.gqi[index];
-               if ( offset ) {
-                       gqi->type = GVE_GQI_TX_TYPE_CONT;
-                       gqi->count = 0;
-                       gqi->total = 0;
+               if ( gve->mode & GVE_MODE_DQO ) {
+
+                       /* Out-of-order descriptor */
+                       dqo = &tx->desc.tx.dqo[index];
+                       dqo->buf.addr =
+                               cpu_to_le64 ( gve_address ( tx, tag ) );
+                       if ( next == len ) {
+                               dqo->type = ( GVE_DQO_TX_TYPE_PACKET |
+                                             GVE_DQO_TX_TYPE_LAST |
+                                             GVE_DQO_TX_TYPE_REPORT );
+                               dqo->tag.id = tag;
+                               dqo->tag.count = count;
+                       } else {
+                               dqo->type = GVE_DQO_TX_TYPE_PACKET;
+                               dqo->tag.id = 0;
+                               dqo->tag.count = 0;
+                       }
+                       dqo->len = cpu_to_le16 ( frag_len );
+                       gve->tx_chain[tag] = chain;
+
                } else {
-                       gqi->type = GVE_GQI_TX_TYPE_START;
-                       gqi->count = count;
-                       gqi->total = cpu_to_be16 ( len );
+
+                       /* In-order descriptor */
+                       gqi = &tx->desc.tx.gqi[index];
+                       if ( offset ) {
+                               gqi->type = GVE_GQI_TX_TYPE_CONT;
+                               gqi->count = 0;
+                               gqi->total = 0;
+                       } else {
+                               gqi->type = GVE_GQI_TX_TYPE_START;
+                               gqi->count = count;
+                               gqi->total = cpu_to_be16 ( len );
+                       }
+                       gqi->len = cpu_to_be16 ( frag_len );
+
                }
-               gqi->len = cpu_to_be16 ( frag_len );
-               DBGC2 ( gve, "GVE %p TX %#04x %#02x:%#02x len %#04x/%#04x at "
-                       "%#08zx\n", gve, index, gqi->type, gqi->count,
-                       be16_to_cpu ( gqi->len ), be16_to_cpu ( gqi->total ),
-                       gve_offset ( tx, tag ) );
-
-               /* Move to next descriptor */
-               offset += frag_len;
-               if ( offset < len )
-                       continue;
+               DBGC2 ( gve, "GVE %p TXD %#04x %#02x:%#02x len %#04zx/%#04zx "
+                       "at %#08lx\n", gve, index, tag, count, frag_len, len,
+                       gve_address ( tx, tag ) );
 
                /* Record I/O buffer against final descriptor */
-               gve->tx_iobuf[tag] = iobuf;
-               break;
+               if ( next == len ) {
+                       gve->tx_iobuf[tag] = iobuf;
+                       break;
+               }
        }
        assert ( ( tx->prod - tx->cons ) <= tx->fill );
 
        /* Ring doorbell */
+       doorbell = tx->prod;
+       if ( gve->mode & GVE_MODE_DQO ) {
+               doorbell &= ( tx->count - 1 );
+       } else {
+               doorbell = bswap_32 ( doorbell );
+       }
        wmb();
-       writel ( bswap_32 ( tx->prod ), tx->db );
+       writel ( doorbell, tx->db );
 
        return 0;
 }
@@ -1420,22 +1455,76 @@ static int gve_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
 static void gve_poll_tx ( struct net_device *netdev ) {
        struct gve_nic *gve = netdev->priv;
        struct gve_queue *tx = &gve->tx;
+       struct gve_dqo_tx_completion *dqo;
        struct io_buffer *iobuf;
+       unsigned int index;
+       unsigned int gen;
+       unsigned int bit;
        unsigned int tag;
        uint32_t count;
 
-       /* Read event counter */
-       count = be32_to_cpu ( tx->event->count );
-
        /* Process transmit completions */
-       while ( count != tx->cons ) {
-               DBGC2 ( gve, "GVE %p TX %#04x complete\n", gve, tx->cons );
-               tag = ( tx->cons % GVE_TX_FILL );
-               iobuf = gve->tx_iobuf[tag];
-               gve->tx_iobuf[tag] = NULL;
-               tx->cons++;
-               if ( iobuf )
-                       netdev_tx_complete ( netdev, iobuf );
+       if ( gve->mode & GVE_MODE_DQO ) {
+
+               /* Out-of-order completions */
+               while ( 1 ) {
+
+                       /* Read next possible completion */
+                       gen = ( tx->done & tx->count );
+                       index = ( tx->done & ( tx->count - 1 ) );
+                       dqo = &tx->cmplt.tx.dqo[index];
+
+                       /* Check generation bit */
+                       bit = ( dqo->flags & GVE_DQO_TXF_GEN );
+                       if ( ( !! bit ) == ( !! gen ) )
+                               break;
+                       tx->done++;
+
+                       /* Ignore non-packet completions */
+                       if ( ( ! ( dqo->flags & GVE_DQO_TXF_PKT ) ) ||
+                            ( dqo->tag.count < 0 ) ) {
+                               DBGC2 ( gve, "GVE %p TXC %#04x flags %#02x "
+                                       "ignored\n", gve, index, dqo->flags );
+                               continue;
+                       }
+
+                       /* Parse completion */
+                       tag = dqo->tag.id;
+                       count = dqo->tag.count;
+                       iobuf = gve->tx_iobuf[tag];
+                       gve->tx_iobuf[tag] = NULL;
+                       assert ( iobuf != NULL );
+
+                       /* Return completed descriptors to ring */
+                       while ( count-- ) {
+                               DBGC2 ( gve, "GVE %p TXC %#04x %#02x:%#02x "
+                                       "complete\n", gve, index, tag,
+                                       dqo->tag.count );
+                               tx->tag[ tx->cons++ % GVE_TX_FILL ] = tag;
+                               tag = gve->tx_chain[tag];
+                       }
+
+                       /* Hand off to network stack */
+                       if ( iobuf )
+                               netdev_tx_complete ( netdev, iobuf );
+               }
+
+       } else {
+
+               /* Read event counter */
+               count = be32_to_cpu ( tx->event->count );
+
+               /* Process transmit completions */
+               while ( count != tx->cons ) {
+                       DBGC2 ( gve, "GVE %p TXC %#04x complete\n",
+                               gve, tx->cons );
+                       tag = ( tx->cons % GVE_TX_FILL );
+                       iobuf = gve->tx_iobuf[tag];
+                       gve->tx_iobuf[tag] = NULL;
+                       tx->cons++;
+                       if ( iobuf )
+                               netdev_tx_complete ( netdev, iobuf );
+               }
        }
 }
 
@@ -1448,59 +1537,107 @@ static void gve_poll_rx ( struct net_device *netdev ) {
        struct gve_nic *gve = netdev->priv;
        struct gve_queue *rx = &gve->rx;
        struct gve_gqi_rx_completion *gqi;
+       struct gve_dqo_rx_completion *dqo;
        struct io_buffer *iobuf;
        unsigned int index;
+       unsigned int gen;
+       unsigned int bit;
        unsigned int seq;
        unsigned int tag;
-       uint32_t cons;
+       uint32_t done;
        size_t total;
        size_t len;
        int rc;
 
        /* Process receive completions */
-       cons = rx->cons;
+       done = rx->done;
        seq = gve->seq;
        total = 0;
        while ( 1 ) {
 
                /* Read next possible completion */
-               index = ( cons++ & ( rx->count - 1 ) );
-               gqi = &rx->cmplt.rx.gqi[index];
+               rc = 0;
+               gen = ( done & rx->count );
+               index = ( done++ & ( rx->count - 1 ) );
+               if ( gve->mode & GVE_MODE_DQO ) {
+
+                       /* Out-of-order completion */
+                       dqo = &rx->cmplt.rx.dqo[index];
+
+                       /* Check generation bit */
+                       bit = ( dqo->len & cpu_to_le16 ( GVE_DQO_RXL_GEN ) );
+                       if ( ( !! bit ) == ( !! gen ) )
+                               break;
+
+                       /* Parse completion */
+                       len = ( le16_to_cpu ( dqo->len ) &
+                               ( GVE_BUF_SIZE - 1 ) );
+                       tag = dqo->tag;
+                       DBGC2 ( gve, "GVE %p RXC %#04x %#02x:%#02x len %#04zx "
+                               "at %#08zx\n", gve, index, tag, dqo->flags,
+                               len, gve_offset ( rx, tag ) );
+
+                       /* Accumulate a complete packet */
+                       if ( dqo->status & GVE_DQO_RXS_ERROR ) {
+                               rc = -EIO;
+                               total = 0;
+                       } else {
+                               total += len;
+                               if ( ! ( dqo->flags & GVE_DQO_RXF_LAST ) )
+                                       continue;
+                       }
 
-               /* Check sequence number */
-               if ( ( gqi->seq & GVE_GQI_RX_SEQ_MASK ) != seq )
-                       break;
-               seq = gve_next ( seq );
-
-               /* Parse completion */
-               len = be16_to_cpu ( gqi->len );
-               tag = ( index % GVE_RX_FILL );
-               DBGC2 ( gve, "GVE %p RX %#04x %#02x:%#02x len %#04zx at "
-                       "%#08zx\n", gve, index, gqi->seq, gqi->flags,
-                       len, gve_offset ( rx, tag ) );
-
-               /* Accumulate a complete packet */
-               if ( gqi->flags & GVE_GQI_RXF_ERROR ) {
-                       total = 0;
                } else {
-                       total += len;
-                       if ( gqi->flags & GVE_GQI_RXF_MORE )
-                               continue;
+
+                       /* In-order completion */
+                       gqi = &rx->cmplt.rx.gqi[index];
+
+                       /* Check sequence number */
+                       if ( ( gqi->seq & GVE_GQI_RX_SEQ_MASK ) != seq )
+                               break;
+                       seq = gve_next ( seq );
+
+                       /* Parse completion */
+                       len = be16_to_cpu ( gqi->len );
+                       tag = ( index % GVE_RX_FILL );
+                       DBGC2 ( gve, "GVE %p RXC %#04x %#02x:%#02x len %#04zx "
+                               "at %#08zx\n", gve, index, gqi->seq,
+                               gqi->flags, len, gve_offset ( rx, tag ) );
+
+                       /* Accumulate a complete packet */
+                       if ( gqi->flags & GVE_GQI_RXF_ERROR ) {
+                               rc = -EIO;
+                               total = 0;
+                       } else {
+                               total += len;
+                               if ( gqi->flags & GVE_GQI_RXF_MORE )
+                                       continue;
+                       }
+                       gve->seq = seq;
                }
-               gve->seq = seq;
 
                /* Allocate and populate I/O buffer */
                iobuf = ( total ? alloc_iob ( total ) : NULL );
-               for ( ; rx->cons != cons ; rx->cons++ ) {
-
-                       /* Re-read completion */
-                       index = ( rx->cons & ( rx->count - 1 ) );
-                       gqi = &rx->cmplt.rx.gqi[index];
-                       tag = ( index % GVE_RX_FILL );
+               for ( ; rx->done != done ; rx->done++ ) {
+
+                       /* Re-read completion and return tag to ring */
+                       index = ( rx->done & ( rx->count - 1 ) );
+                       if ( gve->mode & GVE_MODE_DQO ) {
+                               dqo = &rx->cmplt.rx.dqo[index];
+                               tag = dqo->tag;
+                               len = ( le16_to_cpu ( dqo->len ) &
+                                       ( GVE_BUF_SIZE - 1 ) );
+                               rx->tag[ rx->cons++ % GVE_RX_FILL ] = tag;
+                       } else {
+                               gqi = &rx->cmplt.rx.gqi[index];
+                               tag = ( index % GVE_RX_FILL );
+                               len = be16_to_cpu ( gqi->len );
+                               assert ( rx->cons == rx->done );
+                               rx->cons++;
+                       }
 
                        /* Copy data */
                        if ( iobuf ) {
-                               len = be16_to_cpu ( gqi->len );
                                memcpy ( iob_put ( iobuf, len ),
                                         gve_buffer ( rx, tag ), len );
                        }
@@ -1510,18 +1647,12 @@ static void gve_poll_rx ( struct net_device *netdev ) {
 
                /* Hand off packet to network stack */
                if ( iobuf ) {
-                       iob_pull ( iobuf, GVE_RX_PAD );
+                       if ( ! ( gve->mode & GVE_MODE_DQO ) )
+                               iob_pull ( iobuf, GVE_GQI_RX_PAD );
                        netdev_rx ( netdev, iobuf );
                } else {
-                       rc = ( ( gqi->flags & GVE_GQI_RXF_ERROR ) ?
-                              -EIO : -ENOMEM );
-                       netdev_rx_err ( netdev, NULL, rc );
+                       netdev_rx_err ( netdev, NULL, ( rc ? rc : -ENOMEM ) );
                }
-
-               /* Sanity check */
-               assert ( rx->cons == cons );
-               assert ( gve->seq == seq );
-               assert ( total == 0 );
        }
 }
 
@@ -1533,20 +1664,60 @@ static void gve_poll_rx ( struct net_device *netdev ) {
 static void gve_refill_rx ( struct net_device *netdev ) {
        struct gve_nic *gve = netdev->priv;
        struct gve_queue *rx = &gve->rx;
-       unsigned int prod;
+       struct gve_dqo_rx_descriptor *dqo;
+       unsigned int refill;
+       unsigned int index;
+       unsigned int tag;
+       uint32_t doorbell;
 
-       /* The receive descriptors are prepopulated at the time of
-        * creating the receive queue (pointing to the preallocated
-        * queue pages).  Refilling is therefore just a case of
-        * ringing the doorbell if the device is not yet aware of any
-        * available descriptors.
-        */
-       prod = ( rx->cons + rx->fill );
-       if ( prod != rx->prod ) {
-               rx->prod = prod;
-               writel ( bswap_32 ( prod ), rx->db );
-               DBGC2 ( gve, "GVE %p RX %#04x ready\n", gve, rx->prod );
+       /* Calculate refill quantity */
+       doorbell = ( rx->cons + rx->fill );
+       refill = ( doorbell - rx->prod );
+       if ( ! refill )
+               return;
+
+       /* Refill ring */
+       if ( gve->mode & GVE_MODE_DQO ) {
+
+               /* Out-of-order descriptors */
+               while ( refill-- ) {
+
+                       /* Identify next available buffer */
+                       index = ( rx->prod++ & ( rx->count - 1 ) );
+                       tag = rx->tag[ index % GVE_RX_FILL ];
+
+                       /* Populate descriptor */
+                       dqo = &rx->desc.rx.dqo[index];
+                       dqo->tag = tag;
+                       dqo->buf.addr =
+                               cpu_to_le64 ( gve_address ( rx, tag ) );
+                       DBGC2 ( gve, "GVE %p RXD %#04x:%#02x at %#08llx\n",
+                               gve, index, dqo->tag,
+                               ( ( unsigned long long )
+                                 le64_to_cpu ( dqo->buf.addr ) ) );
+               }
+               wmb();
+               assert ( rx->prod == doorbell );
+
+       } else {
+
+               /* The in-order receive descriptors are prepopulated
+                * at the time of creating the receive queue (pointing
+                * to the preallocated queue pages).  Refilling is
+                * therefore just a case of ringing the doorbell if
+                * the device is not yet aware of any available
+                * descriptors.
+                */
+               rx->prod += refill;
+               assert ( rx->prod == doorbell );
+               DBGC2 ( gve, "GVE %p RXD %#04x ready\n", gve, rx->prod );
+
+               /* Doorbell is big-endian */
+               doorbell = bswap_32 ( doorbell );
        }
+
+       /* Ring doorbell */
+       writel ( doorbell, rx->db );
 }
 
 /**
@@ -1596,6 +1767,10 @@ static const struct gve_queue_type gve_tx_type = {
                .gqi = {
                        .desc = sizeof ( struct gve_gqi_tx_descriptor ),
                },
+               .dqo = {
+                       .desc = sizeof ( struct gve_dqo_tx_descriptor ),
+                       .cmplt = sizeof ( struct gve_dqo_tx_completion ),
+               },
        },
        .create = GVE_ADMIN_CREATE_TX,
        .destroy = GVE_ADMIN_DESTROY_TX,
@@ -1613,6 +1788,10 @@ static const struct gve_queue_type gve_rx_type = {
                        .desc = sizeof ( struct gve_gqi_rx_descriptor ),
                        .cmplt = sizeof ( struct gve_gqi_rx_completion ),
                },
+               .dqo = {
+                       .desc = sizeof ( struct gve_dqo_rx_descriptor ),
+                       .cmplt = sizeof ( struct gve_dqo_rx_completion ),
+               },
        },
        .create = GVE_ADMIN_CREATE_RX,
        .destroy = GVE_ADMIN_DESTROY_RX,
diff --git a/src/drivers/net/gve.h b/src/drivers/net/gve.h
index c15cb808f49a55db85751c35f3492fc8e4c73361..f185aca4a2a93508e74c92e14c078573184053f5 100644
--- a/src/drivers/net/gve.h
+++ b/src/drivers/net/gve.h
@@ -539,9 +539,14 @@ struct gve_qpl {
 /**
  * Maximum number of transmit buffers
  *
- * This is a policy decision.
+ * This is a policy decision.  Experiments suggest that out-of-order
+ * transmit queues will write completions only in batches of 128
+ * bytes, comprising 8 descriptor completions and 8 packet
+ * completions.  The transmit fill level must therefore be greater
+ * than 8, so that completions will be written out before the transmit
+ * ring runs out of space.
  */
-#define GVE_TX_FILL 8
+#define GVE_TX_FILL 16
 
 /** Transmit queue page list ID */
 #define GVE_TX_QPL 0x18ae5458
@@ -577,6 +582,65 @@ struct gve_gqi_tx_descriptor {
 /** Continuation of packet transmit descriptor type */
 #define GVE_GQI_TX_TYPE_CONT 0x20
 
+/** An out-of-order transmit tag
+ *
+ * From the hardware perspective, this is an opaque 15-bit (sic) value
+ * that is simply copied from the descriptor to the corresponding
+ * completion.
+ */
+struct gve_dqo_tx_tag {
+       /** Buffer index within queue page list */
+       uint8_t id;
+       /** Number of descriptors covered by this completion
+        *
+        * Note that this is a 7-bit quantity: the high bit may be
+        * (ab)used by the hardware to indicate that a completion is a
+        * terminologically undefined "miss" completion.
+        */
+       int8_t count;
+} __attribute__ (( packed ));
+
+/** An out-of-order transmit descriptor */
+struct gve_dqo_tx_descriptor {
+       /** Buffer descriptor */
+       struct gve_buffer buf;
+       /** Descriptor type and flags */
+       uint8_t type;
+       /** Reserved */
+       uint8_t reserved_a[3];
+       /** Tag */
+       struct gve_dqo_tx_tag tag;
+       /** Length of this descriptor */
+       uint16_t len;
+} __attribute__ (( packed ));
+
+/** Normal packet transmit descriptor type */
+#define GVE_DQO_TX_TYPE_PACKET 0x0c
+
+/** Last transmit descriptor in a packet */
+#define GVE_DQO_TX_TYPE_LAST 0x20
+
+/** Report transmit completion */
+#define GVE_DQO_TX_TYPE_REPORT 0x80
+
+/** An out-of-order transmit completion */
+struct gve_dqo_tx_completion {
+       /** Reserved */
+       uint8_t reserved_a[1];
+       /** Completion flags */
+       uint8_t flags;
+       /** Tag */
+       struct gve_dqo_tx_tag tag;
+       /** Reserved */
+       uint8_t reserved_b[4];
+} __attribute__ (( packed ));
+
+/** Transmit completion packet flag */
+#define GVE_DQO_TXF_PKT 0x10
+
+/** Transmit completion generation flag */
+#define GVE_DQO_TXF_GEN 0x80
+
 /**
  * Maximum number of receive buffers
  *
@@ -620,7 +684,50 @@ struct gve_gqi_rx_completion {
 } __attribute__ (( packed ));
 
 /** Padding at the start of all received packets */
-#define GVE_RX_PAD 2
+#define GVE_GQI_RX_PAD 2
+
+/** An out-of-order receive descriptor */
+struct gve_dqo_rx_descriptor {
+       /** Tag */
+       uint8_t tag;
+       /** Reserved */
+       uint8_t reserved_a[7];
+       /** Buffer descriptor */
+       struct gve_buffer buf;
+       /** Reserved */
+       uint8_t reserved_b[16];
+} __attribute__ (( packed ));
+
+/** An out-of-order receive completion */
+struct gve_dqo_rx_completion {
+       /** Reserved */
+       uint8_t reserved_a[1];
+       /** Status */
+       uint8_t status;
+       /** Reserved */
+       uint8_t reserved_b[2];
+       /** Length and generation bit */
+       uint16_t len;
+       /** Reserved */
+       uint8_t reserved_c[2];
+       /** Flags */
+       uint8_t flags;
+       /** Reserved */
+       uint8_t reserved_d[3];
+       /** Tag */
+       uint8_t tag;
+       /** Reserved */
+       uint8_t reserved_e[19];
+} __attribute__ (( packed ));
+
+/** Receive error */
+#define GVE_DQO_RXS_ERROR 0x04
+
+/** Receive completion generation flag */
+#define GVE_DQO_RXL_GEN 0x4000
+
+/** Last receive descriptor in a packet */
+#define GVE_DQO_RXF_LAST 0x02
 
 /** Queue strides */
 struct gve_queue_stride {
@@ -638,21 +745,32 @@ struct gve_queue {
                union {
                        /** In-order transmit descriptors */
                        struct gve_gqi_tx_descriptor *gqi;
+                       /** Out-of-order transmit descriptors */
+                       struct gve_dqo_tx_descriptor *dqo;
                } tx;
                /** Receive descriptors */
                union {
                        /** In-order receive descriptors */
                        struct gve_gqi_rx_descriptor *gqi;
+                       /** Out-of-order receive descriptors */
+                       struct gve_dqo_rx_descriptor *dqo;
                } rx;
                /** Raw data */
                void *raw;
        } desc;
        /** Completion ring */
        union {
+               /** Transmit completions */
+               union {
+                       /** Out-of-order transmit completions */
+                       struct gve_dqo_tx_completion *dqo;
+               } tx;
                /** Receive completions */
                union {
                        /** In-order receive completions */
                        struct gve_gqi_rx_completion *gqi;
+                       /** Out-of-order receive completions */
+                       struct gve_dqo_rx_completion *dqo;
                } rx;
                /** Raw data */
                void *raw;
@@ -685,6 +803,8 @@ struct gve_queue {
        uint32_t prod;
        /** Consumer counter */
        uint32_t cons;
+       /** Completion counter */
+       uint32_t done;
        /** Tag ring */
        uint8_t *tag;
 
@@ -715,6 +835,8 @@ struct gve_queue_type {
        struct {
                /** In-order queue strides */
                struct gve_queue_stride gqi;
+               /** Out-of-order queue strides */
+               struct gve_queue_stride dqo;
        } stride;
        /** Command to create queue */
        uint8_t create;
@@ -754,6 +876,8 @@ struct gve_nic {
        struct gve_queue rx;
        /** Transmit I/O buffers (indexed by tag) */
        struct io_buffer *tx_iobuf[GVE_TX_FILL];
+       /** Transmit tag chain */
+       uint8_t tx_chain[GVE_TX_FILL];
        /** Transmit tag ring */
        uint8_t tx_tag[GVE_TX_FILL];
        /** Receive tag ring */