/* Reset queue */
queue->prod = 0;
queue->cons = 0;
+ queue->done = 0;
memset ( queue->desc.raw, 0, ( queue->count * stride->desc ) );
memset ( queue->cmplt.raw, 0, ( queue->count * stride->cmplt ) );
for ( i = 0 ; i < queue->fill ; i++ )
queue->tag[i] = i;
- /* Pre-populate descriptor offsets */
- buf = ( queue->desc.raw + stride->desc - sizeof ( *buf ) );
- for ( i = 0 ; i < queue->count ; i++ ) {
- tag = ( i & ( queue->fill - 1 ) );
- buf->addr = cpu_to_be64 ( gve_address ( queue, tag ) );
- buf = ( ( ( void * ) buf ) + stride->desc );
+ /* Pre-populate descriptor offsets for in-order queues */
+ if ( ! ( gve->mode & GVE_MODE_DQO ) ) {
+ buf = ( queue->desc.raw + stride->desc - sizeof ( *buf ) );
+ for ( i = 0 ; i < queue->count ; i++ ) {
+ tag = ( i & ( queue->fill - 1 ) );
+ buf->addr = cpu_to_be64 ( gve_address ( queue, tag ) );
+ buf = ( ( ( void * ) buf ) + stride->desc );
+ }
}
/* Construct request */
}
/* Set queue strides and calculate total lengths */
- *stride = type->stride.gqi;
+ *stride = ( ( gve->mode & GVE_MODE_DQO ) ?
+ type->stride.dqo : type->stride.gqi );
desc_len = ( queue->count * stride->desc );
cmplt_len = ( queue->count * stride->cmplt );
res_len = sizeof ( *queue->res );
struct gve_nic *gve = netdev->priv;
struct gve_queue *tx = &gve->tx;
struct gve_gqi_tx_descriptor *gqi;
+ struct gve_dqo_tx_descriptor *dqo;
unsigned int count;
unsigned int index;
unsigned int tag;
+ unsigned int chain;
+ uint32_t doorbell;
size_t frag_len;
size_t offset;
+ size_t next;
size_t len;
/* Do nothing if queues are not yet set up */
}
/* Copy packet to queue pages and populate descriptors */
- offset = 0;
- while ( 1 ) {
+ for ( offset = 0, chain = 0 ; ; offset = next, chain = tag ) {
		/* Identify next available buffer */
		index = ( tx->prod++ & ( tx->count - 1 ) );
-		tag = ( index % GVE_TX_FILL );
+		tag = tx->tag[ index % GVE_TX_FILL ];
		/* Copy packet fragment */
		frag_len = ( len - offset );
		if ( frag_len > GVE_BUF_SIZE )
			frag_len = GVE_BUF_SIZE;
		memcpy ( gve_buffer ( tx, tag ),
			 ( iobuf->data + offset ), frag_len );
+ next = ( offset + frag_len );
/* Populate descriptor */
- gqi = &tx->desc.tx.gqi[index];
- if ( offset ) {
- gqi->type = GVE_GQI_TX_TYPE_CONT;
- gqi->count = 0;
- gqi->total = 0;
+ if ( gve->mode & GVE_MODE_DQO ) {
+
+ /* Out-of-order descriptor */
+ dqo = &tx->desc.tx.dqo[index];
+ dqo->buf.addr =
+ cpu_to_le64 ( gve_address ( tx, tag ) );
+ if ( next == len ) {
+ dqo->type = ( GVE_DQO_TX_TYPE_PACKET |
+ GVE_DQO_TX_TYPE_LAST |
+ GVE_DQO_TX_TYPE_REPORT );
+ dqo->tag.id = tag;
+ dqo->tag.count = count;
+ } else {
+ dqo->type = GVE_DQO_TX_TYPE_PACKET;
+ dqo->tag.id = 0;
+ dqo->tag.count = 0;
+ }
+ dqo->len = cpu_to_le16 ( frag_len );
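+
+			/* Record the previous tag in this packet's
+			 * chain, so that all of the packet's buffer
+			 * tags can be returned to the tag ring on
+			 * completion.
+			 */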
+ gve->tx_chain[tag] = chain;
+
} else {
- gqi->type = GVE_GQI_TX_TYPE_START;
- gqi->count = count;
- gqi->total = cpu_to_be16 ( len );
+
+ /* In-order descriptor */
+ gqi = &tx->desc.tx.gqi[index];
+ if ( offset ) {
+ gqi->type = GVE_GQI_TX_TYPE_CONT;
+ gqi->count = 0;
+ gqi->total = 0;
+ } else {
+ gqi->type = GVE_GQI_TX_TYPE_START;
+ gqi->count = count;
+ gqi->total = cpu_to_be16 ( len );
+ }
+ gqi->len = cpu_to_be16 ( frag_len );
+
}
- gqi->len = cpu_to_be16 ( frag_len );
- DBGC2 ( gve, "GVE %p TX %#04x %#02x:%#02x len %#04x/%#04x at "
- "%#08zx\n", gve, index, gqi->type, gqi->count,
- be16_to_cpu ( gqi->len ), be16_to_cpu ( gqi->total ),
- gve_offset ( tx, tag ) );
-
- /* Move to next descriptor */
- offset += frag_len;
- if ( offset < len )
- continue;
+ DBGC2 ( gve, "GVE %p TXD %#04x %#02x:%#02x len %#04zx/%#04zx "
+ "at %#08lx\n", gve, index, tag, count, frag_len, len,
+ gve_address ( tx, tag ) );
/* Record I/O buffer against final descriptor */
- gve->tx_iobuf[tag] = iobuf;
- break;
+ if ( next == len ) {
+ gve->tx_iobuf[tag] = iobuf;
+ break;
+ }
}
assert ( ( tx->prod - tx->cons ) <= tx->fill );
/* Ring doorbell */
+ doorbell = tx->prod;
+ if ( gve->mode & GVE_MODE_DQO ) {
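+		/* Out-of-order doorbells take a masked descriptor index */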
+ doorbell &= ( tx->count - 1 );
+ } else {
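+		/* In-order doorbells take a big-endian producer counter */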
+ doorbell = bswap_32 ( doorbell );
+ }
wmb();
- writel ( bswap_32 ( tx->prod ), tx->db );
+ writel ( doorbell, tx->db );
return 0;
}
static void gve_poll_tx ( struct net_device *netdev ) {
struct gve_nic *gve = netdev->priv;
struct gve_queue *tx = &gve->tx;
+ struct gve_dqo_tx_completion *dqo;
struct io_buffer *iobuf;
+ unsigned int index;
+ unsigned int gen;
+ unsigned int bit;
unsigned int tag;
uint32_t count;
- /* Read event counter */
- count = be32_to_cpu ( tx->event->count );
-
/* Process transmit completions */
- while ( count != tx->cons ) {
- DBGC2 ( gve, "GVE %p TX %#04x complete\n", gve, tx->cons );
- tag = ( tx->cons % GVE_TX_FILL );
- iobuf = gve->tx_iobuf[tag];
- gve->tx_iobuf[tag] = NULL;
- tx->cons++;
- if ( iobuf )
- netdev_tx_complete ( netdev, iobuf );
+ if ( gve->mode & GVE_MODE_DQO ) {
+
+ /* Out-of-order completions */
+ while ( 1 ) {
+
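+			/* Completion entries carry a generation bit
+			 * that alternates on each pass through the
+			 * ring: an entry is new only while its
+			 * generation bit differs from the ring-size
+			 * bit of our completion counter.
+			 */
+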
+ /* Read next possible completion */
+ gen = ( tx->done & tx->count );
+ index = ( tx->done & ( tx->count - 1 ) );
+ dqo = &tx->cmplt.tx.dqo[index];
+
+ /* Check generation bit */
+ bit = ( dqo->flags & GVE_DQO_TXF_GEN );
+ if ( ( !! bit ) == ( !! gen ) )
+ break;
+ tx->done++;
+
+ /* Ignore non-packet completions */
+ if ( ( ! ( dqo->flags & GVE_DQO_TXF_PKT ) ) ||
+ ( dqo->tag.count < 0 ) ) {
+ DBGC2 ( gve, "GVE %p TXC %#04x flags %#02x "
+ "ignored\n", gve, index, dqo->flags );
+ continue;
+ }
+
+ /* Parse completion */
+ tag = dqo->tag.id;
+ count = dqo->tag.count;
+ iobuf = gve->tx_iobuf[tag];
+ gve->tx_iobuf[tag] = NULL;
+ assert ( iobuf != NULL );
+
+ /* Return completed descriptors to ring */
+ while ( count-- ) {
+ DBGC2 ( gve, "GVE %p TXC %#04x %#02x:%#02x "
+ "complete\n", gve, index, tag,
+ dqo->tag.count );
+ tx->tag[ tx->cons++ % GVE_TX_FILL ] = tag;
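+				/* Walk back along the packet's tag chain */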
+ tag = gve->tx_chain[tag];
+ }
+
+ /* Hand off to network stack */
+ if ( iobuf )
+ netdev_tx_complete ( netdev, iobuf );
+ }
+
+ } else {
+
+ /* Read event counter */
+ count = be32_to_cpu ( tx->event->count );
+
+ /* Process transmit completions */
+ while ( count != tx->cons ) {
+ DBGC2 ( gve, "GVE %p TXC %#04x complete\n",
+ gve, tx->cons );
+ tag = ( tx->cons % GVE_TX_FILL );
+ iobuf = gve->tx_iobuf[tag];
+ gve->tx_iobuf[tag] = NULL;
+ tx->cons++;
+ if ( iobuf )
+ netdev_tx_complete ( netdev, iobuf );
+ }
}
}
struct gve_nic *gve = netdev->priv;
struct gve_queue *rx = &gve->rx;
struct gve_gqi_rx_completion *gqi;
+ struct gve_dqo_rx_completion *dqo;
struct io_buffer *iobuf;
unsigned int index;
+ unsigned int gen;
+ unsigned int bit;
unsigned int seq;
unsigned int tag;
- uint32_t cons;
+ uint32_t done;
size_t total;
size_t len;
int rc;
/* Process receive completions */
- cons = rx->cons;
+ done = rx->done;
seq = gve->seq;
total = 0;
while ( 1 ) {
/* Read next possible completion */
- index = ( cons++ & ( rx->count - 1 ) );
- gqi = &rx->cmplt.rx.gqi[index];
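+		/* Assume success unless an error completion is seen */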
+ rc = 0;
+ gen = ( done & rx->count );
+ index = ( done++ & ( rx->count - 1 ) );
+ if ( gve->mode & GVE_MODE_DQO ) {
+
+ /* Out-of-order completion */
+ dqo = &rx->cmplt.rx.dqo[index];
+
+ /* Check generation bit */
+ bit = ( dqo->len & cpu_to_le16 ( GVE_DQO_RXL_GEN ) );
+ if ( ( !! bit ) == ( !! gen ) )
+ break;
+
+ /* Parse completion */
+ len = ( le16_to_cpu ( dqo->len ) &
+ ( GVE_BUF_SIZE - 1 ) );
+ tag = dqo->tag;
+ DBGC2 ( gve, "GVE %p RXC %#04x %#02x:%#02x len %#04zx "
+ "at %#08zx\n", gve, index, tag, dqo->flags,
+ len, gve_offset ( rx, tag ) );
+
+ /* Accumulate a complete packet */
+ if ( dqo->status & GVE_DQO_RXS_ERROR ) {
+ rc = -EIO;
+ total = 0;
+ } else {
+ total += len;
+ if ( ! ( dqo->flags & GVE_DQO_RXF_LAST ) )
+ continue;
+ }
- /* Check sequence number */
- if ( ( gqi->seq & GVE_GQI_RX_SEQ_MASK ) != seq )
- break;
- seq = gve_next ( seq );
-
- /* Parse completion */
- len = be16_to_cpu ( gqi->len );
- tag = ( index % GVE_RX_FILL );
- DBGC2 ( gve, "GVE %p RX %#04x %#02x:%#02x len %#04zx at "
- "%#08zx\n", gve, index, gqi->seq, gqi->flags,
- len, gve_offset ( rx, tag ) );
-
- /* Accumulate a complete packet */
- if ( gqi->flags & GVE_GQI_RXF_ERROR ) {
- total = 0;
} else {
- total += len;
- if ( gqi->flags & GVE_GQI_RXF_MORE )
- continue;
+
+ /* In-order completion */
+ gqi = &rx->cmplt.rx.gqi[index];
+
+ /* Check sequence number */
+ if ( ( gqi->seq & GVE_GQI_RX_SEQ_MASK ) != seq )
+ break;
+ seq = gve_next ( seq );
+
+ /* Parse completion */
+ len = be16_to_cpu ( gqi->len );
+ tag = ( index % GVE_RX_FILL );
+ DBGC2 ( gve, "GVE %p RXC %#04x %#02x:%#02x len %#04zx "
+ "at %#08zx\n", gve, index, gqi->seq,
+ gqi->flags, len, gve_offset ( rx, tag ) );
+
+ /* Accumulate a complete packet */
+ if ( gqi->flags & GVE_GQI_RXF_ERROR ) {
+ rc = -EIO;
+ total = 0;
+ } else {
+ total += len;
+ if ( gqi->flags & GVE_GQI_RXF_MORE )
+ continue;
+ }
+ gve->seq = seq;
}
- gve->seq = seq;
/* Allocate and populate I/O buffer */
iobuf = ( total ? alloc_iob ( total ) : NULL );
- for ( ; rx->cons != cons ; rx->cons++ ) {
-
- /* Re-read completion */
- index = ( rx->cons & ( rx->count - 1 ) );
- gqi = &rx->cmplt.rx.gqi[index];
- tag = ( index % GVE_RX_FILL );
+ for ( ; rx->done != done ; rx->done++ ) {
+
+ /* Re-read completion and return tag to ring */
+ index = ( rx->done & ( rx->count - 1 ) );
+ if ( gve->mode & GVE_MODE_DQO ) {
+ dqo = &rx->cmplt.rx.dqo[index];
+ tag = dqo->tag;
+ len = ( le16_to_cpu ( dqo->len ) &
+ ( GVE_BUF_SIZE - 1 ) );
+ rx->tag[ rx->cons++ % GVE_RX_FILL ] = tag;
+ } else {
+ gqi = &rx->cmplt.rx.gqi[index];
+ tag = ( index % GVE_RX_FILL );
+ len = be16_to_cpu ( gqi->len );
+ assert ( rx->cons == rx->done );
+ rx->cons++;
+ }
/* Copy data */
if ( iobuf ) {
- len = be16_to_cpu ( gqi->len );
memcpy ( iob_put ( iobuf, len ),
gve_buffer ( rx, tag ), len );
}
/* Hand off packet to network stack */
if ( iobuf ) {
- iob_pull ( iobuf, GVE_RX_PAD );
+ if ( ! ( gve->mode & GVE_MODE_DQO ) )
+ iob_pull ( iobuf, GVE_GQI_RX_PAD );
netdev_rx ( netdev, iobuf );
} else {
- rc = ( ( gqi->flags & GVE_GQI_RXF_ERROR ) ?
- -EIO : -ENOMEM );
- netdev_rx_err ( netdev, NULL, rc );
+ netdev_rx_err ( netdev, NULL, ( rc ? rc : -ENOMEM ) );
}
-
- /* Sanity check */
- assert ( rx->cons == cons );
- assert ( gve->seq == seq );
- assert ( total == 0 );
}
}
static void gve_refill_rx ( struct net_device *netdev ) {
struct gve_nic *gve = netdev->priv;
struct gve_queue *rx = &gve->rx;
- unsigned int prod;
+ struct gve_dqo_rx_descriptor *dqo;
+ unsigned int refill;
+ unsigned int index;
+ unsigned int tag;
+ uint32_t doorbell;
- /* The receive descriptors are prepopulated at the time of
- * creating the receive queue (pointing to the preallocated
- * queue pages). Refilling is therefore just a case of
- * ringing the doorbell if the device is not yet aware of any
- * available descriptors.
- */
- prod = ( rx->cons + rx->fill );
- if ( prod != rx->prod ) {
- rx->prod = prod;
- writel ( bswap_32 ( prod ), rx->db );
- DBGC2 ( gve, "GVE %p RX %#04x ready\n", gve, rx->prod );
+ /* Calculate refill quantity */
+ doorbell = ( rx->cons + rx->fill );
+ refill = ( doorbell - rx->prod );
+ if ( ! refill )
+ return;
+
+ /* Refill ring */
+ if ( gve->mode & GVE_MODE_DQO ) {
+
+ /* Out-of-order descriptors */
+ while ( refill-- ) {
+
+ /* Identify next available buffer */
+ index = ( rx->prod++ & ( rx->count - 1 ) );
+ tag = rx->tag[ index % GVE_RX_FILL ];
+
+ /* Populate descriptor */
+ dqo = &rx->desc.rx.dqo[index];
+ dqo->tag = tag;
+ dqo->buf.addr =
+ cpu_to_le64 ( gve_address ( rx, tag ) );
+ DBGC2 ( gve, "GVE %p RXD %#04x:%#02x at %#08llx\n",
+ gve, index, dqo->tag,
+ ( ( unsigned long long )
+ le64_to_cpu ( dqo->buf.addr ) ) );
+ }
+ wmb();
+ assert ( rx->prod == doorbell );
+
+ } else {
+
+ /* The in-order receive descriptors are prepopulated
+ * at the time of creating the receive queue (pointing
+ * to the preallocated queue pages). Refilling is
+ * therefore just a case of ringing the doorbell if
+ * the device is not yet aware of any available
+ * descriptors.
+ */
+ rx->prod += refill;
+ assert ( rx->prod == doorbell );
+ DBGC2 ( gve, "GVE %p RXD %#04x ready\n", gve, rx->prod );
+
+ /* Doorbell is big-endian */
+ doorbell = bswap_32 ( doorbell );
}
+
+ /* Ring doorbell */
+ writel ( doorbell, rx->db );
}
/**
.gqi = {
.desc = sizeof ( struct gve_gqi_tx_descriptor ),
},
+ .dqo = {
+ .desc = sizeof ( struct gve_dqo_tx_descriptor ),
+ .cmplt = sizeof ( struct gve_dqo_tx_completion ),
+ },
},
.create = GVE_ADMIN_CREATE_TX,
.destroy = GVE_ADMIN_DESTROY_TX,
.desc = sizeof ( struct gve_gqi_rx_descriptor ),
.cmplt = sizeof ( struct gve_gqi_rx_completion ),
},
+ .dqo = {
+ .desc = sizeof ( struct gve_dqo_rx_descriptor ),
+ .cmplt = sizeof ( struct gve_dqo_rx_completion ),
+ },
},
.create = GVE_ADMIN_CREATE_RX,
.destroy = GVE_ADMIN_DESTROY_RX,
/**
* Maximum number of transmit buffers
*
- * This is a policy decision.
+ * This is a policy decision. Experiments suggest that out-of-order
+ * transmit queues will write completions only in batches of 128
+ * bytes, comprising 8 descriptor completions and 8 packet
+ * completions. The transmit fill level must therefore be greater
+ * than 8, so that completions will be written out before the transmit
+ * ring runs out of space.
*/
-#define GVE_TX_FILL 8
+#define GVE_TX_FILL 16
/** Transmit queue page list ID */
#define GVE_TX_QPL 0x18ae5458
/** Continuation of packet transmit descriptor type */
#define GVE_GQI_TX_TYPE_CONT 0x20
+/** An out-of-order transmit tag
+ *
+ * From the hardware perspective, this is an opaque 15-bit (sic) value
+ * that is simply copied from the descriptor to the corresponding
+ * completion.
+ */
+struct gve_dqo_tx_tag {
+ /** Buffer index within queue page list */
+ uint8_t id;
+ /** Number of descriptors covered by this completion
+ *
+ * Note that this is a 7-bit quantity: the high bit may be
+ * (ab)used by the hardware to indicate that a completion is a
+ * terminologically undefined "miss" completion.
+ */
+ int8_t count;
+} __attribute__ (( packed ));
+
+/** An out-of-order transmit descriptor */
+struct gve_dqo_tx_descriptor {
+ /** Buffer descriptor */
+ struct gve_buffer buf;
+ /** Descriptor type and flags */
+ uint8_t type;
+ /** Reserved */
+ uint8_t reserved_a[3];
+ /** Tag */
+ struct gve_dqo_tx_tag tag;
+ /** Length of this descriptor */
+ uint16_t len;
+} __attribute__ (( packed ));
+
+/** Normal packet transmit descriptor type */
+#define GVE_DQO_TX_TYPE_PACKET 0x0c
+
+/** Last transmit descriptor in a packet */
+#define GVE_DQO_TX_TYPE_LAST 0x20
+
+/** Report transmit completion */
+#define GVE_DQO_TX_TYPE_REPORT 0x80
+
+/** An out-of-order transmit completion */
+struct gve_dqo_tx_completion {
+ /** Reserved */
+ uint8_t reserved_a[1];
+ /** Completion flags */
+ uint8_t flags;
+ /** Tag */
+ struct gve_dqo_tx_tag tag;
+ /** Reserved */
+ uint8_t reserved_b[4];
+} __attribute__ (( packed ));
+
+/** Transmit completion packet flag */
+#define GVE_DQO_TXF_PKT 0x10
+
+/** Transmit completion generation flag */
+#define GVE_DQO_TXF_GEN 0x80
+
/**
* Maximum number of receive buffers
*
} __attribute__ (( packed ));
-/** Padding at the start of all received packets */
-#define GVE_RX_PAD 2
+/** Padding at the start of all in-order received packets */
+#define GVE_GQI_RX_PAD 2
+
+/** An out-of-order receive descriptor */
+struct gve_dqo_rx_descriptor {
+ /** Tag */
+ uint8_t tag;
+ /** Reserved */
+ uint8_t reserved_a[7];
+ /** Buffer descriptor */
+ struct gve_buffer buf;
+ /** Reserved */
+ uint8_t reserved_b[16];
+} __attribute__ (( packed ));
+
+/** An out-of-order receive completion */
+struct gve_dqo_rx_completion {
+ /** Reserved */
+ uint8_t reserved_a[1];
+ /** Status */
+ uint8_t status;
+ /** Reserved */
+ uint8_t reserved_b[2];
+ /** Length and generation bit */
+ uint16_t len;
+ /** Reserved */
+ uint8_t reserved_c[2];
+ /** Flags */
+ uint8_t flags;
+ /** Reserved */
+ uint8_t reserved_d[3];
+ /** Tag */
+ uint8_t tag;
+ /** Reserved */
+ uint8_t reserved_e[19];
+} __attribute__ (( packed ));
+
+/** Receive error */
+#define GVE_DQO_RXS_ERROR 0x04
+
+/** Receive completion generation flag */
+#define GVE_DQO_RXL_GEN 0x4000
+
+/** Last receive descriptor in a packet */
+#define GVE_DQO_RXF_LAST 0x02
/** Queue strides */
struct gve_queue_stride {
union {
/** In-order transmit descriptors */
struct gve_gqi_tx_descriptor *gqi;
+ /** Out-of-order transmit descriptors */
+ struct gve_dqo_tx_descriptor *dqo;
} tx;
/** Receive descriptors */
union {
/** In-order receive descriptors */
struct gve_gqi_rx_descriptor *gqi;
+ /** Out-of-order receive descriptors */
+ struct gve_dqo_rx_descriptor *dqo;
} rx;
/** Raw data */
void *raw;
} desc;
/** Completion ring */
union {
+ /** Transmit completions */
+ union {
+ /** Out-of-order transmit completions */
+ struct gve_dqo_tx_completion *dqo;
+ } tx;
/** Receive completions */
union {
/** In-order receive completions */
struct gve_gqi_rx_completion *gqi;
+ /** Out-of-order receive completions */
+ struct gve_dqo_rx_completion *dqo;
} rx;
/** Raw data */
void *raw;
uint32_t prod;
/** Consumer counter */
uint32_t cons;
+ /** Completion counter */
+ uint32_t done;
/** Tag ring */
uint8_t *tag;
struct {
/** In-order queue strides */
struct gve_queue_stride gqi;
+ /** Out-of-order queue strides */
+ struct gve_queue_stride dqo;
} stride;
/** Command to create queue */
uint8_t create;
struct gve_queue rx;
/** Transmit I/O buffers (indexed by tag) */
struct io_buffer *tx_iobuf[GVE_TX_FILL];
+ /** Transmit tag chain */
+ uint8_t tx_chain[GVE_TX_FILL];
/** Transmit tag ring */
uint8_t tx_tag[GVE_TX_FILL];
/** Receive tag ring */