union ena_aq_req *req;
union ena_acq_rsp *rsp;
unsigned int i;
+ size_t llqe;
int rc;
/* Allocate submission queue entries */
req = ena_admin_req ( ena );
req->header.opcode = ENA_CREATE_SQ;
req->create_sq.direction = sq->direction;
- req->create_sq.policy = cpu_to_le16 ( ENA_SQ_HOST_MEMORY |
- ENA_SQ_CONTIGUOUS );
+ req->create_sq.policy = cpu_to_le16 ( sq->policy );
req->create_sq.cq_id = cpu_to_le16 ( cq->id );
req->create_sq.count = cpu_to_le16 ( sq->count );
req->create_sq.address = cpu_to_le64 ( virt_to_bus ( sq->sqe.raw ) );
/* Parse response */
sq->id = le16_to_cpu ( rsp->create_sq.id );
sq->doorbell = le32_to_cpu ( rsp->create_sq.doorbell );
+ llqe = le32_to_cpu ( rsp->create_sq.llqe );
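+ /* The returned offset is relative to the mapped on-device memory */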
+ if ( sq->policy & ENA_SQ_DEVICE_MEMORY ) {
+ assert ( ena->mem != NULL );
+ assert ( sq->len >= sizeof ( *sq->sqe.llq ) );
+ sq->llqe = ( ena->mem + llqe );
+ } else {
+ sq->llqe = NULL;
+ }
/* Reset producer counter and phase */
sq->prod = 0;
for ( i = 0 ; i < sq->count ; i++ )
sq->ids[i] = i;
- DBGC ( ena, "ENA %p %s SQ%d at [%08lx,%08lx) fill %d db +%04x CQ%d\n",
- ena, ena_direction ( sq->direction ), sq->id,
- virt_to_phys ( sq->sqe.raw ),
- ( virt_to_phys ( sq->sqe.raw ) + sq->len ),
+ DBGC ( ena, "ENA %p %s SQ%d at ",
+ ena, ena_direction ( sq->direction ), sq->id );
+ if ( sq->policy & ENA_SQ_DEVICE_MEMORY ) {
+ DBGC ( ena, "LLQ [+%08zx,+%08zx)", llqe,
+ ( llqe + ( sq->count * sizeof ( sq->sqe.llq[0] ) ) ) );
+ } else {
+ DBGC ( ena, "[%08lx,%08lx)", virt_to_phys ( sq->sqe.raw ),
+ ( virt_to_phys ( sq->sqe.raw ) + sq->len ) );
+ }
+ DBGC ( ena, " fill %d db +%04x CQ%d\n",
sq->fill, sq->doorbell, cq->id );
return 0;
}
+/**
+ * Configure low latency queues
+ *
+ * @v ena ENA device
+ * @ret rc Return status code
+ */
+static int ena_llq_config ( struct ena_nic *ena ) {
+ union ena_aq_req *req;
+ union ena_acq_rsp *rsp;
+ union ena_feature *feature;
+ uint16_t header;
+ uint16_t size;
+ uint16_t desc;
+ uint16_t stride;
+ uint16_t mode;
+ int rc;
+
+ /* Construct request */
+ req = ena_admin_req ( ena );
+ req->header.opcode = ENA_GET_FEATURE;
+ req->get_feature.id = ENA_LLQ_CONFIG;
+
+ /* Issue request */
+ if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) {
+ DBGC ( ena, "ENA %p could not get LLQ configuration: %s\n",
+ ena, strerror ( rc ) );
+ return rc;
+ }
+
+ /* Parse response */
+ feature = &rsp->get_feature.feature;
+ header = le16_to_cpu ( feature->llq.header.supported );
+ size = le16_to_cpu ( feature->llq.size.supported );
+ desc = le16_to_cpu ( feature->llq.desc.supported );
+ stride = le16_to_cpu ( feature->llq.stride.supported );
+ mode = le16_to_cpu ( feature->llq.mode );
+ DBGC ( ena, "ENA %p LLQ supports %02x:%02x:%02x:%02x:%02x with %dx%d "
+ "entries\n", ena, header, size, desc, stride, mode,
+ le32_to_cpu ( feature->llq.queues ),
+ le32_to_cpu ( feature->llq.count ) );
+
+ /* Check for a supported configuration */
+ if ( ! feature->llq.queues ) {
+ DBGC ( ena, "ENA %p LLQ has no queues\n", ena );
+ return -ENOTSUP;
+ }
+ if ( ! ( header & ENA_LLQ_HEADER_INLINE ) ) {
+ DBGC ( ena, "ENA %p LLQ does not support inline headers\n",
+ ena );
+ return -ENOTSUP;
+ }
+ if ( ! ( size & ENA_LLQ_SIZE_128 ) ) {
+ DBGC ( ena, "ENA %p LLQ does not support 128-byte entries\n",
+ ena );
+ return -ENOTSUP;
+ }
+ if ( ! ( desc & ENA_LLQ_DESC_2 ) ) {
+ DBGC ( ena, "ENA %p LLQ does not support two-descriptor "
+ "entries\n", ena );
+ return -ENOTSUP;
+ }
+
+ /* Enable a minimal configuration */
+ header = ENA_LLQ_HEADER_INLINE;
+ size = ENA_LLQ_SIZE_128;
+ desc = ENA_LLQ_DESC_2;
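+ /* ( stride & -stride ) isolates the least significant set bit */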
+ stride &= ( -stride ); /* Don't care: use first supported option */
+ DBGC ( ena, "ENA %p LLQ enabling %02x:%02x:%02x:%02x:%02x\n",
+ ena, header, size, desc, stride, mode );
+
+ /* Construct request */
+ req = ena_admin_req ( ena );
+ req->header.opcode = ENA_SET_FEATURE;
+ req->set_feature.id = ENA_LLQ_CONFIG;
+ feature = &req->set_feature.feature;
+ feature->llq.header.enabled = cpu_to_le16 ( header );
+ feature->llq.size.enabled = cpu_to_le16 ( size );
+ feature->llq.desc.enabled = cpu_to_le16 ( desc );
+ feature->llq.stride.enabled = cpu_to_le16 ( stride );
+ feature->llq.mode = cpu_to_le16 ( mode );
+
+ /* Issue request */
+ if ( ( rc = ena_admin ( ena, req, &rsp ) ) != 0 ) {
+ DBGC ( ena, "ENA %p could not set LLQ configuration: %s\n",
+ ena, strerror ( rc ) );
+ return rc;
+ }
+
+ /* Use on-device memory for transmit queue */
+ ena->tx.sq.policy |= ENA_SQ_DEVICE_MEMORY;
+ ena->tx.sq.inlined = sizeof ( ena->tx.sq.sqe.llq->inlined );
+
+ return 0;
+}
+
/**
* Get statistics (for debugging)
*
static int ena_transmit ( struct net_device *netdev, struct io_buffer *iobuf ) {
struct ena_nic *ena = netdev->priv;
struct ena_tx_sqe *sqe;
+ struct ena_tx_llqe *llqe;
+ const uint64_t *src;
+ uint64_t *dest;
physaddr_t address;
unsigned int index;
unsigned int id;
+ unsigned int i;
+ uint8_t flags;
+ size_t inlined;
size_t len;
/* Get next submission queue entry */
sqe = &ena->tx.sq.sqe.tx[index];
id = ena->tx_ids[index];
- /* Construct submission queue entry */
+ /* Construct submission queue entry values */
address = virt_to_bus ( iobuf->data );
len = iob_len ( iobuf );
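+ /* Inline up to the first "inlined" bytes and DMA the remainder */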
+ inlined = ena->tx.sq.inlined;
+ if ( inlined > len )
+ inlined = len;
+ len -= inlined;
+ address += inlined;
+ flags = ( ENA_SQE_FIRST | ENA_SQE_LAST | ENA_SQE_CPL |
+ ena->tx.sq.phase );
+
+ /* Prepare low latency queue bounce buffer, if applicable */
+ llqe = ena->tx.sq.sqe.llq;
+ if ( ena->tx.sq.llqe ) {
+
+ /* Construct zero-information metadata queue entry */
+ llqe->meta.meta = ENA_TX_SQE_META;
+ llqe->meta.flags = ( flags & ~( ENA_SQE_LAST | ENA_SQE_CPL ) );
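+ /* LAST and CPL belong only on the data descriptor below */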
+
+ /* Copy inlined data */
+ memcpy ( llqe->inlined, iobuf->data, inlined );
+
+ /* Place submission queue entry within bounce buffer */
+ sqe = &llqe->sqe;
+ flags &= ~ENA_SQE_FIRST;
+ }
+
+ /* Construct submission queue entry */
sqe->len = cpu_to_le16 ( len );
sqe->id = cpu_to_le16 ( id );
sqe->address = cpu_to_le64 ( address );
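+ /* The inlined length occupies the high byte of the address field */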
+ sqe->inlined = inlined;
wmb();
- sqe->flags = ( ENA_SQE_FIRST | ENA_SQE_LAST | ENA_SQE_CPL |
- ena->tx.sq.phase );
+ sqe->flags = flags;
wmb();
+ /* Copy bounce buffer to on-device memory, if applicable */
+ if ( ena->tx.sq.llqe ) {
+ src = ( ( const void * ) llqe );
+ dest = ( ena->tx.sq.llqe + ( index * sizeof ( *llqe ) ) );
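+ /* Copy one naturally aligned 64-bit word at a time */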
+ for ( i = 0 ; i < ( sizeof ( *llqe ) / sizeof ( *src ) ); i++ )
+ writeq ( *(src++), dest++ );
+ }
+
/* Increment producer counter */
ena->tx.sq.prod++;
if ( ( ena->tx.sq.prod % ENA_TX_COUNT ) == 0 )
if ( ( rc = ena_get_device_attributes ( netdev ) ) != 0 )
goto err_get_device_attributes;
+ /* Attempt to configure low latency queues, if applicable.
+ * Ignore any errors and continue without using LLQs.
+ */
+ if ( ena->mem && ( ena->features & ENA_FEATURE_LLQ ) )
+ ena_llq_config ( ena );
+
/* Register network device */
if ( ( rc = register_netdev ( netdev ) ) != 0 )
goto err_register_netdev;
uint32_t mtu;
} __attribute__ (( packed ));
+/** Device supports low latency queues */
+#define ENA_FEATURE_LLQ 0x00000010
+
+/** Low latency queue config */
+#define ENA_LLQ_CONFIG 4
+
+/** A low latency queue option */
+struct ena_llq_option {
+ /** Bitmask of supported option values */
+ uint16_t supported;
+ /** Single-entry bitmask of the enabled option value */
+ uint16_t enabled;
+} __attribute__ (( packed ));
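+
+/* For example (hypothetical values): supported=0x0003 would advertise
+ * two option values, and enabled=0x0001 would select the first.
+ */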
+
+/** Low latency queue config */
+struct ena_llq_config {
+ /** Maximum number of low latency queues */
+ uint32_t queues;
+ /** Maximum queue depth */
+ uint32_t count;
+ /** Header locations */
+ struct ena_llq_option header;
+ /** Entry sizes */
+ struct ena_llq_option size;
+ /** Descriptor counts */
+ struct ena_llq_option desc;
+ /** Descriptor strides */
+ struct ena_llq_option stride;
+ /** Reserved */
+ uint8_t reserved_a[4];
+ /** Acceleration mode */
+ uint16_t mode;
+ /** Maximum burst size */
+ uint16_t burst;
+ /** Reserved */
+ uint8_t reserved_b[4];
+} __attribute__ (( packed ));
+
+/** Low latency queue header locations */
+enum ena_llq_header {
+ /** Headers are placed inline immediately after descriptors */
+ ENA_LLQ_HEADER_INLINE = 0x0001,
+};
+
+/** Low latency queue entry sizes */
+enum ena_llq_size {
+ /** Entries are 128 bytes */
+ ENA_LLQ_SIZE_128 = 0x0001,
+};
+
+/** Low latency queue descriptor count */
+enum ena_llq_desc {
+ /** Two descriptors before inline headers */
+ ENA_LLQ_DESC_2 = 0x0002,
+};
+
/** Async event notification queue config */
#define ENA_AENQ_CONFIG 26
union ena_feature {
/** Device attributes */
struct ena_device_attributes device;
+ /** Low latency queue configuration */
+ struct ena_llq_config llq;
/** Async event notification queue config */
struct ena_aenq_config aenq;
/** Host attributes */
enum ena_sq_policy {
/** Use host memory */
ENA_SQ_HOST_MEMORY = 0x0001,
+ /** Use on-device memory (must be used in addition to host memory) */
+ ENA_SQ_DEVICE_MEMORY = 0x0002,
/** Memory is contiguous */
ENA_SQ_CONTIGUOUS = 0x0100,
};
/** Submission queue identifier */
uint16_t id;
/** Reserved */
- uint8_t reserved[2];
+ uint8_t reserved_a[2];
/** Doorbell register offset */
uint32_t doorbell;
/** LLQ descriptor ring offset */
- uint32_t llq_desc;
- /** LLQ header offset */
- uint32_t llq_data;
+ uint32_t llqe;
+ /** Reserved */
+ uint8_t reserved_b[4];
} __attribute__ (( packed ));
/** Destroy submission queue */
struct ena_tx_sqe {
/** Length */
uint16_t len;
- /** Reserved */
- uint8_t reserved_a;
+ /** Metadata flags */
+ uint8_t meta;
/** Flags */
uint8_t flags;
/** Reserved */
uint8_t reserved_b[3];
/** Request identifier */
uint8_t id;
- /** Address */
- uint64_t address;
+ /** Address and inlined length */
+ union {
+ /** Address */
+ uint64_t address;
+ /** Inlined length */
+ struct {
+ /** Reserved */
+ uint8_t reserved[7];
+ /** Inlined length */
+ uint8_t inlined;
+ } __attribute__ (( packed ));
+ } __attribute__ (( packed ));
} __attribute__ (( packed ));
+/** This is a metadata entry */
+#define ENA_TX_SQE_META 0x80
+
/** Receive submission queue entry */
struct ena_rx_sqe {
/** Length */
/** Completion queue ownership phase flag */
#define ENA_CQE_PHASE 0x01
+/** Low latency transmit queue bounce buffer */
+struct ena_tx_llqe {
+ /** Pointless metadata descriptor */
+ struct ena_tx_sqe meta;
+ /** Transmit descriptor */
+ struct ena_tx_sqe sqe;
+ /** Inlined header data */
+ uint8_t inlined[96];
+} __attribute__ (( packed ));
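+
+/* Two 16-byte descriptors plus 96 bytes of inline data give a 128-byte
+ * entry, matching ENA_LLQ_SIZE_128
+ */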
+
/** Submission queue */
struct ena_sq {
/** Entries */
struct ena_tx_sqe *tx;
/** Receive submission queue entries */
struct ena_rx_sqe *rx;
+ /** Low latency queue bounce buffer */
+ struct ena_tx_llqe *llq;
/** Raw data */
void *raw;
} sqe;
/** Buffer IDs */
uint8_t *ids;
+ /** Low latency queue entry base, in on-device memory */
+ void *llqe;
/** Doorbell register offset */
unsigned int doorbell;
/** Total length of entries */
unsigned int prod;
/** Phase */
unsigned int phase;
+ /** Queue policy */
+ uint16_t policy;
/** Submission queue identifier */
uint16_t id;
/** Direction */
uint8_t max;
/** Fill level (limited to completion queue size) */
uint8_t fill;
+ /** Maximum inline header length */
+ uint8_t inlined;
};
/**
unsigned int max, size_t size, uint8_t *ids ) {
sq->len = ( count * size );
+ sq->policy = ( ENA_SQ_HOST_MEMORY | ENA_SQ_CONTIGUOUS );
sq->direction = direction;
sq->count = count;
sq->max = max;