]> git.ipfire.org Git - thirdparty/curl.git/commitdiff
ngtcp2: enable Linux GSO
authorTatsuhiro Tsujikawa <tatsuhiro.t@gmail.com>
Mon, 23 May 2022 11:03:05 +0000 (20:03 +0900)
committerDaniel Stenberg <daniel@haxx.se>
Tue, 31 May 2022 14:04:12 +0000 (16:04 +0200)
Enable Linux GSO in ngtcp2 QUIC.  In order to recover from the
EAGAIN/EWOULDBLOCK by sendmsg with multiple packets in one GSO write,
packet buffer is now held by struct quicsocket.  GSO write might fail in
runtime depending on NIC.  Disable GSO if sendmsg returns EIO.

Closes #8909

configure.ac
lib/vquic/ngtcp2.c
lib/vquic/ngtcp2.h

index 1300a7189de072518f7105a363acd9ed3756cdae..32c67263b8b192cc699b6ba9bdc491b34e84de7f 100644 (file)
@@ -3189,6 +3189,7 @@ AC_CHECK_HEADERS(
         sys/un.h \
         linux/tcp.h \
         netinet/tcp.h \
+        netinet/udp.h \
         netdb.h \
         sys/sockio.h \
         sys/stat.h \
index 239e1d160f6a7b960b6103470600b0f140c59c73..d5204ab6aff2411d0168caa3ffc92b7846eb9c2f 100644 (file)
@@ -102,6 +102,10 @@ struct h3out {
   "%DISABLE_TLS13_COMPAT_MODE"
 #endif
 
+/* ngtcp2 default congestion controller does not perform pacing. Limit
+   the maximum packet burst to MAX_PKT_BURST packets. */
+#define MAX_PKT_BURST 10
+
 static CURLcode ng_process_ingress(struct Curl_easy *data,
                                    curl_socket_t sockfd,
                                    struct quicsocket *qs);
@@ -940,6 +944,24 @@ CURLcode Curl_quic_connect(struct Curl_easy *data,
 
   ngtcp2_connection_close_error_default(&qs->last_error);
 
+#if defined(__linux__) && defined(UDP_SEGMENT)
+  qs->no_gso = FALSE;
+#else
+  qs->no_gso = TRUE;
+#endif
+
+  qs->num_blocked_pkt = 0;
+  qs->num_blocked_pkt_sent = 0;
+  memset(&qs->blocked_pkt, 0, sizeof(qs->blocked_pkt));
+
+  qs->pktbuflen = NGTCP2_MAX_PMTUD_UDP_PAYLOAD_SIZE * MAX_PKT_BURST;
+  qs->pktbuf = malloc(qs->pktbuflen);
+  if(!qs->pktbuf) {
+    ngtcp2_conn_del(qs->qconn);
+    qs->qconn = NULL;
+    return CURLE_OUT_OF_MEMORY;
+  }
+
   return CURLE_OK;
 }
 
@@ -1015,6 +1037,7 @@ static void qs_disconnect(struct quicsocket *qs)
     qs->cred = NULL;
   }
 #endif
+  free(qs->pktbuf);
   nghttp3_conn_del(qs->h3conn);
   ngtcp2_conn_del(qs->qconn);
 #ifdef USE_OPENSSL
@@ -1807,14 +1830,172 @@ static CURLcode ng_process_ingress(struct Curl_easy *data,
   return CURLE_OK;
 }
 
+static CURLcode do_sendmsg(size_t *sent, struct Curl_easy *data, int sockfd,
+                           struct quicsocket *qs, const uint8_t *pkt,
+                           size_t pktlen, size_t gsolen);
+
+static CURLcode send_packet_no_gso(size_t *psent, struct Curl_easy *data,
+                                   int sockfd, struct quicsocket *qs,
+                                   const uint8_t *pkt, size_t pktlen,
+                                   size_t gsolen)
+{
+  const uint8_t *p, *end = pkt + pktlen;
+  size_t sent;
+
+  *psent = 0;
+
+  for(p = pkt; p < end; p += gsolen) {
+    size_t len = CURLMIN(gsolen, (size_t)(end - p));
+    CURLcode curlcode = do_sendmsg(&sent, data, sockfd, qs, p, len, len);
+    if(curlcode != CURLE_OK) {
+      return curlcode;
+    }
+    *psent += sent;
+  }
+
+  return CURLE_OK;
+}
+
+static CURLcode do_sendmsg(size_t *psent, struct Curl_easy *data, int sockfd,
+                           struct quicsocket *qs, const uint8_t *pkt,
+                           size_t pktlen, size_t gsolen)
+{
+  struct iovec msg_iov = {(void *)pkt, pktlen};
+  struct msghdr msg = {0};
+  uint8_t msg_ctrl[CMSG_SPACE(sizeof(uint16_t))] = {0};
+  size_t ctrllen = 0;
+  ssize_t sent;
+#if defined(__linux__) && defined(UDP_SEGMENT)
+  struct cmsghdr *cm;
+#endif
+
+  *psent = 0;
+
+  msg.msg_iov = &msg_iov;
+  msg.msg_iovlen = 1;
+
+  msg.msg_control = msg_ctrl;
+  msg.msg_controllen = sizeof(msg_ctrl);
+
+#if defined(__linux__) && defined(UDP_SEGMENT)
+  if(pktlen > gsolen) {
+    ctrllen += CMSG_SPACE(sizeof(uint16_t));
+    cm = CMSG_FIRSTHDR(&msg);
+    cm->cmsg_level = SOL_UDP;
+    cm->cmsg_type = UDP_SEGMENT;
+    cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+    *(uint16_t *)(void *)CMSG_DATA(cm) = gsolen & 0xffff;
+  }
+#endif
+
+  msg.msg_controllen = ctrllen;
+
+  while((sent = sendmsg(sockfd, &msg, 0)) == -1 && SOCKERRNO == EINTR)
+    ;
+
+  if(sent == -1) {
+    switch(SOCKERRNO) {
+    case EAGAIN:
+#if EAGAIN != EWOULDBLOCK
+    case EWOULDBLOCK:
+#endif
+      return CURLE_AGAIN;
+    case EMSGSIZE:
+      /* UDP datagram is too large; caused by PMTUD. Just let it be lost. */
+      break;
+    case EIO:
+      if(pktlen > gsolen) {
+        /* GSO failure */
+        failf(data, "sendmsg() returned %zd (errno %d); disable GSO", sent,
+              SOCKERRNO);
+        qs->no_gso = TRUE;
+        return send_packet_no_gso(psent, data, sockfd, qs, pkt, pktlen,
+                                  gsolen);
+      }
+      /* FALLTHROUGH */
+    default:
+      failf(data, "sendmsg() returned %zd (errno %d)", sent, SOCKERRNO);
+      return CURLE_SEND_ERROR;
+    }
+  }
+
+  assert(pktlen == (size_t)sent);
+
+  *psent = pktlen;
+
+  return CURLE_OK;
+}
+
+static CURLcode send_packet(size_t *psent, struct Curl_easy *data, int sockfd,
+                            struct quicsocket *qs, const uint8_t *pkt,
+                            size_t pktlen, size_t gsolen)
+{
+  if(qs->no_gso && pktlen > gsolen) {
+    return send_packet_no_gso(psent, data, sockfd, qs, pkt, pktlen, gsolen);
+  }
+
+  return do_sendmsg(psent, data, sockfd, qs, pkt, pktlen, gsolen);
+}
+
+static void push_blocked_pkt(struct quicsocket *qs, const uint8_t *pkt,
+                             size_t pktlen, size_t gsolen)
+{
+  struct blocked_pkt *blkpkt;
+
+  assert(qs->num_blocked_pkt <
+         sizeof(qs->blocked_pkt) / sizeof(qs->blocked_pkt[0]));
+
+  blkpkt = &qs->blocked_pkt[qs->num_blocked_pkt++];
+
+  blkpkt->pkt = pkt;
+  blkpkt->pktlen = pktlen;
+  blkpkt->gsolen = gsolen;
+}
+
+static CURLcode send_blocked_pkt(struct Curl_easy *data, int sockfd,
+                                 struct quicsocket *qs)
+{
+  size_t sent;
+  CURLcode curlcode;
+  struct blocked_pkt *blkpkt;
+
+  for(; qs->num_blocked_pkt_sent < qs->num_blocked_pkt;
+      ++qs->num_blocked_pkt_sent) {
+    blkpkt = &qs->blocked_pkt[qs->num_blocked_pkt_sent];
+    curlcode = send_packet(&sent, data, sockfd, qs, blkpkt->pkt,
+                           blkpkt->pktlen, blkpkt->gsolen);
+
+    if(curlcode) {
+      if(curlcode == CURLE_AGAIN) {
+        blkpkt->pkt += sent;
+        blkpkt->pktlen -= sent;
+      }
+      return curlcode;
+    }
+  }
+
+  qs->num_blocked_pkt = 0;
+  qs->num_blocked_pkt_sent = 0;
+
+  return CURLE_OK;
+}
+
 static CURLcode ng_flush_egress(struct Curl_easy *data,
                                 int sockfd,
                                 struct quicsocket *qs)
 {
   int rv;
-  ssize_t sent;
+  size_t sent;
   ngtcp2_ssize outlen;
-  uint8_t out[NGTCP2_MAX_PMTUD_UDP_PAYLOAD_SIZE];
+  uint8_t *outpos = qs->pktbuf;
+  size_t max_udp_payload_size =
+      ngtcp2_conn_get_max_udp_payload_size(qs->qconn);
+  size_t path_max_udp_payload_size =
+      ngtcp2_conn_get_path_max_udp_payload_size(qs->qconn);
+  size_t max_pktcnt =
+      CURLMIN(MAX_PKT_BURST, qs->pktbuflen / max_udp_payload_size);
+  size_t pktcnt = 0;
+  size_t gsolen;
   ngtcp2_path_storage ps;
   ngtcp2_tstamp ts = timestamp();
   ngtcp2_tstamp expiry;
@@ -1825,6 +2006,7 @@ static CURLcode ng_flush_egress(struct Curl_easy *data,
   nghttp3_vec vec[16];
   ngtcp2_ssize ndatalen;
   uint32_t flags;
+  CURLcode curlcode;
 
   rv = ngtcp2_conn_handle_expiry(qs->qconn, ts);
   if(rv) {
@@ -1835,6 +2017,17 @@ static CURLcode ng_flush_egress(struct Curl_easy *data,
     return CURLE_SEND_ERROR;
   }
 
+  if(qs->num_blocked_pkt) {
+    curlcode = send_blocked_pkt(data, sockfd, qs);
+    if(curlcode) {
+      if(curlcode == CURLE_AGAIN) {
+        Curl_expire(data, 1, EXPIRE_QUIC);
+        return CURLE_OK;
+      }
+      return curlcode;
+    }
+  }
+
   ngtcp2_path_storage_zero(&ps);
 
   for(;;) {
@@ -1857,11 +2050,25 @@ static CURLcode ng_flush_egress(struct Curl_easy *data,
 
     flags = NGTCP2_WRITE_STREAM_FLAG_MORE |
             (fin ? NGTCP2_WRITE_STREAM_FLAG_FIN : 0);
-    outlen = ngtcp2_conn_writev_stream(qs->qconn, &ps.path, NULL, out,
-                                       sizeof(out),
+    outlen = ngtcp2_conn_writev_stream(qs->qconn, &ps.path, NULL, outpos,
+                                       max_udp_payload_size,
                                        &ndatalen, flags, stream_id,
                                        (const ngtcp2_vec *)vec, veccnt, ts);
     if(outlen == 0) {
+      if(outpos != qs->pktbuf) {
+        curlcode = send_packet(&sent, data, sockfd, qs, qs->pktbuf,
+                               outpos - qs->pktbuf, gsolen);
+        if(curlcode) {
+          if(curlcode == CURLE_AGAIN) {
+            push_blocked_pkt(qs, qs->pktbuf + sent, outpos - qs->pktbuf - sent,
+                             gsolen);
+            Curl_expire(data, 1, EXPIRE_QUIC);
+            return CURLE_OK;
+          }
+          return curlcode;
+        }
+      }
+
       break;
     }
     if(outlen < 0) {
@@ -1912,23 +2119,61 @@ static CURLcode ng_flush_egress(struct Curl_easy *data,
       }
     }
 
-    while((sent = send(sockfd, (const char *)out, outlen, 0)) == -1 &&
-          SOCKERRNO == EINTR)
-      ;
+    outpos += outlen;
 
-    if(sent == -1) {
-      if(SOCKERRNO == EAGAIN || SOCKERRNO == EWOULDBLOCK) {
-        /* TODO Cache packet */
-        break;
+    if(pktcnt == 0) {
+      gsolen = outlen;
+    }
+    else if((size_t)outlen > gsolen ||
+            (gsolen > path_max_udp_payload_size &&
+             (size_t)outlen != gsolen)) {
+      /* Packet larger than path_max_udp_payload_size is PMTUD probe
+         packet and it might not be sent because of EMSGSIZE. Send
+         them separately to minimize the loss. */
+      curlcode = send_packet(&sent, data, sockfd, qs, qs->pktbuf,
+                             outpos - outlen - qs->pktbuf, gsolen);
+      if(curlcode) {
+        if(curlcode == CURLE_AGAIN) {
+          push_blocked_pkt(qs, qs->pktbuf + sent,
+                           outpos - outlen - qs->pktbuf - sent, gsolen);
+          push_blocked_pkt(qs, outpos - outlen, outlen, outlen);
+          Curl_expire(data, 1, EXPIRE_QUIC);
+          return CURLE_OK;
+        }
+        return curlcode;
       }
-      else {
-        failf(data, "send() returned %zd (errno %d)", sent,
-              SOCKERRNO);
-        if(SOCKERRNO != EMSGSIZE) {
-          return CURLE_SEND_ERROR;
+      curlcode = send_packet(&sent, data, sockfd, qs, outpos - outlen, outlen,
+                             outlen);
+      if(curlcode) {
+        if(curlcode == CURLE_AGAIN) {
+          assert(0 == sent);
+          push_blocked_pkt(qs, outpos - outlen, outlen, outlen);
+          Curl_expire(data, 1, EXPIRE_QUIC);
+          return CURLE_OK;
+        }
+        return curlcode;
+      }
+
+      pktcnt = 0;
+      outpos = qs->pktbuf;
+      continue;
+    }
+
+    if(++pktcnt >= max_pktcnt || (size_t)outlen < gsolen) {
+      curlcode = send_packet(&sent, data, sockfd, qs, qs->pktbuf,
+                             outpos - qs->pktbuf, gsolen);
+      if(curlcode) {
+        if(curlcode == CURLE_AGAIN) {
+          push_blocked_pkt(qs, qs->pktbuf + sent, outpos - qs->pktbuf - sent,
+                           gsolen);
+          Curl_expire(data, 1, EXPIRE_QUIC);
+          return CURLE_OK;
         }
-        /* UDP datagram is too large; caused by PMTUD. Just let it be lost. */
+        return curlcode;
       }
+
+      pktcnt = 0;
+      outpos = qs->pktbuf;
     }
   }
 
index 068ff4123233d2b56c5d28e635b9f28741f071e0..60fb43e519da0c6c6c95c2b6a393ea4cb74a9a14 100644 (file)
 
 #ifdef USE_NGTCP2
 
+#ifdef HAVE_NETINET_UDP_H
+#include <netinet/udp.h>
+#endif
+
 #include <ngtcp2/ngtcp2.h>
 #include <nghttp3/nghttp3.h>
 #ifdef USE_OPENSSL
 #include <gnutls/gnutls.h>
 #endif
 
+struct blocked_pkt {
+  const uint8_t *pkt;
+  size_t pktlen;
+  size_t gsolen;
+};
+
 struct quicsocket {
   struct connectdata *conn; /* point back to the connection */
   ngtcp2_conn *qconn;
@@ -54,6 +64,15 @@ struct quicsocket {
   uint8_t tls_alert;
   struct sockaddr_storage local_addr;
   socklen_t local_addrlen;
+  bool no_gso;
+  uint8_t *pktbuf;
+  size_t pktbuflen;
+  /* the number of entries in blocked_pkt */
+  size_t num_blocked_pkt;
+  /* the number of processed entries in blocked_pkt */
+  size_t num_blocked_pkt_sent;
+  /* the packets blocked by sendmsg (EAGAIN or EWOULDBLOCK) */
+  struct blocked_pkt blocked_pkt[2];
 
   nghttp3_conn *h3conn;
   nghttp3_settings h3settings;