From: Stefan Eissing Date: Thu, 2 Oct 2025 12:20:05 +0000 (+0200) Subject: vquic: handling of io improvements X-Git-Tag: rc-8_17_0-3~365 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=9aa8e9a783c43ed3fad244a98455ac2d7288909f;p=thirdparty%2Fcurl.git vquic: handling of io improvements - better tracing of what system call is used and how often - ngtcp2: combine vquic_send into larger chunks - ngtcp2: define own PMTU values and enable MTU probing - ngtcp2: trace interesting remote transport parameters Closes #18812 --- diff --git a/lib/vquic/curl_ngtcp2.c b/lib/vquic/curl_ngtcp2.c index 4998400d96..4a45c1f6db 100644 --- a/lib/vquic/curl_ngtcp2.c +++ b/lib/vquic/curl_ngtcp2.c @@ -410,13 +410,23 @@ static void qlog_callback(void *user_data, uint32_t flags, ctx->qlogfd = -1; } } - } static void quic_settings(struct cf_ngtcp2_ctx *ctx, struct Curl_easy *data, struct pkt_io_ctx *pktx) { +#ifdef NGTCP2_SETTINGS_V2x +static uint16_t mtu_probes[] = { + 1472, /* what h2o offers */ + 1452, /* what Caddy offers */ + 1454 - 48, /* The well known MTU used by a domestic optic fiber + service in Japan. */ + 1390 - 48, /* Typical Tunneled MTU */ + 1280 - 48, /* IPv6 minimum MTU */ + 1492 - 48, /* PPPoE */ +}; +#endif ngtcp2_settings *s = &ctx->settings; ngtcp2_transport_params *t = &ctx->transport_params; @@ -433,6 +443,12 @@ static void quic_settings(struct cf_ngtcp2_ctx *ctx, data->set.connecttimeout * NGTCP2_MILLISECONDS : QUIC_HANDSHAKE_TIMEOUT; s->max_window = 100 * ctx->max_stream_window; s->max_stream_window = 10 * ctx->max_stream_window; + s->no_pmtud = FALSE; +#ifdef NGTCP2_SETTINGS_V2x + s->pmtud_probes = mtu_probes; + s->pmtud_probeslen = CURL_ARRAYSIZE(mtu_probes); + s->max_tx_udp_payload_size = 64 * 1024; /* mtu_probes[0]; */ +#endif t->initial_max_data = 10 * ctx->max_stream_window; t->initial_max_stream_data_bidi_local = ctx->max_stream_window; @@ -468,8 +484,18 @@ static int cf_ngtcp2_handshake_completed(ngtcp2_conn *tconn, void *user_data) ctx->tls_vrfy_result = Curl_vquic_tls_verify_peer(&ctx->tls, cf, data, &ctx->peer); - CURL_TRC_CF(data, cf, "handshake complete after %dms", - (int)curlx_timediff(ctx->handshake_at, ctx->started_at)); + if(Curl_trc_is_verbose(data)) { + const ngtcp2_transport_params *rp; + rp = ngtcp2_conn_get_remote_transport_params(ctx->qconn); + CURL_TRC_CF(data, cf, "handshake complete after %dms, remote transport[" + "max_udp_payload=%" FMT_PRIu64 + ", initial_max_data=%" FMT_PRIu64 + "]", + (int)curlx_timediff(ctx->handshake_at, ctx->started_at), + (curl_uint64_t)rp->max_udp_payload_size, + (curl_uint64_t)rp->initial_max_data); + } + /* In case of earlydata, where we simulate being connected, update * the handshake time when we really did connect */ if(ctx->use_earlydata) @@ -1678,6 +1704,9 @@ static CURLcode recv_pkt(const unsigned char *pkt, size_t pktlen, ngtcp2_path path; int rv; + if(ecn) + CURL_TRC_CF(pktx->data, pktx->cf, "vquic_recv(len=%zu, ecn=%x)", + pktlen, ecn); ngtcp2_addr_init(&path.local, (struct sockaddr *)&ctx->q.local_addr, (socklen_t)ctx->q.local_addrlen); ngtcp2_addr_init(&path.remote, (struct sockaddr *)remote_addr, @@ -1696,7 +1725,6 @@ static CURLcode recv_pkt(const unsigned char *pkt, size_t pktlen, return CURLE_PEER_FAILED_VERIFICATION; return CURLE_RECV_ERROR; } - return CURLE_OK; } @@ -1712,6 +1740,10 @@ static CURLcode cf_progress_ingress(struct Curl_cfilter *cf, pktx_init(&local_pktx, cf, data); pktx = &local_pktx; } + else { + pktx_update_time(pktx, cf); + ngtcp2_path_storage_zero(&pktx->ps); + } result = Curl_vquic_tls_before_recv(&ctx->tls, cf, data); if(result) @@ -1831,9 +1863,10 @@ static CURLcode cf_progress_egress(struct Curl_cfilter *cf, { struct cf_ngtcp2_ctx *ctx = cf->ctx; size_t nread; - size_t max_payload_size, path_max_payload_size, max_pktcnt; + size_t max_payload_size, path_max_payload_size; size_t pktcnt = 0; size_t gsolen = 0; /* this disables gso until we have a clue */ + size_t send_quantum; CURLcode curlcode; struct pkt_io_ctx local_pktx; @@ -1869,71 +1902,69 @@ static CURLcode cf_progress_egress(struct Curl_cfilter *cf, max_payload_size = ngtcp2_conn_get_max_tx_udp_payload_size(ctx->qconn); path_max_payload_size = ngtcp2_conn_get_path_max_tx_udp_payload_size(ctx->qconn); - /* maximum number of packets buffered before we flush to the socket */ - max_pktcnt = CURLMIN(MAX_PKT_BURST, - ctx->q.sendbuf.chunk_size / max_payload_size); - + send_quantum = ngtcp2_conn_get_send_quantum(ctx->qconn); + CURL_TRC_CF(data, cf, "egress, collect and send packets, quantum=%zu", + send_quantum); for(;;) { /* add the next packet to send, if any, to our buffer */ curlcode = Curl_bufq_sipn(&ctx->q.sendbuf, max_payload_size, read_pkt_to_send, pktx, &nread); - if(curlcode) { - if(curlcode != CURLE_AGAIN) - return curlcode; - /* Nothing more to add, flush and leave */ - curlcode = vquic_send(cf, data, &ctx->q, gsolen); - if(curlcode) { - if(curlcode == CURLE_AGAIN) { - Curl_expire(data, 1, EXPIRE_QUIC); - return CURLE_OK; - } - return curlcode; + if(curlcode == CURLE_AGAIN) + break; + else if(curlcode) + return curlcode; + else { + size_t buflen = Curl_bufq_len(&ctx->q.sendbuf); + if((buflen >= send_quantum) || + ((buflen + gsolen) >= ctx->q.sendbuf.chunk_size)) + break; + DEBUGASSERT(nread > 0); + ++pktcnt; + if(pktcnt == 1) { + /* first packet in buffer. This is either of a known, "good" + * payload size or it is a PMTUD. We will see. */ + gsolen = nread; } - goto out; - } - - DEBUGASSERT(nread > 0); - if(pktcnt == 0) { - /* first packet in buffer. This is either of a known, "good" - * payload size or it is a PMTUD. We will see. */ - gsolen = nread; - } - else if(nread > gsolen || - (gsolen > path_max_payload_size && nread != gsolen)) { - /* The just added packet is a PMTUD *or* the one(s) before the - * just added were PMTUD and the last one is smaller. - * Flush the buffer before the last add. */ - curlcode = vquic_send_tail_split(cf, data, &ctx->q, - gsolen, nread, nread); - if(curlcode) { - if(curlcode == CURLE_AGAIN) { - Curl_expire(data, 1, EXPIRE_QUIC); - return CURLE_OK; + else if(nread > gsolen || + (gsolen > path_max_payload_size && nread != gsolen)) { + /* The just added packet is a PMTUD *or* the one(s) before the + * just added were PMTUD and the last one is smaller. + * Flush the buffer before the last add. */ + curlcode = vquic_send_tail_split(cf, data, &ctx->q, + gsolen, nread, nread); + if(curlcode) { + if(curlcode == CURLE_AGAIN) { + Curl_expire(data, 1, EXPIRE_QUIC); + return CURLE_OK; + } + return curlcode; } - return curlcode; + pktcnt = 0; + } + else if(nread < gsolen) { + /* Reached MAX_PKT_BURST *or* + * the capacity of our buffer *or* + * last add was shorter than the previous ones, flush */ + break; } - pktcnt = 0; - continue; } + } - if(++pktcnt >= max_pktcnt || nread < gsolen) { - /* Reached MAX_PKT_BURST *or* - * the capacity of our buffer *or* - * last add was shorter than the previous ones, flush */ - curlcode = vquic_send(cf, data, &ctx->q, gsolen); - if(curlcode) { - if(curlcode == CURLE_AGAIN) { - Curl_expire(data, 1, EXPIRE_QUIC); - return CURLE_OK; - } - return curlcode; + if(!Curl_bufq_is_empty(&ctx->q.sendbuf)) { + /* time to send */ + CURL_TRC_CF(data, cf, "egress, send collected %zu packets in %zu bytes", + pktcnt, Curl_bufq_len(&ctx->q.sendbuf)); + curlcode = vquic_send(cf, data, &ctx->q, gsolen); + if(curlcode) { + if(curlcode == CURLE_AGAIN) { + Curl_expire(data, 1, EXPIRE_QUIC); + return CURLE_OK; } - /* pktbuf has been completely sent */ - pktcnt = 0; + return curlcode; } + pktx_update_time(pktx, cf); + ngtcp2_conn_update_pkt_tx_time(ctx->qconn, pktx->ts); } - -out: return CURLE_OK; } diff --git a/lib/vquic/vquic.c b/lib/vquic/vquic.c index c509819752..275ea8bccc 100644 --- a/lib/vquic/vquic.c +++ b/lib/vquic/vquic.c @@ -55,7 +55,7 @@ #if !defined(CURL_DISABLE_HTTP) && defined(USE_HTTP3) #define NW_CHUNK_SIZE (64 * 1024) -#define NW_SEND_CHUNKS 2 +#define NW_SEND_CHUNKS 1 int Curl_vquic_init(void) @@ -125,6 +125,7 @@ static CURLcode do_sendmsg(struct Curl_cfilter *cf, const uint8_t *pkt, size_t pktlen, size_t gsolen, size_t *psent) { + CURLcode result = CURLE_OK; #ifdef HAVE_SENDMSG struct iovec msg_iov; struct msghdr msg = {0}; @@ -181,12 +182,14 @@ static CURLcode do_sendmsg(struct Curl_cfilter *cf, FALLTHROUGH(); default: failf(data, "sendmsg() returned %zd (errno %d)", sent, SOCKERRNO); - return CURLE_SEND_ERROR; + result = CURLE_SEND_ERROR; + goto out; } } else if(pktlen != (size_t)sent) { failf(data, "sendmsg() sent only %zd/%zu bytes", sent, pktlen); - return CURLE_SEND_ERROR; + result = CURLE_SEND_ERROR; + goto out; } #else ssize_t sent; @@ -201,12 +204,14 @@ static CURLcode do_sendmsg(struct Curl_cfilter *cf, if(sent == -1) { if(SOCKERRNO == EAGAIN || SOCKERRNO == SOCKEWOULDBLOCK) { - return CURLE_AGAIN; + result = CURLE_AGAIN; + goto out; } else { failf(data, "send() returned %zd (errno %d)", sent, SOCKERRNO); if(SOCKERRNO != SOCKEMSGSIZE) { - return CURLE_SEND_ERROR; + result = CURLE_SEND_ERROR; + goto out; } /* UDP datagram is too large; caused by PMTUD. Just let it be lost. */ @@ -216,9 +221,16 @@ static CURLcode do_sendmsg(struct Curl_cfilter *cf, (void)cf; *psent = pktlen; - return CURLE_OK; +out: + return result; } +#ifdef HAVE_SENDMSG +#define VQUIC_SEND_METHOD "sendmsg" +#else +#define VQUIC_SEND_METHOD "send" +#endif + static CURLcode send_packet_no_gso(struct Curl_cfilter *cf, struct Curl_easy *data, struct cf_quic_ctx *qctx, @@ -226,19 +238,23 @@ static CURLcode send_packet_no_gso(struct Curl_cfilter *cf, size_t gsolen, size_t *psent) { const uint8_t *p, *end = pkt + pktlen; - size_t sent; + size_t sent, len, calls = 0; + CURLcode result = CURLE_OK; *psent = 0; for(p = pkt; p < end; p += gsolen) { - size_t len = CURLMIN(gsolen, (size_t)(end - p)); - CURLcode curlcode = do_sendmsg(cf, data, qctx, p, len, len, &sent); - if(curlcode != CURLE_OK) { - return curlcode; - } + len = CURLMIN(gsolen, (size_t)(end - p)); + result = do_sendmsg(cf, data, qctx, p, len, len, &sent); + if(result) + goto out; *psent += sent; + ++calls; } - +out: + CURL_TRC_CF(data, cf, "vquic_%s(len=%zu, gso=%zu, calls=%zu)" + " -> %d, sent=%zu", + VQUIC_SEND_METHOD, pktlen, gsolen, calls, result, *psent); return CURLE_OK; } @@ -266,6 +282,9 @@ static CURLcode vquic_send_packets(struct Curl_cfilter *cf, } else { result = do_sendmsg(cf, data, qctx, pkt, pktlen, gsolen, psent); + CURL_TRC_CF(data, cf, "vquic_%s(len=%zu, gso=%zu, calls=1)" + " -> %d, sent=%zu", + VQUIC_SEND_METHOD, pktlen, gsolen, result, *psent); } if(!result) qctx->last_io = qctx->last_op; @@ -289,8 +308,6 @@ CURLcode vquic_flush(struct Curl_cfilter *cf, struct Curl_easy *data, } result = vquic_send_packets(cf, data, qctx, buf, blen, gsolen, &sent); - CURL_TRC_CF(data, cf, "vquic_send(len=%zu, gso=%zu) -> %d, sent=%zu", - blen, gsolen, result, sent); if(result) { if(result == CURLE_AGAIN) { Curl_bufq_skip(&qctx->sendbuf, sent); @@ -369,7 +386,7 @@ static CURLcode recvmmsg_packets(struct Curl_cfilter *cf, struct mmsghdr mmsg[MMSG_NUM]; uint8_t msg_ctrl[MMSG_NUM * CMSG_SPACE(sizeof(int))]; struct sockaddr_storage remote_addr[MMSG_NUM]; - size_t total_nread = 0, pkts = 0; + size_t total_nread = 0, pkts = 0, calls = 0; int mcount, i, n; char errstr[STRERROR_LEN]; CURLcode result = CURLE_OK; @@ -424,7 +441,7 @@ static CURLcode recvmmsg_packets(struct Curl_cfilter *cf, goto out; } - CURL_TRC_CF(data, cf, "recvmmsg() -> %d packets", mcount); + ++calls; for(i = 0; i < mcount; ++i) { total_nread += mmsg[i].msg_len; @@ -454,8 +471,8 @@ static CURLcode recvmmsg_packets(struct Curl_cfilter *cf, out: if(total_nread || result) - CURL_TRC_CF(data, cf, "recvd %zu packets with %zu bytes -> %d", - pkts, total_nread, result); + CURL_TRC_CF(data, cf, "vquic_recvmmsg(len=%zu, packets=%zu, calls=%zu)" + " -> %d", total_nread, pkts, calls, result); Curl_multi_xfer_sockbuf_release(data, sockbuf); return result; } @@ -471,8 +488,9 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf, struct msghdr msg; uint8_t buf[64*1024]; struct sockaddr_storage remote_addr; - size_t total_nread, pkts; - ssize_t nread; + size_t total_nread, pkts, calls; + ssize_t rc; + size_t nread; char errstr[STRERROR_LEN]; CURLcode result = CURLE_OK; uint8_t msg_ctrl[CMSG_SPACE(sizeof(int))]; @@ -481,7 +499,7 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf, size_t offset, to; DEBUGASSERT(max_pkts > 0); - for(pkts = 0, total_nread = 0; pkts < max_pkts;) { + for(pkts = 0, total_nread = 0, calls = 0; pkts < max_pkts;) { /* fully initialise this on each call to `recvmsg()`. There seem to * operating systems out there that mess with `msg_iov.iov_len`. */ memset(&msg, 0, sizeof(msg)); @@ -494,10 +512,10 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf, msg.msg_namelen = sizeof(remote_addr); msg.msg_controllen = sizeof(msg_ctrl); - while((nread = recvmsg(qctx->sockfd, &msg, 0)) == -1 && + while((rc = recvmsg(qctx->sockfd, &msg, 0)) == -1 && (SOCKERRNO == SOCKEINTR || SOCKERRNO == SOCKEMSGSIZE)) ; - if(nread == -1) { + if(rc == -1) { if(SOCKERRNO == EAGAIN || SOCKERRNO == SOCKEWOULDBLOCK) { goto out; } @@ -511,28 +529,28 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf, } Curl_strerror(SOCKERRNO, errstr, sizeof(errstr)); failf(data, "QUIC: recvmsg() unexpectedly returned %zd (errno=%d; %s)", - nread, SOCKERRNO, errstr); + rc, SOCKERRNO, errstr); result = CURLE_RECV_ERROR; goto out; } - total_nread += (size_t)nread; + nread = (size_t)rc; + total_nread += nread; + ++calls; gso_size = vquic_msghdr_get_udp_gro(&msg); if(gso_size == 0) { - gso_size = (size_t)nread; + gso_size = nread; } - for(offset = 0; offset < (size_t)nread; offset = to) { + for(offset = 0; offset < nread; offset = to) { ++pkts; to = offset + gso_size; - if(to > (size_t)nread) { - pktlen = (size_t)nread - offset; - } - else { + if(to > nread) + pktlen = nread - offset; + else pktlen = gso_size; - } result = recv_cb(buf + offset, pktlen, msg.msg_name, msg.msg_namelen, 0, userp); @@ -543,8 +561,8 @@ static CURLcode recvmsg_packets(struct Curl_cfilter *cf, out: if(total_nread || result) - CURL_TRC_CF(data, cf, "recvd %zu packets with %zu bytes -> %d", - pkts, total_nread, result); + CURL_TRC_CF(data, cf, "vquic_recvmsg(len=%zu, packets=%zu, calls=%zu)" + " -> %d", total_nread, pkts, calls, result); return result; } @@ -559,7 +577,7 @@ static CURLcode recvfrom_packets(struct Curl_cfilter *cf, int bufsize = (int)sizeof(buf); struct sockaddr_storage remote_addr; socklen_t remote_addrlen = sizeof(remote_addr); - size_t total_nread, pkts; + size_t total_nread, pkts, calls = 0; ssize_t nread; char errstr[STRERROR_LEN]; CURLcode result = CURLE_OK; @@ -592,6 +610,7 @@ static CURLcode recvfrom_packets(struct Curl_cfilter *cf, } ++pkts; + ++calls; total_nread += (size_t)nread; result = recv_cb(buf, (size_t)nread, &remote_addr, remote_addrlen, 0, userp); @@ -601,8 +620,8 @@ static CURLcode recvfrom_packets(struct Curl_cfilter *cf, out: if(total_nread || result) - CURL_TRC_CF(data, cf, "recvd %zu packets with %zu bytes -> %d", - pkts, total_nread, result); + CURL_TRC_CF(data, cf, "vquic_recvfrom(len=%zu, packets=%zu, calls=%zu)" + " -> %d", total_nread, pkts, calls, result); return result; } #endif /* !HAVE_SENDMMSG && !HAVE_SENDMSG */