From: Frédéric Lécaille Date: Tue, 6 Jul 2021 14:35:52 +0000 (+0200) Subject: MINOR: quic: Make use of TX ring buffers to send QUIC packets X-Git-Tag: v2.5-dev8~105 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=c5b0c93c263b01b440395b29b4c875183acaedfd;p=thirdparty%2Fhaproxy.git MINOR: quic: Make use of TX ring buffers to send QUIC packets We modify the functions responsible of building packets to put these latters in ring buffers (qc_build_hdshk_pkt() during the handshake step, and qc_build_phdshk_apkt() during the post-handshake step). These functions remove a ring buffer from its list to build as much as possible datagrams. Eache datagram is prepended of two field: the datagram length and the first packet in the datagram. We chain the packets belonging to the same datagram in a singly linked list to reach them from the first one: indeed we must modify some members of each packet when we really send them from send_ppkts(). This function is also modified to retrieved the datagram from ring buffers. --- diff --git a/include/haproxy/xprt_quic-t.h b/include/haproxy/xprt_quic-t.h index 850be40e74..6c31c21e43 100644 --- a/include/haproxy/xprt_quic-t.h +++ b/include/haproxy/xprt_quic-t.h @@ -465,6 +465,8 @@ struct quic_rx_strm_frm { struct quic_tx_packet { /* List entry point. */ struct list list; + /* Packet length */ + size_t len; /* This is not the packet length but the length of outstanding data * for in flight TX packet. */ @@ -480,6 +482,8 @@ struct quic_tx_packet { struct quic_pktns *pktns; /* Flags. */ unsigned int flags; + /* Next packet in the same datagram */ + struct quic_tx_packet *next; }; /* Structure to stora enough information about the TX frames. */ diff --git a/src/xprt_quic.c b/src/xprt_quic.c index 1a458cd7c5..d5dcd37690 100644 --- a/src/xprt_quic.c +++ b/src/xprt_quic.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -153,10 +154,10 @@ DECLARE_STATIC_POOL(pool_head_quic_frame, "quic_frame_pool", sizeof(struct quic_ DECLARE_STATIC_POOL(pool_head_quic_arng, "quic_arng_pool", sizeof(struct quic_arng_node)); -static ssize_t qc_build_hdshk_pkt(struct q_buf *buf, struct quic_conn *qc, int pkt_type, - struct quic_enc_level *qel); - -int qc_prep_phdshk_pkts(struct quic_conn *qc); +static struct quic_tx_packet *qc_build_hdshk_pkt(unsigned char **pos, const unsigned char *buf_end, + struct quic_conn *qc, int pkt_type, + struct quic_enc_level *qel, int *err); +int qc_prep_phdshk_pkts(struct qring *qr, struct quic_conn *qc); /* Add traces to depending on TX frame type. */ static inline void chunk_tx_frm_appendf(struct buffer *buf, @@ -1215,7 +1216,7 @@ static inline void free_quic_tx_pkts(struct list *pkts) list_for_each_entry_safe(pkt, tmp, pkts, list) { LIST_DELETE(&pkt->list); eb64_delete(&pkt->pn_node); - pool_free(pool_head_quic_tx_packet, pkt); + quic_tx_packet_refdec(pkt); } } @@ -1264,7 +1265,7 @@ static inline void qc_treat_newly_acked_pkts(struct ssl_sock_ctx *ctx, quic_cc_event(&qc->path->cc, &ev); LIST_DELETE(&pkt->list); eb64_delete(&pkt->pn_node); - pool_free(pool_head_quic_tx_packet, pkt); + quic_tx_packet_refdec(pkt); } } @@ -1303,7 +1304,7 @@ static inline void qc_release_lost_pkts(struct quic_pktns *pktns, } else { if (newest_lost != oldest_lost) - pool_free(pool_head_quic_tx_packet, newest_lost); + quic_tx_packet_refdec(newest_lost); newest_lost = pkt; } } @@ -1312,9 +1313,9 @@ static inline void qc_release_lost_pkts(struct quic_pktns *pktns, /* Sent a packet loss event to the congestion controller. */ qc_cc_loss_event(ctx->conn->qc, lost_bytes, newest_lost->time_sent, newest_lost->time_sent - oldest_lost->time_sent, now_us); - pool_free(pool_head_quic_tx_packet, oldest_lost); + quic_tx_packet_refdec(oldest_lost); if (newest_lost != oldest_lost) - pool_free(pool_head_quic_tx_packet, newest_lost); + quic_tx_packet_refdec(newest_lost); } } @@ -1972,40 +1973,69 @@ static int qc_parse_pkt_frms(struct quic_rx_packet *pkt, struct ssl_sock_ctx *ct return 0; } -/* Prepare as much as possible handshake packets for the QUIC connection - * with as I/O handler context. +/* Write datagram length and first packet address into ring + * buffer. This is the responsability of the caller to check there is enough + * room in . Also increase the write index consequently. + * This function must be called only after having built a correct datagram. + * Always succeeds. + */ +static inline void qc_set_dg(struct cbuf *cbuf, + uint16_t dglen, struct quic_tx_packet *pkt) +{ + write_u16(cb_wr(cbuf), dglen); + write_ptr(cb_wr(cbuf) + sizeof dglen, pkt); + cb_add(cbuf, dglen + sizeof dglen + sizeof pkt); +} + +/* Prepare as much as possible handshake packets into ring buffer for + * the QUIC connection with as I/O handler context, possibly concatenating + * several packets in the same datagram. A header made of two fields is added + * to each datagram: the datagram length followed by the address of the first + * packet in this datagram. * Returns 1 if succeeded, or 0 if something wrong happened. */ -static int qc_prep_hdshk_pkts(struct ssl_sock_ctx *ctx) +static int qc_prep_hdshk_pkts(struct qring *qr, struct ssl_sock_ctx *ctx) { struct quic_conn *qc; enum quic_tls_enc_level tel, next_tel; struct quic_enc_level *qel; - struct q_buf *wbuf; - /* A boolean to flag as reusable, even if not empty. */ - int reuse_wbuf; + struct cbuf *cbuf; + unsigned char *end_buf, *end, *pos, *spos; + struct quic_tx_packet *first_pkt, *cur_pkt, *prv_pkt; + /* length of datagrams */ + uint16_t dglen; + size_t total; + /* Each datagram is prepended with its length followed by the + * address of the first packet in the datagram. + */ + size_t dg_headlen = sizeof dglen + sizeof first_pkt; TRACE_ENTER(QUIC_EV_CONN_PHPKTS, ctx->conn); qc = ctx->conn->qc; if (!quic_get_tls_enc_levels(&tel, &next_tel, qc->state)) { - TRACE_DEVEL("unknown enc. levels", - QUIC_EV_CONN_PHPKTS, ctx->conn); + TRACE_DEVEL("unknown enc. levels", QUIC_EV_CONN_PHPKTS, ctx->conn); goto err; } - reuse_wbuf = 0; - wbuf = q_wbuf(qc); + start: + total = 0; + dglen = 0; qel = &qc->els[tel]; - /* When entering this function, the writer buffer must be empty. - * Most of the time it points to the reader buffer. + cbuf = qr->cbuf; + spos = pos = cb_wr(cbuf); + /* Leave at least bytes at the end of this buffer + * to ensure there is enough room to mark the end of prepared + * contiguous data with a zero length. */ - while ((q_buf_empty(wbuf) || reuse_wbuf)) { - ssize_t ret; + end_buf = pos + cb_contig_space(cbuf) - sizeof dglen; + first_pkt = prv_pkt = NULL; + while (end_buf - pos >= (int)qc->path->mtu + dg_headlen || prv_pkt) { + int err; enum quic_pkt_type pkt_type; TRACE_POINT(QUIC_EV_CONN_PHPKTS, ctx->conn, qel); - /* Do not build any more packet f the TX secrets are not available or - * f there is nothing to send, i.e. if no ACK are required + /* Do not build any more packet if the TX secrets are not available or + * if there is nothing to send, i.e. if no ACK are required * and if there is no more packets to send upon PTO expiration * and if there is no more CRYPTO data available or in flight * congestion control limit is reached for prepared data @@ -2016,29 +2046,45 @@ static int qc_prep_hdshk_pkts(struct ssl_sock_ctx *ctx) (LIST_ISEMPTY(&qel->pktns->tx.frms) || qc->path->prep_in_flight >= qc->path->cwnd))) { TRACE_DEVEL("nothing more to do", QUIC_EV_CONN_PHPKTS, ctx->conn); - /* Consume the buffer if we were supposed to reuse it. */ - if (reuse_wbuf) - wbuf = q_next_wbuf(qc); + /* Set the current datagram as prepared into if + * the was already a correct packet which was previously written. + */ + if (prv_pkt) + qc_set_dg(cbuf, dglen, first_pkt); break; } pkt_type = quic_tls_level_pkt_type(tel); - ret = qc_build_hdshk_pkt(wbuf, qc, pkt_type, qel); - switch (ret) { + if (!prv_pkt) { + /* Leave room for the datagram header */ + pos += dg_headlen; + end = pos + qc->path->mtu; + } + + cur_pkt = qc_build_hdshk_pkt(&pos, end, qc, pkt_type, qel, &err); + switch (err) { case -2: goto err; case -1: - if (!reuse_wbuf) - goto out; - - /* Not enough room in . */ - wbuf = q_next_wbuf(qc); - reuse_wbuf = 0; - continue; - case 0: + /* If there was already a correct packet present, set the + * current datagram as prepared into . + */ + if (prv_pkt) { + qc_set_dg(cbuf, dglen, first_pkt); + goto stop_build; + } goto out; default: - reuse_wbuf = 0; + total += cur_pkt->len; + /* keep trace of the first packet in the datagram */ + if (!first_pkt) + first_pkt = cur_pkt; + /* Attach the current one to the previous one */ + if (prv_pkt) + prv_pkt->next = cur_pkt; + /* Let's say we have to build a new dgram */ + prv_pkt = NULL; + dglen += cur_pkt->len; /* Discard the Initial encryption keys as soon as * a handshake packet could be built. */ @@ -2052,77 +2098,130 @@ static int qc_prep_hdshk_pkts(struct ssl_sock_ctx *ctx) /* Special case for Initial packets: when they have all * been sent, select the next level. */ - if ((LIST_ISEMPTY(&qel->pktns->tx.frms) || qc->els[next_tel].pktns->tx.in_flight) && - tel == QUIC_TLS_ENC_LEVEL_INITIAL) { + if (tel == QUIC_TLS_ENC_LEVEL_INITIAL && + (LIST_ISEMPTY(&qel->pktns->tx.frms) || qc->els[next_tel].pktns->tx.in_flight)) { tel = next_tel; qel = &qc->els[tel]; - if (LIST_ISEMPTY(&qel->pktns->tx.frms)) { - /* If there is no more data for the next level, let's - * consume a buffer. This is the case for a client + if (!LIST_ISEMPTY(&qel->pktns->tx.frms)) { + /* If there is data for the next level, do not + * consume a datagram. This is the case for a client * which sends only one Initial packet, then wait * for additional CRYPTO data from the server to enter the * next level. */ - wbuf = q_next_wbuf(qc); - } - else { - /* Let's try to reuse this buffer. */ - reuse_wbuf = 1; + prv_pkt = cur_pkt; } } - else { - wbuf = q_next_wbuf(qc); + } + + /* If we have to build a new datagram, set the current datagram as + * prepared into . + */ + if (!prv_pkt) { + qc_set_dg(cbuf, dglen, first_pkt); + first_pkt = NULL; + dglen = 0; + } + } + + stop_build: + /* Reset writer index if in front of index */ + if (end_buf - pos < (int)qc->path->mtu + dg_headlen) { + int rd = HA_ATOMIC_LOAD(&cbuf->rd); + + TRACE_DEVEL("buffer full", QUIC_EV_CONN_PHPKTS, ctx->conn); + if (cb_contig_space(cbuf) >= sizeof(uint16_t)) { + if ((pos != spos && cbuf->wr > rd) || (pos == spos && rd <= cbuf->wr)) { + /* Mark the end of contiguous data for the reader */ + write_u16(cb_wr(cbuf), 0); + cb_add(cbuf, sizeof(uint16_t)); + } + } + + if (rd && rd <= cbuf->wr) { + cb_wr_reset(cbuf); + if (pos == spos) { + /* Reuse the same buffer if nothing was built. */ + goto start; } } } out: TRACE_LEAVE(QUIC_EV_CONN_PHPKTS, ctx->conn); - return 1; + return total; err: TRACE_DEVEL("leaving in error", QUIC_EV_CONN_PHPKTS, ctx->conn); - return 0; + return -1; } /* Send the QUIC packets which have been prepared for QUIC connections - * with as I/O handler context. + * from ring buffer with as I/O handler context. */ -int qc_send_ppkts(struct ssl_sock_ctx *ctx) +int qc_send_ppkts(struct qring *qr, struct ssl_sock_ctx *ctx) { struct quic_conn *qc; - struct buffer tmpbuf = { }; - struct q_buf *rbuf; + struct cbuf *cbuf; qc = ctx->conn->qc; - for (rbuf = q_rbuf(qc); !q_buf_empty(rbuf) ; rbuf = q_next_rbuf(qc)) { - struct quic_tx_packet *p, *q; + cbuf = qr->cbuf; + while (cb_contig_data(cbuf)) { + unsigned char *pos; + struct buffer tmpbuf = { }; + struct quic_tx_packet *first_pkt, *pkt, *next_pkt; + uint16_t dglen; + size_t headlen = sizeof dglen + sizeof first_pkt; unsigned int time_sent; - tmpbuf.area = (char *)rbuf->area; - tmpbuf.size = tmpbuf.data = rbuf->data; + pos = cb_rd(cbuf); + dglen = read_u16(pos); + /* End of prepared datagrams. + * Reset the reader index only if in front of the writer index. + */ + if (!dglen) { + int wr = HA_ATOMIC_LOAD(&cbuf->wr); + + if (wr && wr < cbuf->rd) { + cb_rd_reset(cbuf); + continue; + } + break; + } + + pos += sizeof dglen; + first_pkt = read_ptr(pos); + pos += sizeof first_pkt; + tmpbuf.area = (char *)pos; + tmpbuf.size = tmpbuf.data = dglen; + TRACE_PROTO("to send", QUIC_EV_CONN_SPPKTS, ctx->conn); + for (pkt = first_pkt; pkt; pkt = pkt->next) + quic_tx_packet_refinc(pkt); if (ctx->xprt->snd_buf(qc->conn, qc->conn->xprt_ctx, - &tmpbuf, tmpbuf.data, 0) <= 0) + &tmpbuf, tmpbuf.data, 0) <= 0) { + for (pkt = first_pkt; pkt; pkt = pkt->next) + quic_tx_packet_refdec(pkt); break; + } + cb_del(cbuf, dglen + headlen); qc->tx.bytes += tmpbuf.data; time_sent = now_ms; - /* Reset this buffer to make it available for the next packet to prepare. */ - q_buf_reset(rbuf); - /* Remove from the packets which have just been sent. */ - list_for_each_entry_safe(p, q, &rbuf->pkts, list) { - p->time_sent = time_sent; - if (p->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) { - p->pktns->tx.time_of_last_eliciting = time_sent; + + for (pkt = first_pkt; pkt; pkt = next_pkt) { + pkt->time_sent = time_sent; + if (pkt->flags & QUIC_FL_TX_PACKET_ACK_ELICITING) { + pkt->pktns->tx.time_of_last_eliciting = time_sent; qc->path->ifae_pkts++; } - qc->path->in_flight += p->in_flight_len; - p->pktns->tx.in_flight += p->in_flight_len; - if (p->in_flight_len) + qc->path->in_flight += pkt->in_flight_len; + pkt->pktns->tx.in_flight += pkt->in_flight_len; + if (pkt->in_flight_len) qc_set_timer(ctx); - TRACE_PROTO("sent pkt", QUIC_EV_CONN_SPPKTS, ctx->conn, p); - LIST_DELETE(&p->list); + TRACE_PROTO("sent pkt", QUIC_EV_CONN_SPPKTS, ctx->conn, pkt); + next_pkt = pkt->next; + quic_tx_packet_refdec(pkt); } } @@ -2509,13 +2608,15 @@ int qc_treat_rx_pkts(struct quic_enc_level *el, struct ssl_sock_ctx *ctx) */ int qc_do_hdshk(struct ssl_sock_ctx *ctx) { - int ssl_err; + int ret, ssl_err; struct quic_conn *qc; enum quic_tls_enc_level tel, next_tel; struct quic_enc_level *qel, *next_qel; struct quic_tls_ctx *tls_ctx; + struct qring *qr; // Tx ring qc = ctx->conn->qc; + qr = NULL; TRACE_ENTER(QUIC_EV_CONN_HDSHK, ctx->conn, &qc->state); ssl_err = SSL_ERROR_NONE; if (!quic_get_tls_enc_levels(&tel, &next_tel, qc->state)) @@ -2538,12 +2639,18 @@ int qc_do_hdshk(struct ssl_sock_ctx *ctx) !qc_treat_rx_pkts(qel, ctx)) goto err; - if (!qc_prep_hdshk_pkts(ctx)) + if (!qr) + qr = MT_LIST_POP(qc->tx.qring_list, typeof(qr), mt_list); + ret = qc_prep_hdshk_pkts(qr, ctx); + if (ret == -1) goto err; + else if (ret == 0) + goto skip_send; - if (!qc_send_ppkts(ctx)) + if (!qc_send_ppkts(qr, ctx)) goto err; + skip_send: /* Check if there is something to do for the next level. */ if ((next_qel->tls_ctx.rx.flags & QUIC_FL_TLS_SECRETS_SET) && @@ -2561,15 +2668,18 @@ int qc_do_hdshk(struct ssl_sock_ctx *ctx) quic_pktns_discard(qc->els[QUIC_TLS_ENC_LEVEL_HANDSHAKE].pktns, qc); qc_set_timer(ctx); if (!quic_build_post_handshake_frames(qc) || - !qc_prep_phdshk_pkts(qc) || - !qc_send_ppkts(ctx)) + !qc_prep_phdshk_pkts(qr, qc) || + !qc_send_ppkts(qr, ctx)) goto err; out: + MT_LIST_APPEND(qc->tx.qring_list, &qr->mt_list); TRACE_LEAVE(QUIC_EV_CONN_HDSHK, ctx->conn, &qc->state); return 1; err: + if (qr) + MT_LIST_APPEND(qc->tx.qring_list, &qr->mt_list); TRACE_DEVEL("leaving in error", QUIC_EV_CONN_HDSHK, ctx->conn, &qc->state, &ssl_err); return 0; } @@ -3601,35 +3711,28 @@ static inline int qc_build_cfrms(struct quic_tx_packet *pkt, } /* This function builds a clear handshake packet used during a QUIC TLS handshakes - * into the current for QUIC connection with as QUIC - * TLS encryption level for outgoing packets filling it with as much as CRYPTO - * data as possible from offset in the CRYPTO data stream. Note that - * this offset value is updated by the length of the CRYPTO frame used to embed - * the CRYPTO data if this packet and only if the packet is successfully built. + * into a buffer with as position pointer and as QUIC TLS encryption level + * for QUIC connection and as QUIC TLS encryption level, filling the buffer + * with as much as CRYPTO. * The trailing QUIC_TLS_TAG_LEN bytes of this packet are not built. But they are - * reserved so that to be sure there is enough room to build this AEAD TAG after - * having successfully returned from this function and to be sure the position - * pointer of may be safely incremented by QUIC_TLS_TAG_LEN. After having - * returned from this function, position will point one past the last - * byte of the payload with the confidence there is at least QUIC_TLS_TAG_LEN bytes - * available packet to encrypt this packet. + * reserved so that to ensure there is enough room to build this AEAD TAG after + * having successfully returned from this function and to ensure the position + * pointer may be safely incremented by QUIC_TLS_TAG_LEN. * This function also update the value of pointer to point to the packet * number field in this packet. will also have the packet number * length as value. * - * Return the length of the packet if succeeded minus QUIC_TLS_TAG_LEN, or -1 if - * failed (not enough room in to build this packet plus QUIC_TLS_TAG_LEN - * bytes), -2 if there are too much CRYPTO data in flight to build a packet. + * Return 1 packet if succeeded or 0 if failed (not enough room in the buffer to build + * this packet, QUIC_TLS_TAG_LEN bytes for the encryption TAG included). */ -static ssize_t qc_do_build_hdshk_pkt(struct q_buf *wbuf, - struct quic_tx_packet *pkt, int pkt_type, - int64_t pn, size_t *pn_len, - unsigned char **buf_pn, - struct quic_enc_level *qel, - struct quic_conn *conn) +static int qc_do_build_hdshk_pkt(unsigned char *pos, const unsigned char *end, + struct quic_tx_packet *pkt, int pkt_type, + int64_t pn, size_t *pn_len, + unsigned char **buf_pn, + struct quic_enc_level *qel, + struct quic_conn *conn) { - unsigned char *beg, *pos; - const unsigned char *end; + unsigned char *beg; size_t len, len_frms, token_fields_len, padding_len; struct quic_frame frm = { .type = QUIC_FT_CRYPTO, }; struct quic_frame ack_frm = { .type = QUIC_FT_ACK, }; @@ -3640,8 +3743,7 @@ static ssize_t qc_do_build_hdshk_pkt(struct q_buf *wbuf, /* Length field value with CRYPTO frames if present. */ len_frms = 0; - beg = pos = q_buf_getpos(wbuf); - end = q_buf_end(wbuf); + beg = pos; /* When not probing and not acking, reduce the size of this buffer to respect * the congestion controller window. */ @@ -3791,19 +3893,23 @@ static ssize_t qc_do_build_hdshk_pkt(struct q_buf *wbuf, * if it was set. It is handle by the loss detection timer. */ qel->pktns->tx.pto_probe = 0; + pkt->len = pos - beg; out: - return pos - beg; + return 1; err: - return -1; + return 0; } static inline void quic_tx_packet_init(struct quic_tx_packet *pkt) { + pkt->len = 0; pkt->cdata_len = 0; pkt->in_flight_len = 0; LIST_INIT(&pkt->frms); + pkt->next = NULL; + pkt->refcnt = 1; } /* Free TX packet which has not already attached to any tree. */ @@ -3811,11 +3917,14 @@ static inline void free_quic_tx_packet(struct quic_tx_packet *pkt) { struct quic_tx_frm *frm, *frmbak; + if (!pkt) + return; + list_for_each_entry_safe(frm, frmbak, &pkt->frms, list) { LIST_DELETE(&frm->list); pool_free(pool_head_quic_tx_frm, frm); } - pool_free(pool_head_quic_tx_packet, pkt); + quic_tx_packet_refdec(pkt); } /* Build a handshake packet into packet buffer with as packet @@ -3825,101 +3934,99 @@ static inline void free_quic_tx_packet(struct quic_tx_packet *pkt) * not enough room in to build the packet, or the size of the built packet * if succeeded (may be zero if there is too much crypto data in flight to build the packet). */ -static ssize_t qc_build_hdshk_pkt(struct q_buf *buf, struct quic_conn *qc, int pkt_type, - struct quic_enc_level *qel) +static struct quic_tx_packet *qc_build_hdshk_pkt(unsigned char **pos, + const unsigned char *buf_end, + struct quic_conn *qc, int pkt_type, + struct quic_enc_level *qel, int *err) { /* The pointer to the packet number field. */ unsigned char *buf_pn; unsigned char *beg, *end, *payload; int64_t pn; size_t pn_len, payload_len, aad_len; - ssize_t pkt_len; struct quic_tls_ctx *tls_ctx; struct quic_tx_packet *pkt; TRACE_ENTER(QUIC_EV_CONN_HPKT, qc->conn, NULL, qel); + *err = 0; pkt = pool_alloc(pool_head_quic_tx_packet); if (!pkt) { TRACE_DEVEL("Not enough memory for a new packet", QUIC_EV_CONN_HPKT, qc->conn); - return -2; + *err = -2; + goto err; } quic_tx_packet_init(pkt); - beg = q_buf_getpos(buf); + beg = *pos; pn_len = 0; buf_pn = NULL; pn = qel->pktns->tx.next_pn + 1; - pkt_len = qc_do_build_hdshk_pkt(buf, pkt, pkt_type, pn, &pn_len, &buf_pn, qel, qc); - if (pkt_len <= 0) { - free_quic_tx_packet(pkt); - return pkt_len; + if (!qc_do_build_hdshk_pkt(*pos, buf_end, pkt, pkt_type, pn, &pn_len, &buf_pn, qel, qc)) { + *err = -1; + goto err; } - end = beg + pkt_len; + end = beg + pkt->len; payload = buf_pn + pn_len; payload_len = end - payload; aad_len = payload - beg; tls_ctx = &qel->tls_ctx; - if (!quic_packet_encrypt(payload, payload_len, beg, aad_len, pn, tls_ctx, qc->conn)) + if (!quic_packet_encrypt(payload, payload_len, beg, aad_len, pn, tls_ctx, qc->conn)) { + *err = -2; goto err; + } end += QUIC_TLS_TAG_LEN; - pkt_len += QUIC_TLS_TAG_LEN; + pkt->len += QUIC_TLS_TAG_LEN; if (!quic_apply_header_protection(beg, buf_pn, pn_len, tls_ctx->tx.hp, tls_ctx->tx.hp_key)) { TRACE_DEVEL("Could not apply the header protection", QUIC_EV_CONN_HPKT, qc->conn); + *err = -2; goto err; } - /* Now that a correct packet is built, let us set the position pointer of - * buffer for the next packet. - */ - q_buf_setpos(buf, end); + /* Now that a correct packet is built, let us consume <*pos> buffer. */ + *pos = end; /* Consume a packet number. */ ++qel->pktns->tx.next_pn; /* Attach the built packet to its tree. */ pkt->pn_node.key = qel->pktns->tx.next_pn; /* Set the packet in fligth length for in flight packet only. */ if (pkt->flags & QUIC_FL_TX_PACKET_IN_FLIGHT) { - pkt->in_flight_len = pkt_len; - qc->path->prep_in_flight += pkt_len; + pkt->in_flight_len = pkt->len; + qc->path->prep_in_flight += pkt->len; } pkt->pktns = qel->pktns; eb64_insert(&qel->pktns->tx.pkts, &pkt->pn_node); - /* Increment the number of bytes in buffer by the length of this packet. */ - buf->data += pkt_len; - /* Attach this packet to . */ - LIST_APPEND(&buf->pkts, &pkt->list); TRACE_LEAVE(QUIC_EV_CONN_HPKT, qc->conn, pkt); - return pkt_len; + return pkt; err: free_quic_tx_packet(pkt); TRACE_DEVEL("leaving in error", QUIC_EV_CONN_HPKT, qc->conn); - return -2; + return NULL; } /* Prepare a clear post handhskake packet for QUIC connection. * Return the length of this packet if succeeded, -1 was full. */ -static ssize_t qc_do_build_phdshk_apkt(struct q_buf *wbuf, - struct quic_tx_packet *pkt, - int64_t pn, size_t *pn_len, - unsigned char **buf_pn, struct quic_enc_level *qel, - struct quic_conn *conn) +static int qc_do_build_phdshk_apkt(unsigned char *pos, const unsigned char *end, + struct quic_tx_packet *pkt, + int64_t pn, size_t *pn_len, + unsigned char **buf_pn, + struct quic_enc_level *qel, + struct quic_conn *conn) { - const unsigned char *beg, *end; - unsigned char *pos; + const unsigned char *beg; struct quic_frame *frm, *sfrm; struct quic_frame ack_frm = { .type = QUIC_FT_ACK, }; size_t fake_len, ack_frm_len; int64_t largest_acked_pn; TRACE_ENTER(QUIC_EV_CONN_PAPKT, conn->conn); - beg = pos = q_buf_getpos(wbuf); - end = q_buf_end(wbuf); + beg = pos; /* When not probing and not acking, reduce the size of this buffer to respect * the congestion controller window. */ @@ -4012,108 +4119,116 @@ static ssize_t qc_do_build_phdshk_apkt(struct q_buf *wbuf, LIST_APPEND(&pkt->frms, &frm->list); pos = ppos; } + pkt->len = pos - beg; out: TRACE_LEAVE(QUIC_EV_CONN_PAPKT, conn->conn); - return pos - beg; + return 1; err: - TRACE_DEVEL("leaving in error (buffer full)", QUIC_EV_CONN_PAPKT, conn->conn); - return -1; + return 0; } /* Prepare a post handhskake packet at Application encryption level for * QUIC connection. - * Return the length of this packet if succeeded, -1 if was full, - * -2 in case of major error (encryption failure). + * Return the length if succeeded, -1 if was full, -2 in case of major error + * (allocation or encryption failures). */ -static ssize_t qc_build_phdshk_apkt(struct q_buf *wbuf, struct quic_conn *qc) +static struct quic_tx_packet *qc_build_phdshk_apkt(unsigned char **pos, + const unsigned char *buf_end, + struct quic_conn *qc, int *err) { /* A pointer to the packet number field in */ unsigned char *buf_pn; unsigned char *beg, *end, *payload; int64_t pn; size_t pn_len, aad_len, payload_len; - ssize_t pkt_len; struct quic_tls_ctx *tls_ctx; struct quic_enc_level *qel; struct quic_tx_packet *pkt; TRACE_ENTER(QUIC_EV_CONN_PAPKT, qc->conn); + *err = 0; pkt = pool_alloc(pool_head_quic_tx_packet); if (!pkt) { TRACE_DEVEL("Not enough memory for a new packet", QUIC_EV_CONN_PAPKT, qc->conn); - return -2; + *err = -2; + goto err; } quic_tx_packet_init(pkt); - beg = q_buf_getpos(wbuf); + beg = *pos; qel = &qc->els[QUIC_TLS_ENC_LEVEL_APP]; pn_len = 0; buf_pn = NULL; pn = qel->pktns->tx.next_pn + 1; - pkt_len = qc_do_build_phdshk_apkt(wbuf, pkt, pn, &pn_len, &buf_pn, qel, qc); - if (pkt_len <= 0) { - free_quic_tx_packet(pkt); - return pkt_len; + if (!qc_do_build_phdshk_apkt(*pos, buf_end, pkt, pn, &pn_len, &buf_pn, qel, qc)) { + *err = -1; + goto err; } - end = beg + pkt_len; + end = beg + pkt->len; payload = buf_pn + pn_len; payload_len = end - payload; aad_len = payload - beg; tls_ctx = &qel->tls_ctx; - if (!quic_packet_encrypt(payload, payload_len, beg, aad_len, pn, tls_ctx, qc->conn)) + if (!quic_packet_encrypt(payload, payload_len, beg, aad_len, pn, tls_ctx, qc->conn)) { + *err = -2; goto err; + } end += QUIC_TLS_TAG_LEN; - pkt_len += QUIC_TLS_TAG_LEN; + pkt->len += QUIC_TLS_TAG_LEN; if (!quic_apply_header_protection(beg, buf_pn, pn_len, - tls_ctx->tx.hp, tls_ctx->tx.hp_key)) + tls_ctx->tx.hp, tls_ctx->tx.hp_key)) { + TRACE_DEVEL("Could not apply the header protection", QUIC_EV_CONN_PAPKT, qc->conn); + *err = -2; goto err; + } - q_buf_setpos(wbuf, end); + /* Now that a correct packet is built, let us consume <*pos> buffer. */ + *pos = end; /* Consume a packet number. */ ++qel->pktns->tx.next_pn; /* Attach the built packet to its tree. */ pkt->pn_node.key = qel->pktns->tx.next_pn; - eb64_insert(&qel->pktns->tx.pkts, &pkt->pn_node); /* Set the packet in fligth length for in flight packet only. */ if (pkt->flags & QUIC_FL_TX_PACKET_IN_FLIGHT) { - pkt->in_flight_len = pkt_len; - qc->path->prep_in_flight += pkt_len; + pkt->in_flight_len = pkt->len; + qc->path->prep_in_flight += pkt->len; } pkt->pktns = qel->pktns; - /* Increment the number of bytes in buffer by the length of this packet. */ - wbuf->data += pkt_len; - /* Attach this packet to . */ - LIST_APPEND(&wbuf->pkts, &pkt->list); - + eb64_insert(&qel->pktns->tx.pkts, &pkt->pn_node); TRACE_LEAVE(QUIC_EV_CONN_PAPKT, qc->conn, pkt); - return pkt_len; + return pkt; err: free_quic_tx_packet(pkt); TRACE_DEVEL("leaving in error", QUIC_EV_CONN_PAPKT, qc->conn); - return -2; + return NULL; } /* Prepare a maximum of QUIC Application level packets from QUIC * connection I/O handler context. * Returns 1 if succeeded, 0 if not. */ -int qc_prep_phdshk_pkts(struct quic_conn *qc) +int qc_prep_phdshk_pkts(struct qring *qr, struct quic_conn *qc) { - struct q_buf *wbuf; + struct cbuf *cbuf; + unsigned char *end_buf, *end, *pos; struct quic_enc_level *qel; TRACE_ENTER(QUIC_EV_CONN_PAPKTS, qc->conn); - wbuf = q_wbuf(qc); qel = &qc->els[QUIC_TLS_ENC_LEVEL_APP]; - while (q_buf_empty(wbuf)) { - ssize_t ret; + cbuf = qr->cbuf; + pos = cb_wr(cbuf); + end = end_buf = pos + cb_contig_space(cbuf); + while (pos < end_buf) { + int err; + uint16_t dglen; + struct quic_tx_packet *pkt; if (!(qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) && (LIST_ISEMPTY(&qel->pktns->tx.frms) || @@ -4123,23 +4238,28 @@ int qc_prep_phdshk_pkts(struct quic_conn *qc) break; } - ret = qc_build_phdshk_apkt(wbuf, qc); - switch (ret) { + /* Leave room for the datagram header */ + pos += sizeof dglen + sizeof pkt; + if (end - pos > qc->path->mtu) + end = pos + qc->path->mtu; + pkt = qc_build_phdshk_apkt(&pos, end, qc, &err); + switch (err) { case -1: - /* Not enough room left in . */ - wbuf = q_next_wbuf(qc); - continue; + break; case -2: - return 0; + goto err; default: - /* XXX TO CHECK: consume a buffer. */ - wbuf = q_next_wbuf(qc); - continue; + dglen = pkt->len; + qc_set_dg(cbuf, dglen, pkt); } } + out: TRACE_LEAVE(QUIC_EV_CONN_PAPKTS, qc->conn); - return 1; + + err: + TRACE_DEVEL("leaving in error", QUIC_EV_CONN_PAPKTS, qc->conn); + return 0; } /* QUIC connection packet handler task. */ @@ -4152,11 +4272,14 @@ struct task *quic_conn_io_cb(struct task *t, void *context, unsigned int state) } else { struct quic_conn *qc = ctx->conn->qc; + struct qring *qr; + qr = MT_LIST_POP(qc->tx.qring_list, typeof(qr), mt_list); /* XXX TO DO: may fail!!! XXX */ qc_treat_rx_pkts(&qc->els[QUIC_TLS_ENC_LEVEL_APP], ctx); - qc_prep_phdshk_pkts(qc); - qc_send_ppkts(ctx); + qc_prep_phdshk_pkts(qr, qc); + qc_send_ppkts(qr, ctx); + MT_LIST_APPEND(qc->tx.qring_list, &qr->mt_list); } return t;