From: Frédéric Lécaille Date: Mon, 30 Nov 2020 15:19:22 +0000 (+0100) Subject: MAJOR: quic: Make usage of ebtrees to store QUIC ACK ranges. X-Git-Tag: v2.4-dev5~62 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=8090b51e92f5e45aff424b3722a9f1c8836d7bb9;p=thirdparty%2Fhaproxy.git MAJOR: quic: Make usage of ebtrees to store QUIC ACK ranges. Store QUIC ACK ranges in ebtrees in place of lists with a 0(n) time complexity for insertion. --- diff --git a/include/haproxy/quic_frame-t.h b/include/haproxy/quic_frame-t.h index a2404efc9a..855a37ecf1 100644 --- a/include/haproxy/quic_frame-t.h +++ b/include/haproxy/quic_frame-t.h @@ -108,7 +108,7 @@ struct quic_ack { /* Structure used when emitting ACK frames. */ struct quic_tx_ack { uint64_t ack_delay; - struct quic_ack_ranges *ack_ranges; + struct quic_arngs *arngs; }; struct quic_reset_stream { diff --git a/include/haproxy/xprt_quic-t.h b/include/haproxy/xprt_quic-t.h index b25c3eb456..1155f5bed4 100644 --- a/include/haproxy/xprt_quic-t.h +++ b/include/haproxy/xprt_quic-t.h @@ -332,17 +332,25 @@ struct quic_transport_params { struct preferred_address preferred_address; /* Forbidden for clients */ }; -/* Structure for ACK ranges sent in ACK frames. */ -struct quic_ack_range { - struct list list; +/* Structure to hold a range of ACKs sent in ACK frames. */ +struct quic_arng { int64_t first; int64_t last; }; -struct quic_ack_ranges { - /* list of ACK ranges. */ - struct list list; - /* The number of ACK ranges is this lists */ +/* Structure to hold a range of ACKs to be store as a node in a tree of + * ACK ranges. + */ +struct quic_arng_node { + struct eb64_node first; + uint64_t last; +}; + +/* Structure to maintain a set of ACK ranges to be used to build ACK frames. */ +struct quic_arngs { + /* ebtree of ACK ranges organized by their first value. */ + struct eb_root root; + /* The number of ACK ranges is this tree */ size_t sz; /* The number of bytes required to encode this ACK ranges lists. 
*/ size_t enc_sz; @@ -380,7 +388,7 @@ struct quic_pktns { int64_t largest_pn; /* Number of ack-eliciting packets. */ size_t nb_ack_eliciting; - struct quic_ack_ranges ack_ranges; + struct quic_arngs arngs; } rx; unsigned int flags; }; diff --git a/include/haproxy/xprt_quic.h b/include/haproxy/xprt_quic.h index 5b31c67166..339d06bdcb 100644 --- a/include/haproxy/xprt_quic.h +++ b/include/haproxy/xprt_quic.h @@ -982,9 +982,9 @@ static inline void quic_pktns_init(struct quic_pktns *pktns) pktns->rx.largest_pn = -1; pktns->rx.nb_ack_eliciting = 0; - LIST_INIT(&pktns->rx.ack_ranges.list); - pktns->rx.ack_ranges.sz = 0; - pktns->rx.ack_ranges.enc_sz = 0; + pktns->rx.arngs.root = EB_ROOT_UNIQUE; + pktns->rx.arngs.sz = 0; + pktns->rx.arngs.enc_sz = 0; pktns->flags = 0; } diff --git a/src/quic_frame.c b/src/quic_frame.c index 21b81cd731..e82f588b54 100644 --- a/src/quic_frame.c +++ b/src/quic_frame.c @@ -7,6 +7,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #include #include #include @@ -147,26 +148,29 @@ static int quic_build_ack_frame(unsigned char **buf, const unsigned char *end, struct quic_frame *frm, struct quic_conn *conn) { struct quic_tx_ack *tx_ack = &frm->tx_ack; - struct quic_ack_range *ack_range, *next_ack_range; - - ack_range = LIST_NEXT(&tx_ack->ack_ranges->list, struct quic_ack_range *, list); - TRACE_PROTO("ack range", QUIC_EV_CONN_PRSAFRM, conn->conn,, &ack_range->last, &ack_range->first); - if (!quic_enc_int(buf, end, ack_range->last) || + struct eb64_node *ar, *prev_ar; + struct quic_arng_node *ar_node, *prev_ar_node; + + ar = eb64_last(&tx_ack->arngs->root); + ar_node = eb64_entry(&ar->node, struct quic_arng_node, first); + TRACE_PROTO("ack range", QUIC_EV_CONN_PRSAFRM, + conn->conn,, &ar_node->last, &ar_node->first.key); + if (!quic_enc_int(buf, end, ar_node->last) || !quic_enc_int(buf, end, tx_ack->ack_delay) || - !quic_enc_int(buf, end, tx_ack->ack_ranges->sz - 1) || - !quic_enc_int(buf, end, ack_range->last - 
ack_range->first)) + !quic_enc_int(buf, end, tx_ack->arngs->sz - 1) || + !quic_enc_int(buf, end, ar_node->last - ar_node->first.key)) return 0; - next_ack_range = LIST_NEXT(&ack_range->list, struct quic_ack_range *, list); - while (&next_ack_range->list != &tx_ack->ack_ranges->list) { + while ((prev_ar = eb64_prev(ar))) { + prev_ar_node = eb64_entry(&prev_ar->node, struct quic_arng_node, first); TRACE_PROTO("ack range", QUIC_EV_CONN_PRSAFRM, conn->conn,, - &next_ack_range->last, &next_ack_range->first); - if (!quic_enc_int(buf, end, ack_range->first - next_ack_range->last - 2) || - !quic_enc_int(buf, end, next_ack_range->last - next_ack_range->first)) + &prev_ar_node->last, &prev_ar_node->first.key); + if (!quic_enc_int(buf, end, ar_node->first.key - prev_ar_node->last - 2) || + !quic_enc_int(buf, end, prev_ar_node->last - prev_ar_node->first.key)) return 0; - ack_range = next_ack_range; - next_ack_range = LIST_NEXT(&ack_range->list, struct quic_ack_range *, list); + ar = prev_ar; + ar_node = eb64_entry(&ar->node, struct quic_arng_node, first); } return 1; diff --git a/src/xprt_quic.c b/src/xprt_quic.c index 4a4ad7d436..7cfbaa3e33 100644 --- a/src/xprt_quic.c +++ b/src/xprt_quic.c @@ -169,7 +169,7 @@ DECLARE_STATIC_POOL(pool_head_quic_crypto_buf, "quic_crypto_buf_pool", sizeof(st DECLARE_STATIC_POOL(pool_head_quic_frame, "quic_frame_pool", sizeof(struct quic_frame)); -DECLARE_STATIC_POOL(pool_head_quic_ack_range, "quic_ack_range_pool", sizeof(struct quic_ack_range)); +DECLARE_STATIC_POOL(pool_head_quic_arng, "quic_arng_pool", sizeof(struct quic_arng_node)); static ssize_t qc_build_hdshk_pkt(struct q_buf *buf, struct quic_conn *qc, int pkt_type, struct quic_enc_level *qel); @@ -1813,21 +1813,30 @@ static int quic_build_post_handshake_frames(struct quic_conn *conn) } /* Deallocate list of ACK ranges. 
*/ -void free_ack_range_list(struct list *l) +void free_quic_arngs(struct quic_arngs *arngs) { - struct quic_ack_range *curr, *next; - - list_for_each_entry_safe(curr, next, l, list) { - LIST_DEL(&curr->list); - free(curr); + struct eb64_node *n; + struct quic_arng_node *ar; + + n = eb64_first(&arngs->root); + while (n) { + struct eb64_node *next; + + ar = eb64_entry(&n->node, struct quic_arng_node, first); + next = eb64_next(n); + eb64_delete(n); + free(ar); + n = next; } } -/* Return the gap value between

<p> and <q> ACK ranges. */ -static inline size_t sack_gap(struct quic_ack_range *p, - struct quic_ack_range *q) +/* Return the gap value between

<p> and <q> ACK ranges where <q> follows <p>

in + * descending order. + */ +static inline size_t sack_gap(struct quic_arng_node *p, + struct quic_arng_node *q) { - return p->first - q->last - 2; + return p->first.key - q->last - 2; } @@ -1835,35 +1844,71 @@ static inline size_t sack_gap(struct quic_ack_range *p, * encoded size until it goes below . * Returns 1 if succeded, 0 if not (no more element to remove). */ -static int quic_rm_last_ack_ranges(struct quic_ack_ranges *qars, size_t limit) +static int quic_rm_last_ack_ranges(struct quic_arngs *arngs, size_t limit) { - struct list *l = &qars->list; - struct quic_ack_range *last, *prev_last; + struct eb64_node *last, *prev; - last = LIST_PREV(l, struct quic_ack_range *, list); - while (qars->enc_sz > limit) { - if (l->n == l) - return 0; + last = eb64_last(&arngs->root); + while (last && arngs->enc_sz > limit) { + struct quic_arng_node *last_node, *prev_node; - prev_last = LIST_PREV(&last->list, struct quic_ack_range *, list); - if (prev_last == last) + prev = eb64_prev(last); + if (!prev) return 0; - qars->enc_sz -= quic_int_getsize(last->last - last->first); - qars->enc_sz -= quic_int_getsize(sack_gap(prev_last, last)); - qars->enc_sz -= quic_decint_size_diff(qars->sz); - --qars->sz; - LIST_DEL(&last->list); - pool_free(pool_head_quic_ack_range, last); - last = prev_last; + last_node = eb64_entry(&last->node, struct quic_arng_node, first); + prev_node = eb64_entry(&prev->node, struct quic_arng_node, first); + arngs->enc_sz -= quic_int_getsize(last_node->last - last_node->first.key); + arngs->enc_sz -= quic_int_getsize(sack_gap(prev_node, last_node)); + arngs->enc_sz -= quic_decint_size_diff(arngs->sz); + --arngs->sz; + eb64_delete(last); + pool_free(pool_head_quic_arng, last); + last = prev; } return 1; } -/* Update list of ACK ranges with new packet number. +/* Set the encoded size of QUIC ack ranges. 
*/ +static void quic_arngs_set_enc_sz(struct quic_arngs *arngs) +{ + struct eb64_node *node, *next; + struct quic_arng_node *ar, *ar_next; + + node = eb64_last(&arngs->root); + if (!node) + return; + + ar = eb64_entry(&node->node, struct quic_arng_node, first); + arngs->enc_sz = quic_int_getsize(ar->last) + + quic_int_getsize(ar->last - ar->first.key) + quic_int_getsize(arngs->sz - 1); + + while ((next = eb64_prev(node))) { + ar_next = eb64_entry(&next->node, struct quic_arng_node, first); + arngs->enc_sz += quic_int_getsize(sack_gap(ar, ar_next)) + + quic_int_getsize(ar_next->last - ar_next->first.key); + node = next; + ar = eb64_entry(&node->node, struct quic_arng_node, first); + } +} + +/* Insert in ebtree node with as range value. + * Returns the ebtree node which has been inserted. + */ +static inline +struct eb64_node *quic_insert_new_range(struct eb_root *root, + struct quic_arng_node *node, + struct quic_arng *ar) +{ + node->first.key = ar->first; + node->last = ar->last; + return eb64_insert(root, &node->first); +} + +/* Update tree of ACK ranges with as new ACK range value. * Note that this function computes the number of bytes required to encode - * this list without taking into an account ->ack_delay member field. + * this tree of ACK ranges in descending order. * * Descending order * -------------> @@ -1875,154 +1920,99 @@ static int quic_rm_last_ack_ranges(struct quic_ack_ranges *qars, size_t limit) * ..........+--------+--------------+--------+...... 
* diff1 gap12 diff2 * - * To encode the previous list of ranges we must encode integers as follows: - * enc(last1),enc(diff1),enc(gap12),enc(diff2) + * To encode the previous list of ranges we must encode integers as follows in + * descending order: + * enc(last2),enc(diff2),enc(gap12),enc(diff1) * with diff1 = last1 - first1 * diff2 = last2 - first2 - * gap12 = first1 - last2 - 2 + * gap12 = first1 - last2 - 2 (>= 0) * - * To update this encoded list, we must considered 4 cases: - * ->last is incremented by 1, the previous gap, if any, must be decremented by one, - * ->first is decremented by 1, the next gap, if any, must be decremented by one, - * in both previous cases value is increment by 1. - * -> a new range is inserted between two others, =0 (1 byte), - * and a gap is splitted in two other gaps, and the size of the list is incremented - * by 1. - * -> two ranges are merged. */ -int quic_update_ack_ranges_list(struct quic_ack_ranges *ack_ranges, int64_t pn) +int quic_update_ack_ranges_list(struct quic_arngs *arngs, + struct quic_arng *ar) { - struct list *l = &ack_ranges->list; - size_t *sz = &ack_ranges->sz; - size_t *enc_sz = &ack_ranges->enc_sz; - - struct quic_ack_range *curr, *prev, *next; - struct quic_ack_range *new_sack; + struct eb64_node *le; + struct quic_arng_node *new_node; + struct eb64_node *new; + + new = NULL; + if (eb_is_empty(&arngs->root)) { + /* First range insertion. */ + new_node = pool_alloc(pool_head_quic_arng); + if (!new_node) + return 0; - prev = NULL; + quic_insert_new_range(&arngs->root, new_node, ar); + /* Increment the size of these ranges. */ + arngs->sz++; + goto out; + } - if (LIST_ISEMPTY(l)) { - /* Range insertion. 
*/ - new_sack = pool_alloc(pool_head_quic_ack_range); - if (!new_sack) + le = eb64_lookup_le(&arngs->root, ar->first); + if (!le) { + /* New insertion */ + new_node = pool_alloc(pool_head_quic_arng); + if (!new_node) return 0; - new_sack->first = new_sack->last = pn; - LIST_ADD(l, &new_sack->list); - /* Add the size of this new encoded range and the - * encoded number of ranges in this list after the first one - * which is 0 (1 byte). - */ - *enc_sz += quic_int_getsize(pn) + 2; - ++*sz; - return 1; + new = quic_insert_new_range(&arngs->root, new_node, ar); + /* Increment the size of these ranges. */ + arngs->sz++; } + else { + struct quic_arng_node *le_ar = + eb64_entry(&le->node, struct quic_arng_node, first); - list_for_each_entry_safe(curr, next, l, list) { - /* Already existing packet number */ - if (pn >= curr->first && pn <= curr->last) - break; + /* Already existing range */ + if (le_ar->first.key <= ar->first && le_ar->last >= ar->last) + return 1; - if (pn > curr->last + 1) { - /* Range insertion befor */ - new_sack = pool_alloc(pool_head_quic_ack_range); - if (!new_sack) + if (le_ar->last + 1 >= ar->first) { + le_ar->last = ar->last; + new = le; + new_node = le_ar; + } + else { + /* New insertion */ + new_node = pool_alloc(pool_head_quic_arng); + if (!new_node) return 0; - new_sack->first = new_sack->last = pn; - /* Add the size of this new encoded range and possibly - * increment by 1 the encoded number of ranges in this list. - */ - *enc_sz += quic_int_getsize(pn) + 1 + quic_incint_size_diff(*sz); - /* Deduce the previous largest number acked. */ - *enc_sz -= quic_int_getsize(curr->last); - if (prev) { - /* Insert after , before . */ - new_sack->list.n = &curr->list; - new_sack->list.p = &prev->list; - prev->list.n = curr->list.p = &new_sack->list; - /* Deduce the previous gap encoding size. - * Add thew new gaps between and and - * between and . 
- */ - *enc_sz += quic_int_getsize(sack_gap(prev, new_sack)) + - quic_int_getsize(sack_gap(new_sack, curr)) - - quic_int_getsize(sack_gap(prev, curr)); - } - else { - LIST_ADD(l, &new_sack->list); - /* Add the encoded size of the new gap betwen and . */ - *enc_sz += quic_int_getsize(sack_gap(new_sack, curr)); - } - ++*sz; - break; - } - else if (curr->last + 1 == pn) { - /* Increment the encoded size of diff by 1. */ - *enc_sz += quic_incint_size_diff(curr->last - curr->first); - if (prev) { - /* Decrement the encoded size of the previous gap by 1. */ - *enc_sz -= quic_decint_size_diff(sack_gap(prev, curr)); - } - else { - /* Increment the encode size of the largest acked packet number. */ - *enc_sz += quic_incint_size_diff(curr->last); - } - curr->last = pn; - break; + new = quic_insert_new_range(&arngs->root, new_node, ar); + /* Increment the size of these ranges. */ + arngs->sz++; } - else if (curr->first == pn + 1) { - if (&next->list != l && pn == next->last + 1) { - /* Two ranges and are merged. - * Dedude the encoded size of diff. */ - *enc_sz -= quic_int_getsize(curr->last - curr->first); - /* Deduce the encoded size of the gap between and . */ - *enc_sz -= quic_int_getsize(sack_gap(curr, next)); - /* Deduce the encode size of diff. */ - *enc_sz -= quic_int_getsize(next->last - next->first); - /* Add the new encoded size diff between curr->last and - * next->first. - */ - *enc_sz += quic_int_getsize(curr->last - next->first); - next->last = curr->last; - LIST_DEL(&curr->list); - pool_free(pool_head_quic_ack_range, curr); - /* Possibly decrement the encoded size of this list - * which is decremented by 1 - */ - *enc_sz -= quic_decint_size_diff(*sz); - --*sz; - } - else { - /* Increment the encoded size of diff by 1. */ - *enc_sz += quic_incint_size_diff(curr->last - curr->first); - /* Decrement the encoded size of the next gap by 1. 
*/ - if (&next->list != l) - *enc_sz -= quic_decint_size_diff(sack_gap(curr, next)); - curr->first = pn; - } - break; - } - else if (&next->list == l) { - new_sack = pool_alloc(pool_head_quic_ack_range); - if (!new_sack) - return 0; + } - new_sack->first = new_sack->last = pn; - /* We only have to add the encoded size of the gap between - * and and diff (0). - */ - *enc_sz += quic_int_getsize(sack_gap(curr, new_sack)) + 1; - LIST_ADDQ(l, &new_sack->list); - ++*sz; - break; + /* Verify that the new inserted node does not overlap the nodes + * which follow it. + */ + if (new) { + uint64_t new_node_last; + struct eb64_node *next; + struct quic_arng_node *next_node; + + new_node_last = new_node->last; + while ((next = eb64_next(new))) { + next_node = + eb64_entry(&next->node, struct quic_arng_node, first); + if (new_node_last + 1 < next_node->first.key) + break; + + if (next_node->last > new_node->last) + new_node->last = next_node->last; + eb64_delete(next); + free(next_node); + /* Decrement the size of these ranges. */ + arngs->sz--; } - prev = curr; } + quic_arngs_set_enc_sz(arngs); + + out: return 1; } - /* Remove the header protection of packets at encryption level. * Always succeeds. */ @@ -2130,6 +2120,8 @@ int qc_treat_rx_pkts(struct quic_enc_level *el, struct quic_conn_ctx *ctx) QUIC_EV_CONN_ELRXPKTS, ctx->conn, pkt); } else { + struct quic_arng ar = { .first = pkt->pn, .last = pkt->pn }; + if (pkt->flags & QUIC_FL_RX_PACKET_ACK_ELICITING) { el->pktns->rx.nb_ack_eliciting++; if (!(el->pktns->rx.nb_ack_eliciting & 1)) @@ -2141,7 +2133,7 @@ int qc_treat_rx_pkts(struct quic_enc_level *el, struct quic_conn_ctx *ctx) el->pktns->rx.largest_pn = pkt->pn; /* Update the list of ranges to acknowledge. 
*/ - if (!quic_update_ack_ranges_list(&el->pktns->rx.ack_ranges, pkt->pn)) { + if (!quic_update_ack_ranges_list(&el->pktns->rx.arngs, &ar)) { TRACE_DEVEL("Could not update ack range list", QUIC_EV_CONN_ELRXPKTS, ctx->conn); node = eb64_next(node); @@ -3141,10 +3133,10 @@ static int quic_ack_frm_reduce_sz(struct quic_frame *ack_frm, size_t limit) ack_delay_sz = quic_int_getsize(ack_frm->tx_ack.ack_delay); /* A frame is made of 1 byte for the frame type. */ room = limit - ack_delay_sz - 1; - if (!quic_rm_last_ack_ranges(ack_frm->tx_ack.ack_ranges, room)) + if (!quic_rm_last_ack_ranges(ack_frm->tx_ack.arngs, room)) return 0; - return 1 + ack_delay_sz + ack_frm->tx_ack.ack_ranges->enc_sz; + return 1 + ack_delay_sz + ack_frm->tx_ack.arngs->enc_sz; } /* Prepare as most as possible CRYPTO frames from prebuilt CRYPTO frames for @@ -3288,9 +3280,9 @@ static ssize_t qc_do_build_hdshk_pkt(struct q_buf *wbuf, /* Build an ACK frame if required. */ ack_frm_len = 0; if ((qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) && - !LIST_ISEMPTY(&qel->pktns->rx.ack_ranges.list)) { + !eb_is_empty(&qel->pktns->rx.arngs.root)) { ack_frm.tx_ack.ack_delay = 0; - ack_frm.tx_ack.ack_ranges = &qel->pktns->rx.ack_ranges; + ack_frm.tx_ack.arngs = &qel->pktns->rx.arngs; ack_frm_len = quic_ack_frm_reduce_sz(&ack_frm, end - pos); if (!ack_frm_len) goto err; @@ -3518,9 +3510,9 @@ static ssize_t qc_do_build_phdshk_apkt(struct q_buf *wbuf, /* Build an ACK frame if required. */ ack_frm_len = 0; if ((qel->pktns->flags & QUIC_FL_PKTNS_ACK_REQUIRED) && - !LIST_ISEMPTY(&qel->pktns->rx.ack_ranges.list)) { + !eb_is_empty(&qel->pktns->rx.arngs.root)) { ack_frm.tx_ack.ack_delay = 0; - ack_frm.tx_ack.ack_ranges = &qel->pktns->rx.ack_ranges; + ack_frm.tx_ack.arngs = &qel->pktns->rx.arngs; ack_frm_len = quic_ack_frm_reduce_sz(&ack_frm, end - pos); if (!ack_frm_len) goto err;