From e83f937cc1b59035b27aa4355efe028ef6ac8ce8 Mon Sep 17 00:00:00 2001 From: Amaury Denoyelle Date: Tue, 18 Apr 2023 11:10:54 +0200 Subject: [PATCH] MEDIUM: quic: use a global CID trees list Previously, quic_connection_id instances were stored in a per-thread tree list. Datagrams were first dispatched to the correct thread using the encoded TID before a tree lookup was done. Remove these trees and replace them with a global list of 256 trees. A CID uses the list index corresponding to its first byte. On datagram dispatch, the CID is looked up in its tree and the TID is retrieved using the new member quic_connection_id.tid. As such, a read-write lock protects each list instance. With 256 entries, it is expected that contention should be reduced. A new structure quic_cid_tree serves as a tree container associated with its read-write lock. An API is implemented to ensure lock safety for insert/lookup/delete operations. This patch is a step forward to be able to break the affinity between a CID and a TID encoded thread. This is required to be able to migrate a quic_conn after accept to select a thread based on its load. This should be backported up to 2.7 after a period of observation. 
--- include/haproxy/proto_quic.h | 6 +++ include/haproxy/quic_conn-t.h | 2 +- include/haproxy/quic_conn.h | 77 ++++++++++++++------------- include/haproxy/thread.h | 1 + src/proto_quic.c | 20 +++++++- src/quic_conn.c | 97 +++++++++++++++++++++++++++++++---- src/quic_sock.c | 14 ++++- src/thread.c | 1 + 8 files changed, 166 insertions(+), 52 deletions(-) diff --git a/include/haproxy/proto_quic.h b/include/haproxy/proto_quic.h index 8677cbfaed..a0e2b98160 100644 --- a/include/haproxy/proto_quic.h +++ b/include/haproxy/proto_quic.h @@ -24,6 +24,12 @@ extern struct protocol proto_quic4; extern struct protocol proto_quic6; +struct quic_cid_tree { + struct eb_root root; + __decl_thread(HA_RWLOCK_T lock); +}; + extern struct quic_dghdlr *quic_dghdlrs; +extern struct quic_cid_tree *quic_cid_trees; #endif /* _HAPROXY_PROTO_QUIC_H */ diff --git a/include/haproxy/quic_conn-t.h b/include/haproxy/quic_conn-t.h index d366a0b488..c8c6f48d33 100644 --- a/include/haproxy/quic_conn-t.h +++ b/include/haproxy/quic_conn-t.h @@ -303,6 +303,7 @@ struct quic_connection_id { struct quic_cid cid; /* CID data */ struct quic_conn *qc; /* QUIC connection using this CID */ + uint tid; /* Attached Thread ID for the connection. */ }; /* Structure to hold a range of ACKs sent in ACK frames. */ @@ -444,7 +445,6 @@ struct quic_rx_packet { struct quic_dghdlr { struct mt_list dgrams; struct tasklet *task; - struct eb_root cids; }; /* Structure to store enough information about the RX CRYPTO frames. */ diff --git a/include/haproxy/quic_conn.h b/include/haproxy/quic_conn.h index e1b333ec5a..b52535ff3e 100644 --- a/include/haproxy/quic_conn.h +++ b/include/haproxy/quic_conn.h @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -137,9 +138,41 @@ static inline void quic_cid_dump(struct buffer *buf, chunk_appendf(buf, ")"); } -/* Free the CIDs attached to QUIC connection. This must be called under - * the CID lock. - */ +/* Return tree index where is stored. 
*/ +static inline uchar _quic_cid_tree_idx(const unsigned char *cid) +{ + return cid[0]; +} + +/* Return tree index where is stored. */ +static inline uchar quic_cid_tree_idx(const struct quic_cid *cid) +{ + return _quic_cid_tree_idx(cid->data); +} + +/* Insert into global CID tree as a thread-safe operation. */ +static inline void quic_cid_insert(struct quic_connection_id *conn_id) +{ + const uchar idx = quic_cid_tree_idx(&conn_id->cid); + struct quic_cid_tree *tree = &quic_cid_trees[idx]; + + HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock); + ebmb_insert(&tree->root, &conn_id->node, conn_id->cid.len); + HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock); +} + +/* Remove from global CID tree as a thread-safe operation. */ +static inline void quic_cid_delete(struct quic_connection_id *conn_id) +{ + const uchar idx = quic_cid_tree_idx(&conn_id->cid); + struct quic_cid_tree *tree = &quic_cid_trees[idx]; + + HA_RWLOCK_WRLOCK(QC_CID_LOCK, &tree->lock); + ebmb_delete(&conn_id->node); + HA_RWLOCK_WRUNLOCK(QC_CID_LOCK, &tree->lock); +} + +/* Free the CIDs attached to QUIC connection. */ static inline void free_quic_conn_cids(struct quic_conn *conn) { struct eb64_node *node; @@ -151,7 +184,7 @@ static inline void free_quic_conn_cids(struct quic_conn *conn) conn_id = eb64_entry(node, struct quic_connection_id, seq_num); /* remove the CID from the receiver tree */ - ebmb_delete(&conn_id->node); + quic_cid_delete(conn_id); /* remove the CID from the quic_conn tree */ node = eb64_next(node); @@ -175,39 +208,6 @@ static inline void quic_connection_id_to_frm_cpy(struct quic_frame *dst, to->stateless_reset_token = src->stateless_reset_token; } -/* extract a TID from a CID for receiver , from 0 to global.nbthread-1 and - * in any case no more than 4095. It takes into account the bind_conf's thread - * group and the bind_conf's thread mask. 
The algorithm is the following: most - * packets contain a valid thread ID for the bind_conf, which means that the - * retrieved ID directly maps to a bound thread ID. If that's not the case, - * then we have to remap it. The resulting thread ID will then differ but will - * be correctly encoded and decoded. - */ -static inline uint quic_get_cid_tid(const unsigned char *cid, const struct receiver *rx) -{ - uint id, grp; - uint base, count; - - id = read_n16(cid) & 4095; - grp = rx->bind_tgroup; - base = ha_tgroup_info[grp - 1].base; - count = ha_tgroup_info[grp - 1].count; - - if (base <= id && id < base + count && - rx->bind_thread & ha_thread_info[id].ltid_bit) - return id; // part of the group and bound: valid - - /* The thread number isn't valid, it doesn't map to a thread bound on - * this receiver. Let's reduce it to one of the thread(s) valid for - * that receiver. - */ - count = my_popcountl(rx->bind_thread); - id = count - 1 - id % count; - id = mask_find_rank_bit(id, rx->bind_thread); - id += base; - return id; -} - /* Modify to have a CID linked to the thread ID that * quic_get_cid_tid() will be able to extract return. 
*/ @@ -676,6 +676,9 @@ int quic_get_dgram_dcid(unsigned char *buf, const unsigned char *end, unsigned char **dcid, size_t *dcid_len); struct quic_cid quic_derive_cid(const struct quic_cid *orig, const struct sockaddr_storage *addr); +int quic_get_cid_tid(const unsigned char *cid, size_t cid_len, + const struct sockaddr_storage *cli_addr, + unsigned char *buf, size_t buf_len); int qc_send_mux(struct quic_conn *qc, struct list *frms); void qc_notify_close(struct quic_conn *qc); diff --git a/include/haproxy/thread.h b/include/haproxy/thread.h index b504ab675e..30ff7827a0 100644 --- a/include/haproxy/thread.h +++ b/include/haproxy/thread.h @@ -424,6 +424,7 @@ enum lock_label { SFT_LOCK, /* sink forward target */ IDLE_CONNS_LOCK, OCSP_LOCK, + QC_CID_LOCK, OTHER_LOCK, /* WT: make sure never to use these ones outside of development, * we need them for lock profiling! diff --git a/src/proto_quic.c b/src/proto_quic.c index b1708eb9f2..0961fc2104 100644 --- a/src/proto_quic.c +++ b/src/proto_quic.c @@ -51,6 +51,11 @@ /* per-thread quic datagram handlers */ struct quic_dghdlr *quic_dghdlrs; +struct eb_root *quic_cid_tree; + +/* global CID trees */ +#define QUIC_CID_TREES_CNT 256 +struct quic_cid_tree *quic_cid_trees; /* Size of the internal buffer of QUIC RX buffer at the fd level */ #define QUIC_RX_BUFSZ (1UL << 18) @@ -734,11 +739,20 @@ static int quic_alloc_dghdlrs(void) dghdlr->task->context = dghdlr; dghdlr->task->process = quic_lstnr_dghdlr; - dghdlr->cids = EB_ROOT_UNIQUE; - MT_LIST_INIT(&dghdlr->dgrams); } + quic_cid_trees = calloc(QUIC_CID_TREES_CNT, sizeof(struct quic_cid_tree)); + if (!quic_cid_trees) { + ha_alert("Failed to allocate global CIDs trees.\n"); + return 0; + } + + for (i = 0; i < QUIC_CID_TREES_CNT; ++i) { + HA_RWLOCK_INIT(&quic_cid_trees[i].lock); + quic_cid_trees[i].root = EB_ROOT_UNIQUE; + } + return 1; } REGISTER_POST_CHECK(quic_alloc_dghdlrs); @@ -753,6 +767,8 @@ static int quic_deallocate_dghdlrs(void) free(quic_dghdlrs); } + 
ha_free(&quic_cid_trees); + return 1; } REGISTER_POST_DEINIT(quic_deallocate_dghdlrs); diff --git a/src/quic_conn.c b/src/quic_conn.c index e1725e5c9b..1a6d1176a0 100644 --- a/src/quic_conn.c +++ b/src/quic_conn.c @@ -3253,8 +3253,7 @@ static int qc_parse_pkt_frms(struct quic_conn *qc, struct quic_rx_packet *pkt, TRACE_ERROR("CID allocation error", QUIC_EV_CONN_IO_CB, qc); } else { - /* insert the allocated CID in the receiver datagram handler tree */ - ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len); + quic_cid_insert(conn_id); qc_build_new_connection_id_frm(qc, conn_id); } break; @@ -3969,6 +3968,59 @@ struct quic_cid quic_derive_cid(const struct quic_cid *orig, return cid; } +/* Retrieve the thread ID associated to QUIC connection ID of length + * . CID may be not found on the CID tree because it is an ODCID. In + * this case, it will derived using client address as hash + * parameter. However, this is done only if points to an INITIAL or 0RTT + * packet of length . + * + * Returns the thread ID or a negative error code. 
 + */ +int quic_get_cid_tid(const unsigned char *cid, size_t cid_len, + const struct sockaddr_storage *cli_addr, + unsigned char *buf, size_t buf_len) +{ + struct quic_cid_tree *tree; + struct quic_connection_id *conn_id; + struct ebmb_node *node; + + tree = &quic_cid_trees[_quic_cid_tree_idx(cid)]; + HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock); + node = ebmb_lookup(&tree->root, cid, cid_len); + HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock); + + if (!node) { + struct quic_cid orig, derive_cid; + struct quic_rx_packet pkt; + + if (!qc_parse_hd_form(&pkt, &buf, buf + buf_len)) + goto not_found; + + if (pkt.type != QUIC_PACKET_TYPE_INITIAL && + pkt.type != QUIC_PACKET_TYPE_0RTT) { + goto not_found; + } + + memcpy(orig.data, cid, cid_len); + orig.len = cid_len; + derive_cid = quic_derive_cid(&orig, cli_addr); + + tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)]; + HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock); + node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len); + HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock); + } + + if (!node) + goto not_found; + + conn_id = ebmb_entry(node, struct quic_connection_id, node); + return HA_ATOMIC_LOAD(&conn_id->tid); + + not_found: + return -1; +} + /* Allocate a new CID and attach it to ebtree. * * If and params are non null, the new CID value is directly @@ -4016,6 +4068,7 @@ static struct quic_connection_id *new_quic_cid(struct eb_root *root, } conn_id->qc = qc; + HA_ATOMIC_STORE(&conn_id->tid, tid); conn_id->seq_num.key = qc->next_cid_seq_num++; conn_id->retire_prior_to = 0; @@ -4082,8 +4135,10 @@ static int quic_build_post_handshake_frames(struct quic_conn *qc) goto err; } - /* insert the allocated CID in the receiver datagram handler tree */ - ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len); + /* TODO To prevent CID tree locking, all CIDs created here + * could be allocated at the same time as the first one. 
+ */ + quic_cid_insert(conn_id); quic_connection_id_to_frm_cpy(frm, conn_id); LIST_APPEND(&frm_list, &frm->list); @@ -4114,7 +4169,8 @@ static int quic_build_post_handshake_frames(struct quic_conn *qc) break; node = eb64_next(node); - ebmb_delete(&conn_id->node); + quic_cid_delete(conn_id); + eb64_delete(&conn_id->seq_num); pool_free(pool_head_quic_connection_id, conn_id); } @@ -5504,7 +5560,7 @@ static struct quic_conn *qc_new_conn(const struct quic_version *qv, int ipv4, /* insert the allocated CID in the receiver datagram handler tree */ if (server) - ebmb_insert(&quic_dghdlrs[tid].cids, &conn_id->node, conn_id->cid.len); + quic_cid_insert(conn_id); /* Select our SCID which is the first CID with 0 as sequence number. */ qc->scid = conn_id->cid; @@ -6511,11 +6567,15 @@ static struct quic_conn *retrieve_qc_conn_from_cid(struct quic_rx_packet *pkt, struct quic_conn *qc = NULL; struct ebmb_node *node; struct quic_connection_id *conn_id; + struct quic_cid_tree *tree; TRACE_ENTER(QUIC_EV_CONN_RXPKT); /* First look into DCID tree. */ - node = ebmb_lookup(&quic_dghdlrs[tid].cids, pkt->dcid.data, pkt->dcid.len); + tree = &quic_cid_trees[_quic_cid_tree_idx(pkt->dcid.data)]; + HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock); + node = ebmb_lookup(&tree->root, pkt->dcid.data, pkt->dcid.len); + HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock); /* If not found on an Initial/0-RTT packet, it could be because an * ODCID is reused by the client. 
Calculate the derived CID value to @@ -6524,7 +6584,11 @@ static struct quic_conn *retrieve_qc_conn_from_cid(struct quic_rx_packet *pkt, if (!node && (pkt->type == QUIC_PACKET_TYPE_INITIAL || pkt->type == QUIC_PACKET_TYPE_0RTT)) { const struct quic_cid derive_cid = quic_derive_cid(&pkt->dcid, saddr); - node = ebmb_lookup(&quic_dghdlrs[tid].cids, derive_cid.data, derive_cid.len); + + tree = &quic_cid_trees[quic_cid_tree_idx(&derive_cid)]; + HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock); + node = ebmb_lookup(&tree->root, derive_cid.data, derive_cid.len); + HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock); } if (!node) @@ -8196,9 +8260,12 @@ int quic_dgram_parse(struct quic_dgram *dgram, struct quic_conn *from_qc, */ int qc_check_dcid(struct quic_conn *qc, unsigned char *dcid, size_t dcid_len) { - struct ebmb_node *node; + const uchar idx = _quic_cid_tree_idx(dcid); struct quic_connection_id *conn_id; + struct ebmb_node *node = NULL; + struct quic_cid_tree *tree = &quic_cid_trees[idx]; + /* Test against our default CID or client ODCID. */ if ((qc->scid.len == dcid_len && memcmp(qc->scid.data, dcid, dcid_len) == 0) || (qc->odcid.len == dcid_len && @@ -8206,7 +8273,17 @@ int qc_check_dcid(struct quic_conn *qc, unsigned char *dcid, size_t dcid_len) return 1; } - node = ebmb_lookup(&quic_dghdlrs[tid].cids, dcid, dcid_len); + /* Test against our other CIDs. This can happen if the client has + * decided to switch to a new one. + * + * TODO to avoid locking, loop through qc.cids as an alternative. + * + * TODO set it to our default CID to avoid this operation next time. 
+ */ + HA_RWLOCK_RDLOCK(QC_CID_LOCK, &tree->lock); + node = ebmb_lookup(&tree->root, dcid, dcid_len); + HA_RWLOCK_RDUNLOCK(QC_CID_LOCK, &tree->lock); + if (node) { conn_id = ebmb_entry(node, struct quic_connection_id, node); if (qc == conn_id->qc) diff --git a/src/quic_sock.c b/src/quic_sock.c index 905ecab85f..3b13426f69 100644 --- a/src/quic_sock.c +++ b/src/quic_sock.c @@ -209,7 +209,6 @@ static int quic_lstnr_dgram_dispatch(unsigned char *buf, size_t len, void *owner struct quic_dgram *new_dgram, struct list *dgrams) { struct quic_dgram *dgram; - const struct listener *l = owner; unsigned char *dcid; size_t dcid_len; int cid_tid; @@ -221,7 +220,18 @@ static int quic_lstnr_dgram_dispatch(unsigned char *buf, size_t len, void *owner if (!dgram) goto err; - cid_tid = quic_get_cid_tid(dcid, &l->rx); + if ((cid_tid = quic_get_cid_tid(dcid, dcid_len, saddr, buf, len)) < 0) { + /* CID not found. Ensure only one thread will handle this CID + * to avoid duplicating connection creation. This code may not + * work if using thread-mask on the listener but is only here + * for a short time. + * + * TODO this code implies the thread used for QUIC handshake is + * directly derived from client chosen CID. This association + * must be broken. + */ + cid_tid = dcid[0] % global.nbthread; + } /* All the members must be initialized! */ dgram->owner = owner; diff --git a/src/thread.c b/src/thread.c index 6001f8c443..d7128252ed 100644 --- a/src/thread.c +++ b/src/thread.c @@ -443,6 +443,7 @@ static const char *lock_label(enum lock_label label) case SFT_LOCK: return "SFT"; case IDLE_CONNS_LOCK: return "IDLE_CONNS"; case OCSP_LOCK: return "OCSP"; + case QC_CID_LOCK: return "QC_CID"; case OTHER_LOCK: return "OTHER"; case DEBUG1_LOCK: return "DEBUG1"; case DEBUG2_LOCK: return "DEBUG2"; -- 2.39.5