2 * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
4 * Licensed under the Apache License 2.0 (the "License"). You may not use
5 * this file except in compliance with the License. You can obtain a copy
6 * in the file LICENSE in the source distribution or at
7 * https://www.openssl.org/source/license.html
10 #include "internal/quic_ackm.h"
11 #include "internal/common.h"
18 * The TX Packet History object tracks information about packets which have been
19 * sent for which we later expect to receive an ACK. It is essentially a simple
20 * database keeping a list of packet information structures in packet number
21 * order which can also be looked up directly by packet number.
23 * We currently only allow packets to be appended to the list (i.e. the packet
24 * numbers of the packets appended to the list must monotonically increase), as
25 * we should not currently need more general functionality such as a sorted list
28 struct tx_pkt_history_st
{
29 /* A linked list of all our packets. */
30 OSSL_ACKM_TX_PKT
*head
, *tail
;
32 /* Number of packets in the list. */
36 * Mapping from packet numbers (uint64_t) to (OSSL_ACKM_TX_PKT *)
38 * Invariant: A packet is in this map if and only if it is in the linked
41 LHASH_OF(OSSL_ACKM_TX_PKT
) *map
;
44 * The lowest packet number which may currently be added to the history list
45 * (inclusive). We do not allow packet numbers to be added to the history
46 * list non-monotonically, so packet numbers must be greater than or equal
52 * Packet number of the highest packet info structure we have yet appended
53 * to the list. This is usually one less than watermark, except when we have
54 * not added any packet yet.
56 uint64_t highest_sent
;
/* Instantiate the typed LHASH wrappers for OSSL_ACKM_TX_PKT. */
DEFINE_LHASH_OF_EX(OSSL_ACKM_TX_PKT);
61 static unsigned long tx_pkt_info_hash(const OSSL_ACKM_TX_PKT
*pkt
)
66 static int tx_pkt_info_compare(const OSSL_ACKM_TX_PKT
*a
,
67 const OSSL_ACKM_TX_PKT
*b
)
69 if (a
->pkt_num
< b
->pkt_num
)
71 if (a
->pkt_num
> b
->pkt_num
)
77 tx_pkt_history_init(struct tx_pkt_history_st
*h
)
79 h
->head
= h
->tail
= NULL
;
84 h
->map
= lh_OSSL_ACKM_TX_PKT_new(tx_pkt_info_hash
, tx_pkt_info_compare
);
92 tx_pkt_history_destroy(struct tx_pkt_history_st
*h
)
94 lh_OSSL_ACKM_TX_PKT_free(h
->map
);
96 h
->head
= h
->tail
= NULL
;
100 tx_pkt_history_add_actual(struct tx_pkt_history_st
*h
,
101 OSSL_ACKM_TX_PKT
*pkt
)
103 OSSL_ACKM_TX_PKT
*existing
;
106 * There should not be any existing packet with this number
109 existing
= lh_OSSL_ACKM_TX_PKT_retrieve(h
->map
, pkt
);
110 if (!ossl_assert(existing
== NULL
))
113 /* Should not already be in a list. */
114 if (!ossl_assert(pkt
->next
== NULL
&& pkt
->prev
== NULL
))
117 lh_OSSL_ACKM_TX_PKT_insert(h
->map
, pkt
);
131 /* Adds a packet information structure to the history list. */
133 tx_pkt_history_add(struct tx_pkt_history_st
*h
,
134 OSSL_ACKM_TX_PKT
*pkt
)
136 if (!ossl_assert(pkt
->pkt_num
>= h
->watermark
))
139 if (tx_pkt_history_add_actual(h
, pkt
) < 1)
142 h
->watermark
= pkt
->pkt_num
+ 1;
143 h
->highest_sent
= pkt
->pkt_num
;
147 /* Retrieve a packet information structure by packet number. */
148 static OSSL_ACKM_TX_PKT
*
149 tx_pkt_history_by_pkt_num(struct tx_pkt_history_st
*h
, uint64_t pkt_num
)
151 OSSL_ACKM_TX_PKT key
;
153 key
.pkt_num
= pkt_num
;
155 return lh_OSSL_ACKM_TX_PKT_retrieve(h
->map
, &key
);
158 /* Remove a packet information structure from the history log. */
160 tx_pkt_history_remove(struct tx_pkt_history_st
*h
, uint64_t pkt_num
)
162 OSSL_ACKM_TX_PKT key
, *pkt
;
163 key
.pkt_num
= pkt_num
;
165 pkt
= tx_pkt_history_by_pkt_num(h
, pkt_num
);
169 if (pkt
->prev
!= NULL
)
170 pkt
->prev
->next
= pkt
->next
;
171 if (pkt
->next
!= NULL
)
172 pkt
->next
->prev
= pkt
->prev
;
178 pkt
->prev
= pkt
->next
= NULL
;
180 lh_OSSL_ACKM_TX_PKT_delete(h
->map
, &key
);
186 * RX Packet Number Tracking
187 * *************************
189 * **Background.** The RX side of the ACK manager must track packets we have
190 * received for which we have to generate ACK frames. Broadly, this means we
191 * store a set of packet numbers which we have received but which we do not know
192 * for a fact that the transmitter knows we have received.
194 * This must handle various situations:
196 * 1. We receive a packet but have not sent an ACK yet, so the transmitter
197 * does not know whether we have received it or not yet.
199 * 2. We receive a packet and send an ACK which is lost. We do not
200 * immediately know that the ACK was lost and the transmitter does not know
201 * that we have received the packet.
203 * 3. We receive a packet and send an ACK which is received by the
204 * transmitter. The transmitter does not immediately respond with an ACK,
205 * or responds with an ACK which is lost. The transmitter knows that we
206 * have received the packet, but we do not know for sure that it knows,
207 * because the ACK we sent could have been lost.
209 * 4. We receive a packet and send an ACK which is received by the
210 * transmitter. The transmitter subsequently sends us an ACK which confirms
211 * its receipt of the ACK we sent, and we successfully receive that ACK, so
212 * we know that the transmitter knows, that we received the original
215 * Only when we reach case (4) are we relieved of any need to track a given
216 * packet number we have received, because only in this case do we know for sure
217 * that the peer knows we have received the packet. Having reached case (4) we
218 * will never again need to generate an ACK containing the PN in question, but
219 * until we reach that point, we must keep track of the PN as not having been
220 * provably ACKed, as we may have to keep generating ACKs for the given PN not
221 * just until the transmitter receives one, but until we know that it has
222 * received one. This will be referred to herein as "provably ACKed".
224 * **Duplicate handling.** The above discusses the case where we have received a
225 * packet with a given PN but are at best unsure whether the sender knows we
226 * have received it or not. However, we must also handle the case where we have
227 * yet to receive a packet with a given PN in the first place. The reason for
228 * this is because of the requirement expressed by RFC 9000 s. 12.3:
230 * "A receiver MUST discard a newly unprotected packet unless it is certain
231 * that it has not processed another packet with the same packet number from
232 * the same packet number space."
234 * We must ensure we never process a duplicate PN. As such, each possible PN we
235 * can receive must exist in one of the following logical states:
237 * - We have never processed this PN before
238 * (so if we receive such a PN, it can be processed)
240 * - We have processed this PN but it has not yet been provably ACKed
241 * (and should therefore be in any future ACK frame generated;
242 * if we receive such a PN again, it must be ignored)
244 * - We have processed this PN and it has been provably ACKed
245 * (if we receive such a PN again, it must be ignored)
247 * However, if we were to track this state for every PN ever used in the history
248 * of a connection, the amount of state required would increase unboundedly as
249 * the connection goes on (for example, we would have to store a set of every PN
252 * RFC 9000 s. 12.3 continues:
254 * "Endpoints that track all individual packets for the purposes of detecting
255 * duplicates are at risk of accumulating excessive state. The data required
256 * for detecting duplicates can be limited by maintaining a minimum packet
257 * number below which all packets are immediately dropped."
259 * Moreover, RFC 9000 s. 13.2.3 states that:
261 * "A receiver MUST retain an ACK Range unless it can ensure that it will not
262 * subsequently accept packets with numbers in that range. Maintaining a
263 * minimum packet number that increases as ranges are discarded is one way to
264 * achieve this with minimal state."
266 * This touches on a subtlety of the original requirement quoted above: the
267 * receiver MUST discard a packet unless it is certain that it has not processed
268 * another packet with the same PN. However, this does not forbid the receiver
269 * from also discarding some PNs even though it has not yet processed them. In
270 * other words, implementations must be conservative and err in the direction of
271 * assuming a packet is a duplicate, but it is acceptable for this to come at
272 * the cost of falsely identifying some packets as duplicates.
274 * This allows us to bound the amount of state we must keep, and we adopt the
275 * suggested strategy quoted above to do so. We define a watermark PN below
276 * which all PNs are in the same state. This watermark is only ever increased.
277 * Thus the PNs the state for which needs to be explicitly tracked is limited to
278 * only a small number of recent PNs, and all older PNs have an assumed state.
280 * Any given PN thus falls into one of the following states:
282 * - (A) The PN is above the watermark but we have not yet received it.
284 * If we receive such a PN, we should process it and record the PN as
287 * - (B) The PN is above the watermark and we have received it.
289 * The PN should be included in any future ACK frame we generate.
290 * If we receive such a PN again, we should ignore it.
292 * - (C) The PN is below the watermark.
294 * We do not know whether a packet with the given PN was received or
295 * not. To be safe, if we receive such a packet, it is not processed.
297 * Note that state (C) corresponds to both "we have processed this PN and it has
298 * been provably ACKed" logical state and a subset of the PNs in the "we have
299 * never processed this PN before" logical state (namely all PNs which were lost
300 * and never received, but which are not recent enough to be above the
301 * watermark). The reason we can merge these states and avoid tracking states
302 * for the PNs in this state is because the provably ACKed and never-received
303 * states are functionally identical in terms of how we need to handle them: we
304 * don't need to do anything for PNs in either of these states, so we don't have
305 * to care about PNs in this state nor do we have to care about distinguishing
306 * the two states for a given PN.
308 * Note that under this scheme provably ACKed PNs are by definition always below
309 * the watermark; therefore, it follows that when a PN becomes provably ACKed,
310 * the watermark must be immediately increased to exceed it (otherwise we would
311 * keep reporting it in future ACK frames).
313 * This is in line with RFC 9000 s. 13.2.4's suggested strategy on when
314 * to advance the watermark:
316 * "When a packet containing an ACK frame is sent, the Largest Acknowledged
317 * field in that frame can be saved. When a packet containing an ACK frame is
318 * acknowledged, the receiver can stop acknowledging packets less than or
319 * equal to the Largest Acknowledged field in the sent ACK frame."
321 * This is where our scheme's false positives arise. When a packet containing an
322 * ACK frame is itself ACK'd, PNs referenced in that ACK frame become provably
323 * acked, and the watermark is bumped accordingly. However, the Largest
324 * Acknowledged field does not imply that all lower PNs have been received,
325 * because there may be gaps expressed in the ranges of PNs expressed by that
326 * and previous ACK frames. Thus, some unreceived PNs may be moved below the
327 * watermark, and we may subsequently reject those PNs as possibly being
328 * duplicates even though we have not actually received those PNs. Since we bump
329 * the watermark when a PN becomes provably ACKed, it follows that an unreceived
330 * PN falls below the watermark (and thus becomes a false positive for the
331 * purposes of duplicate detection) when a higher-numbered PN becomes provably
334 * Thus, when PN n becomes provably acked, any unreceived PNs in the range [0,
335 * n) will no longer be processed. Although datagrams may be reordered in the
336 * network, a PN we receive can only become provably ACKed after our own
337 * subsequently generated ACK frame is sent in a future TX packet, and then we
338  * receive another RX PN acknowledging that TX packet. This means that a given RX
339 * PN can only become provably ACKed at least 1 RTT after it is received; it is
340 * unlikely that any reordered datagrams will still be "in the network" (and not
341 * lost) by this time. If this does occur for whatever reason and a late PN is
342 * received, the packet will be discarded unprocessed and the PN is simply
343 * handled as though lost (a "written off" PN).
345 * **Data structure.** Our state for the RX handling side of the ACK manager, as
346 * discussed above, mainly comprises:
348 * a) a logical set of PNs, and
349 * b) a monotonically increasing PN counter (the watermark).
351 * For (a), we define a data structure which stores a logical set of PNs, which
352 * we use to keep track of which PNs we have received but which have not yet
353 * been provably ACKed, and thus will later need to generate an ACK frame for.
355  * The correspondence with the logical states discussed above is as follows. A
356 * PN is in state (C) if it is below the watermark; otherwise it is in state (B)
357 * if it is in the logical set of PNs, and in state (A) otherwise.
359 * Note that PNs are only removed from the PN set (when they become provably
360 * ACKed or written off) by virtue of advancement of the watermark. Removing PNs
361 * from the PN set any other way would be ambiguous as it would be
362 * indistinguishable from a PN we have not yet received and risk us processing a
363 * duplicate packet. In other words, for a given PN:
365 * - State (A) can transition to state (B) or (C)
366 * - State (B) can transition to state (C) only
367 * - State (C) is the terminal state
369 * We can query the logical set data structure for PNs which have been received
370 * but which have not been provably ACKed when we want to generate ACK frames.
371 * Since ACK frames can be lost and/or we might not know that the peer has
372 * successfully received them, we might generate multiple ACK frames covering a
373 * given PN until that PN becomes provably ACKed and we finally remove it from
374 * our set (by bumping the watermark) as no longer being our concern.
376 * The data structure supports the following operations:
378 * Insert Range: Adds an inclusive range of packet numbers [start, end]
379 * to the set. Equivalent to Insert for each number
380 * in the range. (Used when we receive a new PN.)
382 * Remove Range: Removes an inclusive range of packet numbers [start, end]
383 * from the set. Not all of the range need already be in
384 * the set, but any part of the range in the set is removed.
385 * (Used when bumping the watermark.)
387 * Query: Is a PN in the data structure?
389 * The data structure can be iterated.
391 * For greater efficiency in tracking large numbers of contiguous PNs, we track
392 * PN ranges rather than individual PNs. The data structure manages a list of PN
393 * ranges [[start, end]...]. Internally this is implemented as a doubly linked
394 * sorted list of range structures, which are automatically split and merged as
397 * This data structure requires O(n) traversal of the list for insertion,
398 * removal and query when we are not adding/removing ranges which are near the
399 * beginning or end of the set of ranges. It is expected that the number of PN
400 * ranges needed at any given time will generally be small and that most
401 * operations will be close to the beginning or end of the range.
403 * Invariant: The data structure is always sorted in ascending order by PN.
405 * Invariant: No two adjacent ranges ever 'border' one another (have no
406 * numerical gap between them) as the data structure always ensures
407 * such ranges are merged.
409 * Invariant: No two ranges ever overlap.
411 * Invariant: No range [a, b] ever has a > b.
413 * Invariant: Since ranges are represented using inclusive bounds, no range
414 * item inside the data structure can represent a span of zero PNs.
416 * **Possible duplicates.** A PN is considered a possible duplicate when either:
418 * a) its PN is already in the PN set (i.e. has already been received), or
419 * b) its PN is below the watermark (i.e. was provably ACKed or written off).
421 * A packet with a given PN is considered 'processable' when that PN is not
422 * considered a possible duplicate (see ossl_ackm_is_rx_pn_processable).
424 * **TX/RX interaction.** The watermark is bumped whenever an RX packet becomes
425 * provably ACKed. This occurs when an ACK frame is received by the TX side of
426 * the ACK manager; thus, there is necessary interaction between the TX and RX
427 * sides of the ACK manager.
429 * This is implemented as follows. When a packet is queued as sent in the TX
430 * side of the ACK manager, it may optionally have a Largest Acked value set on
431 * it. The user of the ACK manager should do this if the packet being
432 * transmitted contains an ACK frame, by setting the field to the Largest Acked
433 * field of that frame. Otherwise, this field should be set to QUIC_PN_INVALID.
434 * When a TX packet is eventually acknowledged which has this field set, it is
435 * used to update the state of the RX side of the ACK manager by bumping the
436 * watermark accordingly.
438 struct pn_set_item_st
{
439 struct pn_set_item_st
*prev
, *next
;
440 OSSL_QUIC_ACK_RANGE range
;
444 struct pn_set_item_st
*head
, *tail
;
446 /* Number of ranges (not PNs) in the set. */
450 static void pn_set_init(struct pn_set_st
*s
)
452 s
->head
= s
->tail
= NULL
;
456 static void pn_set_destroy(struct pn_set_st
*s
)
458 struct pn_set_item_st
*x
, *xnext
;
460 for (x
= s
->head
; x
!= NULL
; x
= xnext
) {
466 /* Possible merge of x, x->prev */
467 static void pn_set_merge_adjacent(struct pn_set_st
*s
, struct pn_set_item_st
*x
)
469 struct pn_set_item_st
*xprev
= x
->prev
;
474 if (x
->range
.start
- 1 != xprev
->range
.end
)
477 x
->range
.start
= xprev
->range
.start
;
478 x
->prev
= xprev
->prev
;
482 if (s
->head
== xprev
)
489 /* Returns 1 if there exists a PN x which falls within both ranges a and b. */
490 static int pn_range_overlaps(const OSSL_QUIC_ACK_RANGE
*a
,
491 const OSSL_QUIC_ACK_RANGE
*b
)
493 return ossl_quic_pn_min(a
->end
, b
->end
)
494 >= ossl_quic_pn_max(a
->start
, b
->start
);
498 * Insert a range into a PN set. Returns 0 on allocation failure, in which case
499 * the PN set is in a valid but undefined state. Otherwise, returns 1. Ranges
500 * can overlap existing ranges without limitation. If a range is a subset of
501 * an existing range in the set, this is a no-op and returns 1.
503 static int pn_set_insert(struct pn_set_st
*s
, const OSSL_QUIC_ACK_RANGE
*range
)
505 struct pn_set_item_st
*x
, *z
, *xnext
, *f
, *fnext
;
506 QUIC_PN start
= range
->start
, end
= range
->end
;
508 if (!ossl_assert(start
<= end
))
511 if (s
->head
== NULL
) {
512 /* Nothing in the set yet, so just add this range. */
513 x
= OPENSSL_zalloc(sizeof(struct pn_set_item_st
));
517 x
->range
.start
= start
;
519 s
->head
= s
->tail
= x
;
524 if (start
> s
->tail
->range
.end
) {
526 * Range is after the latest range in the set, so append.
528 * Note: The case where the range is before the earliest range in the
529 * set is handled as a degenerate case of the final case below. See
530 * optimization note (*) below.
532 if (s
->tail
->range
.end
+ 1 == start
) {
533 s
->tail
->range
.end
= end
;
537 x
= OPENSSL_zalloc(sizeof(struct pn_set_item_st
));
541 x
->range
.start
= start
;
551 if (start
<= s
->head
->range
.start
&& end
>= s
->tail
->range
.end
) {
553 * New range dwarfs all ranges in our set.
555 * Free everything except the first range in the set, which we scavenge
558 for (x
= s
->head
->next
; x
!= NULL
; x
= xnext
) {
563 s
->head
->range
.start
= start
;
564 s
->head
->range
.end
= end
;
565 s
->head
->next
= s
->head
->prev
= NULL
;
572 * Walk backwards since we will most often be inserting at the end. As an
573 * optimization, test the head node first and skip iterating over the
574 * entire list if we are inserting at the start. The assumption is that
575 * insertion at the start and end of the space will be the most common
578 z
= end
< s
->head
->range
.start
? s
->head
: s
->tail
;
580 for (; z
!= NULL
; z
= z
->prev
) {
581 /* An existing range dwarfs our new range (optimisation). */
582 if (z
->range
.start
<= start
&& z
->range
.end
>= end
)
585 if (pn_range_overlaps(&z
->range
, range
)) {
587 * Our new range overlaps an existing range, or possibly several
590 struct pn_set_item_st
*ovend
= z
;
591 OSSL_QUIC_ACK_RANGE t
;
594 t
.end
= ossl_quic_pn_max(end
, z
->range
.end
);
596 /* Get earliest overlapping range. */
597 for (; z
->prev
!= NULL
&& pn_range_overlaps(&z
->prev
->range
, range
);
600 t
.start
= ossl_quic_pn_min(start
, z
->range
.start
);
602 /* Replace sequence of nodes z..ovend with ovend only. */
604 ovend
->prev
= z
->prev
;
606 z
->prev
->next
= ovend
;
610 /* Free now unused nodes. */
611 for (f
= z
; f
!= ovend
; f
= fnext
, ++n
) {
618 } else if (end
< z
->range
.start
619 && (z
->prev
== NULL
|| start
> z
->prev
->range
.end
)) {
620 if (z
->range
.start
== end
+ 1) {
621 /* We can extend the following range backwards. */
622 z
->range
.start
= start
;
625 * If this closes a gap we now need to merge
628 pn_set_merge_adjacent(s
, z
);
629 } else if (z
->prev
!= NULL
&& z
->prev
->range
.end
+ 1 == start
) {
630 /* We can extend the preceding range forwards. */
631 z
->prev
->range
.end
= end
;
634 * If this closes a gap we now need to merge
637 pn_set_merge_adjacent(s
, z
);
640 * The new interval is between intervals without overlapping or
641 * touching them, so insert between, preserving sort.
643 x
= OPENSSL_zalloc(sizeof(struct pn_set_item_st
));
647 x
->range
.start
= start
;
668 * Remove a range from the set. Returns 0 on allocation failure, in which case
669 * the PN set is unchanged. Otherwise, returns 1. Ranges which are not already
670 * in the set can be removed without issue. If a passed range is not in the PN
671 * set at all, this is a no-op and returns 1.
673 static int pn_set_remove(struct pn_set_st
*s
, const OSSL_QUIC_ACK_RANGE
*range
)
675 struct pn_set_item_st
*z
, *zprev
, *y
;
676 QUIC_PN start
= range
->start
, end
= range
->end
;
678 if (!ossl_assert(start
<= end
))
681 /* Walk backwards since we will most often be removing at the end. */
682 for (z
= s
->tail
; z
!= NULL
; z
= zprev
) {
685 if (start
> z
->range
.end
)
686 /* No overlapping ranges can exist beyond this point, so stop. */
689 if (start
<= z
->range
.start
&& end
>= z
->range
.end
) {
691 * The range being removed dwarfs this range, so it should be
695 z
->next
->prev
= z
->prev
;
697 z
->prev
->next
= z
->next
;
705 } else if (start
<= z
->range
.start
) {
707 * The range being removed includes start of this range, but does
708 * not cover the entire range (as this would be caught by the case
709 * above). Shorten the range.
711 assert(end
< z
->range
.end
);
712 z
->range
.start
= end
+ 1;
713 } else if (end
>= z
->range
.end
) {
715 * The range being removed includes the end of this range, but does
716 * not cover the entire range (as this would be caught by the case
717 * above). Shorten the range. We can also stop iterating.
719 assert(start
> z
->range
.start
);
721 z
->range
.end
= start
- 1;
723 } else if (start
> z
->range
.start
&& end
< z
->range
.end
) {
725 * The range being removed falls entirely in this range, so cut it
726 * into two. Cases where a zero-length range would be created are
727 * handled by the above cases.
729 y
= OPENSSL_zalloc(sizeof(struct pn_set_item_st
));
733 y
->range
.end
= z
->range
.end
;
734 y
->range
.start
= end
+ 1;
740 z
->range
.end
= start
- 1;
749 /* Assert no partial overlap; all cases should be covered above. */
750 assert(!pn_range_overlaps(&z
->range
, range
));
757 /* Returns 1 iff the given PN is in the PN set. */
758 static int pn_set_query(const struct pn_set_st
*s
, QUIC_PN pn
)
760 struct pn_set_item_st
*x
;
765 for (x
= s
->tail
; x
!= NULL
; x
= x
->prev
)
766 if (x
->range
.start
<= pn
&& x
->range
.end
>= pn
)
768 else if (x
->range
.end
< pn
)
774 struct rx_pkt_history_st
{
775 struct pn_set_st set
;
778 * Invariant: PNs below this are not in the set.
779 * Invariant: This is monotonic and only ever increases.
784 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st
*h
,
787 static void rx_pkt_history_init(struct rx_pkt_history_st
*h
)
789 pn_set_init(&h
->set
);
793 static void rx_pkt_history_destroy(struct rx_pkt_history_st
*h
)
795 pn_set_destroy(&h
->set
);
799 * Limit the number of ACK ranges we store to prevent resource consumption DoS
802 #define MAX_RX_ACK_RANGES 32
804 static void rx_pkt_history_trim_range_count(struct rx_pkt_history_st
*h
)
806 QUIC_PN highest
= QUIC_PN_INVALID
;
808 while (h
->set
.num_ranges
> MAX_RX_ACK_RANGES
) {
809 OSSL_QUIC_ACK_RANGE r
= h
->set
.head
->range
;
811 highest
= (highest
== QUIC_PN_INVALID
)
812 ? r
.end
: ossl_quic_pn_max(highest
, r
.end
);
814 pn_set_remove(&h
->set
, &r
);
818 * Bump watermark to cover all PNs we removed to avoid accidential
819 * reprocessing of packets.
821 if (highest
!= QUIC_PN_INVALID
)
822 rx_pkt_history_bump_watermark(h
, highest
+ 1);
825 static int rx_pkt_history_add_pn(struct rx_pkt_history_st
*h
,
828 OSSL_QUIC_ACK_RANGE r
;
833 if (pn
< h
->watermark
)
834 return 1; /* consider this a success case */
836 if (pn_set_insert(&h
->set
, &r
) != 1)
839 rx_pkt_history_trim_range_count(h
);
843 static int rx_pkt_history_bump_watermark(struct rx_pkt_history_st
*h
,
846 OSSL_QUIC_ACK_RANGE r
;
848 if (watermark
<= h
->watermark
)
851 /* Remove existing PNs below the watermark. */
853 r
.end
= watermark
- 1;
854 if (pn_set_remove(&h
->set
, &r
) != 1)
857 h
->watermark
= watermark
;
862 * ACK Manager Implementation
863 * **************************
864 * Implementation of the ACK manager proper.
867 /* Constants used by the ACK manager; see RFC 9002. */
868 #define K_GRANULARITY (1 * OSSL_TIME_MS)
869 #define K_PKT_THRESHOLD 3
870 #define K_TIME_THRESHOLD_NUM 9
871 #define K_TIME_THRESHOLD_DEN 8
873 /* The maximum number of times we allow PTO to be doubled. */
874 #define MAX_PTO_COUNT 16
876 struct ossl_ackm_st
{
877 /* Our list of transmitted packets. Corresponds to RFC 9002 sent_packets. */
878 struct tx_pkt_history_st tx_history
[QUIC_PN_SPACE_NUM
];
880 /* Our list of received PNs which are not yet provably acked. */
881 struct rx_pkt_history_st rx_history
[QUIC_PN_SPACE_NUM
];
883 /* Polymorphic dependencies that we consume. */
884 OSSL_TIME (*now
)(void *arg
);
887 const OSSL_CC_METHOD
*cc_method
;
888 OSSL_CC_DATA
*cc_data
;
890 /* RFC 9002 variables. */
892 QUIC_PN largest_acked_pkt
[QUIC_PN_SPACE_NUM
];
893 OSSL_TIME time_of_last_ack_eliciting_pkt
[QUIC_PN_SPACE_NUM
];
894 OSSL_TIME loss_time
[QUIC_PN_SPACE_NUM
];
895 OSSL_TIME loss_detection_deadline
;
897 /* Lowest PN which is still not known to be ACKed. */
898 QUIC_PN lowest_unacked_pkt
[QUIC_PN_SPACE_NUM
];
900 /* Time at which we got our first RTT sample, or 0. */
901 OSSL_TIME first_rtt_sample
;
904 * A packet's num_bytes are added to this if it is inflight,
905 * and removed again once ack'd/lost/discarded.
907 uint64_t bytes_in_flight
;
910 * A packet's num_bytes are added to this if it is both inflight and
911 * ack-eliciting, and removed again once ack'd/lost/discarded.
913 uint64_t ack_eliciting_bytes_in_flight
[QUIC_PN_SPACE_NUM
];
915 /* Count of ECN-CE events. */
916 uint64_t peer_ecnce
[QUIC_PN_SPACE_NUM
];
918 /* Set to 1 when the handshake is confirmed. */
919 char handshake_confirmed
;
921 /* Set to 1 when the peer has completed address validation. */
922 char peer_completed_addr_validation
;
924 /* Set to 1 when a PN space has been discarded. */
925 char discarded
[QUIC_PN_SPACE_NUM
];
927 /* Set to 1 when we think an ACK frame should be generated. */
928 char rx_ack_desired
[QUIC_PN_SPACE_NUM
];
930 /* Set to 1 if an ACK frame has ever been generated. */
931 char rx_ack_generated
[QUIC_PN_SPACE_NUM
];
933 /* Probe request counts for reporting to the user. */
934 OSSL_ACKM_PROBE_INFO pending_probe
;
936 /* Generated ACK frames for each PN space. */
937 OSSL_QUIC_FRAME_ACK ack
[QUIC_PN_SPACE_NUM
];
938 OSSL_QUIC_ACK_RANGE ack_ranges
[QUIC_PN_SPACE_NUM
][MAX_RX_ACK_RANGES
];
940 /* Other RX state. */
941 /* Largest PN we have RX'd. */
942 QUIC_PN rx_largest_pn
[QUIC_PN_SPACE_NUM
];
944 /* Time at which the PN in rx_largest_pn was RX'd. */
945 OSSL_TIME rx_largest_time
[QUIC_PN_SPACE_NUM
];
948 * ECN event counters. Each time we receive a packet with a given ECN label,
949 * the corresponding ECN counter here is incremented.
951 uint64_t rx_ect0
[QUIC_PN_SPACE_NUM
];
952 uint64_t rx_ect1
[QUIC_PN_SPACE_NUM
];
953 uint64_t rx_ecnce
[QUIC_PN_SPACE_NUM
];
956 * Number of ACK-eliciting packets since last ACK. We use this to defer
957 * emitting ACK frames until a threshold number of ACK-eliciting packets
958 * have been received.
960 uint32_t rx_ack_eliciting_pkts_since_last_ack
[QUIC_PN_SPACE_NUM
];
963 * The ACK frame coalescing deadline at which we should flush any unsent ACK
966 OSSL_TIME rx_ack_flush_deadline
[QUIC_PN_SPACE_NUM
];
968 /* Callbacks for deadline updates. */
969 void (*loss_detection_deadline_cb
)(OSSL_TIME deadline
, void *arg
);
970 void *loss_detection_deadline_cb_arg
;
972 void (*ack_deadline_cb
)(OSSL_TIME deadline
, int pkt_space
, void *arg
);
973 void *ack_deadline_cb_arg
;
976 static ossl_inline
uint32_t min_u32(uint32_t x
, uint32_t y
)
978 return x
< y
? x
: y
;
982 * Get TX history for a given packet number space. Must not have been
985 static struct tx_pkt_history_st
*get_tx_history(OSSL_ACKM
*ackm
, int pkt_space
)
987 assert(!ackm
->discarded
[pkt_space
]);
989 return &ackm
->tx_history
[pkt_space
];
993 * Get RX history for a given packet number space. Must not have been
996 static struct rx_pkt_history_st
*get_rx_history(OSSL_ACKM
*ackm
, int pkt_space
)
998 assert(!ackm
->discarded
[pkt_space
]);
1000 return &ackm
->rx_history
[pkt_space
];
1003 /* Does the newly-acknowledged list contain any ack-eliciting packet? */
1004 static int ack_includes_ack_eliciting(OSSL_ACKM_TX_PKT
*pkt
)
1006 for (; pkt
!= NULL
; pkt
= pkt
->anext
)
1007 if (pkt
->is_ack_eliciting
)
1013 /* Return number of ACK-eliciting bytes in flight across all PN spaces. */
1014 static uint64_t ackm_ack_eliciting_bytes_in_flight(OSSL_ACKM
*ackm
)
1019 for (i
= 0; i
< QUIC_PN_SPACE_NUM
; ++i
)
1020 total
+= ackm
->ack_eliciting_bytes_in_flight
[i
];
1025 /* Return 1 if the range contains the given PN. */
1026 static int range_contains(const OSSL_QUIC_ACK_RANGE
*range
, QUIC_PN pn
)
1028 return pn
>= range
->start
&& pn
<= range
->end
;
1032 * Given a logical representation of an ACK frame 'ack', create a singly-linked
1033 * list of the newly ACK'd frames; that is, of frames which are matched by the
1034 * list of PN ranges contained in the ACK frame. The packet structures in the
1035 * list returned are removed from the TX history list. Returns a pointer to the
1036 * list head (or NULL) if empty.
1038 static OSSL_ACKM_TX_PKT
*ackm_detect_and_remove_newly_acked_pkts(OSSL_ACKM
*ackm
,
1039 const OSSL_QUIC_FRAME_ACK
*ack
,
1042 OSSL_ACKM_TX_PKT
*acked_pkts
= NULL
, **fixup
= &acked_pkts
, *pkt
, *pprev
;
1043 struct tx_pkt_history_st
*h
;
1046 assert(ack
->num_ack_ranges
> 0);
1049 * Our history list is a list of packets sorted in ascending order
1052 * ack->ack_ranges is a list of packet number ranges in descending order.
1054 * Walk through our history list from the end in order to efficiently detect
1055 * membership in the specified ack ranges. As an optimization, we use our
1056 * hashtable to try and skip to the first matching packet. This may fail if
1057 * the ACK ranges given include nonexistent packets.
1059 h
= get_tx_history(ackm
, pkt_space
);
1061 pkt
= tx_pkt_history_by_pkt_num(h
, ack
->ack_ranges
[0].end
);
1065 for (; pkt
!= NULL
; pkt
= pprev
) {
1067 * Save prev value as it will be zeroed if we remove the packet from the
1068 * history list below.
1073 if (ridx
>= ack
->num_ack_ranges
) {
1075 * We have exhausted all ranges so stop here, even if there are
1076 * more packets to look at.
1081 if (range_contains(&ack
->ack_ranges
[ridx
], pkt
->pkt_num
)) {
1082 /* We have matched this range. */
1083 tx_pkt_history_remove(h
, pkt
->pkt_num
);
1086 fixup
= &pkt
->anext
;
1089 } else if (pkt
->pkt_num
> ack
->ack_ranges
[ridx
].end
) {
1091 * We have not reached this range yet in our list, so do not
1097 * We have moved beyond this range, so advance to the next range
1098 * and try matching again.
1100 assert(pkt
->pkt_num
< ack
->ack_ranges
[ridx
].start
);
1111 * Create a singly-linked list of newly detected-lost packets in the given
1112 * packet number space. Returns the head of the list or NULL if no packets were
1113 * detected lost. The packets in the list are removed from the TX history list.
1115 static OSSL_ACKM_TX_PKT
*ackm_detect_and_remove_lost_pkts(OSSL_ACKM
*ackm
,
1118 OSSL_ACKM_TX_PKT
*lost_pkts
= NULL
, **fixup
= &lost_pkts
, *pkt
, *pnext
;
1119 OSSL_TIME loss_delay
, lost_send_time
, now
;
1121 struct tx_pkt_history_st
*h
;
1123 assert(ackm
->largest_acked_pkt
[pkt_space
] != QUIC_PN_INVALID
);
1125 ossl_statm_get_rtt_info(ackm
->statm
, &rtt
);
1127 ackm
->loss_time
[pkt_space
] = 0;
1129 loss_delay
= ossl_time_multiply(K_TIME_THRESHOLD_NUM
,
1130 ossl_time_max(rtt
.latest_rtt
,
1132 loss_delay
= ossl_time_divide(loss_delay
, K_TIME_THRESHOLD_DEN
);
1134 /* Minimum time of K_GRANULARITY before packets are deemed lost. */
1135 loss_delay
= ossl_time_max(loss_delay
, K_GRANULARITY
);
1137 /* Packets sent before this time are deemed lost. */
1138 now
= ackm
->now(ackm
->now_arg
);
1139 lost_send_time
= ossl_time_subtract(now
, loss_delay
);
1141 h
= get_tx_history(ackm
, pkt_space
);
1144 for (; pkt
!= NULL
; pkt
= pnext
) {
1145 assert(pkt_space
== pkt
->pkt_space
);
1148 * Save prev value as it will be zeroed if we remove the packet from the
1149 * history list below.
1153 if (pkt
->pkt_num
> ackm
->largest_acked_pkt
[pkt_space
])
1157 * Mark packet as lost, or set time when it should be marked.
1159 if (ossl_time_compare(pkt
->time
, lost_send_time
) <= 0
1160 || ackm
->largest_acked_pkt
[pkt_space
]
1161 >= pkt
->pkt_num
+ K_PKT_THRESHOLD
) {
1162 tx_pkt_history_remove(h
, pkt
->pkt_num
);
1165 fixup
= &pkt
->lnext
;
1168 if (ossl_time_is_zero(ackm
->loss_time
[pkt_space
]))
1169 ackm
->loss_time
[pkt_space
] =
1170 ossl_time_add(pkt
->time
, loss_delay
);
1172 ackm
->loss_time
[pkt_space
] =
1173 ossl_time_min(ackm
->loss_time
[pkt_space
],
1174 ossl_time_add(pkt
->time
, loss_delay
));
1181 static OSSL_TIME
ackm_get_loss_time_and_space(OSSL_ACKM
*ackm
, int *pspace
)
1183 OSSL_TIME time
= ackm
->loss_time
[QUIC_PN_SPACE_INITIAL
];
1184 int i
, space
= QUIC_PN_SPACE_INITIAL
;
1186 for (i
= space
+ 1; i
< QUIC_PN_SPACE_NUM
; ++i
)
1187 if (ossl_time_is_zero(time
)
1188 || ossl_time_compare(ackm
->loss_time
[i
], time
) == -1) {
1189 time
= ackm
->loss_time
[i
];
1197 static OSSL_TIME
ackm_get_pto_time_and_space(OSSL_ACKM
*ackm
, int *space
)
1201 OSSL_TIME pto_timeout
= OSSL_TIME_INFINITY
, t
;
1202 int pto_space
= QUIC_PN_SPACE_INITIAL
, i
;
1204 ossl_statm_get_rtt_info(ackm
->statm
, &rtt
);
1207 = ossl_time_add(rtt
.smoothed_rtt
,
1208 ossl_time_max(ossl_time_multiply(4, rtt
.rtt_variance
),
1212 = ossl_time_multiply(duration
, 1U << min_u32(ackm
->pto_count
,
1215 /* Anti-deadlock PTO starts from the current time. */
1216 if (ackm_ack_eliciting_bytes_in_flight(ackm
) == 0) {
1217 assert(!ackm
->peer_completed_addr_validation
);
1219 *space
= ackm
->discarded
[QUIC_PN_SPACE_INITIAL
]
1220 ? QUIC_PN_SPACE_HANDSHAKE
1221 : QUIC_PN_SPACE_INITIAL
;
1222 return ossl_time_add(ackm
->now(ackm
->now_arg
), duration
);
1225 for (i
= QUIC_PN_SPACE_INITIAL
; i
< QUIC_PN_SPACE_NUM
; ++i
) {
1226 if (ackm
->ack_eliciting_bytes_in_flight
[i
] == 0)
1229 if (i
== QUIC_PN_SPACE_APP
) {
1230 /* Skip application data until handshake confirmed. */
1231 if (!ackm
->handshake_confirmed
)
1234 /* Include max_ack_delay and backoff for app data. */
1235 if (!ossl_time_is_infinity(rtt
.max_ack_delay
))
1237 = ossl_time_add(duration
,
1238 ossl_time_multiply(rtt
.max_ack_delay
,
1239 1U << min_u32(ackm
->pto_count
,
1243 t
= ossl_time_add(ackm
->time_of_last_ack_eliciting_pkt
[i
], duration
);
1244 if (t
< pto_timeout
) {
1254 static void ackm_set_loss_detection_timer_actual(OSSL_ACKM
*ackm
,
1257 ackm
->loss_detection_deadline
= deadline
;
1259 if (ackm
->loss_detection_deadline_cb
!= NULL
)
1260 ackm
->loss_detection_deadline_cb(deadline
,
1261 ackm
->loss_detection_deadline_cb_arg
);
1264 static int ackm_set_loss_detection_timer(OSSL_ACKM
*ackm
)
1267 OSSL_TIME earliest_loss_time
, timeout
;
1269 earliest_loss_time
= ackm_get_loss_time_and_space(ackm
, &space
);
1270 if (!ossl_time_is_zero(earliest_loss_time
)) {
1271 /* Time threshold loss detection. */
1272 ackm_set_loss_detection_timer_actual(ackm
, earliest_loss_time
);
1276 if (ackm_ack_eliciting_bytes_in_flight(ackm
) == 0
1277 && ackm
->peer_completed_addr_validation
) {
1279 * Nothing to detect lost, so no timer is set. However, the client
1280 * needs to arm the timer if the server might be blocked by the
1281 * anti-amplification limit.
1283 ackm_set_loss_detection_timer_actual(ackm
, OSSL_TIME_ZERO
);
1287 timeout
= ackm_get_pto_time_and_space(ackm
, &space
);
1288 ackm_set_loss_detection_timer_actual(ackm
, timeout
);
1292 static int ackm_in_persistent_congestion(OSSL_ACKM
*ackm
,
1293 const OSSL_ACKM_TX_PKT
*lpkt
)
1295 /* Persistent congestion not currently implemented. */
1299 static void ackm_on_pkts_lost(OSSL_ACKM
*ackm
, int pkt_space
,
1300 const OSSL_ACKM_TX_PKT
*lpkt
)
1302 const OSSL_ACKM_TX_PKT
*p
, *pnext
;
1304 QUIC_PN largest_pn_lost
= 0;
1305 uint64_t num_bytes
= 0;
1307 for (p
= lpkt
; p
!= NULL
; p
= pnext
) {
1310 if (p
->is_inflight
) {
1311 ackm
->bytes_in_flight
-= p
->num_bytes
;
1312 if (p
->is_ack_eliciting
)
1313 ackm
->ack_eliciting_bytes_in_flight
[p
->pkt_space
]
1316 if (p
->pkt_num
> largest_pn_lost
)
1317 largest_pn_lost
= p
->pkt_num
;
1319 num_bytes
+= p
->num_bytes
;
1322 p
->on_lost(p
->cb_arg
);
1326 * Only consider lost packets with regards to congestion after getting an
1329 ossl_statm_get_rtt_info(ackm
->statm
, &rtt
);
1331 if (ackm
->first_rtt_sample
== 0)
1334 ackm
->cc_method
->on_data_lost(ackm
->cc_data
,
1336 ackm
->tx_history
[pkt_space
].highest_sent
,
1338 ackm_in_persistent_congestion(ackm
, lpkt
));
1341 static void ackm_on_pkts_acked(OSSL_ACKM
*ackm
, const OSSL_ACKM_TX_PKT
*apkt
)
1343 const OSSL_ACKM_TX_PKT
*anext
;
1345 uint64_t num_retransmittable_bytes
= 0;
1346 QUIC_PN last_pn_acked
= 0;
1348 now
= ackm
->now(ackm
->now_arg
);
1350 for (; apkt
!= NULL
; apkt
= anext
) {
1351 if (apkt
->is_inflight
) {
1352 ackm
->bytes_in_flight
-= apkt
->num_bytes
;
1353 if (apkt
->is_ack_eliciting
)
1354 ackm
->ack_eliciting_bytes_in_flight
[apkt
->pkt_space
]
1357 num_retransmittable_bytes
+= apkt
->num_bytes
;
1358 if (apkt
->pkt_num
> last_pn_acked
)
1359 last_pn_acked
= apkt
->pkt_num
;
1361 if (apkt
->largest_acked
!= QUIC_PN_INVALID
)
1363 * This can fail, but it is monotonic; worst case we try again
1366 rx_pkt_history_bump_watermark(get_rx_history(ackm
,
1368 apkt
->largest_acked
+ 1);
1371 anext
= apkt
->anext
;
1372 apkt
->on_acked(apkt
->cb_arg
); /* may free apkt */
1375 ackm
->cc_method
->on_data_acked(ackm
->cc_data
, now
,
1376 last_pn_acked
, num_retransmittable_bytes
);
1379 OSSL_ACKM
*ossl_ackm_new(OSSL_TIME (*now
)(void *arg
),
1382 const OSSL_CC_METHOD
*cc_method
,
1383 OSSL_CC_DATA
*cc_data
)
1388 ackm
= OPENSSL_zalloc(sizeof(OSSL_ACKM
));
1392 for (i
= 0; i
< (int)OSSL_NELEM(ackm
->tx_history
); ++i
) {
1393 ackm
->largest_acked_pkt
[i
] = QUIC_PN_INVALID
;
1394 ackm
->rx_ack_flush_deadline
[i
] = OSSL_TIME_INFINITY
;
1395 if (tx_pkt_history_init(&ackm
->tx_history
[i
]) < 1)
1399 for (i
= 0; i
< (int)OSSL_NELEM(ackm
->rx_history
); ++i
)
1400 rx_pkt_history_init(&ackm
->rx_history
[i
]);
1403 ackm
->now_arg
= now_arg
;
1404 ackm
->statm
= statm
;
1405 ackm
->cc_method
= cc_method
;
1406 ackm
->cc_data
= cc_data
;
1411 tx_pkt_history_destroy(&ackm
->tx_history
[i
]);
1417 void ossl_ackm_free(OSSL_ACKM
*ackm
)
1424 for (i
= 0; i
< OSSL_NELEM(ackm
->tx_history
); ++i
)
1425 if (!ackm
->discarded
[i
]) {
1426 tx_pkt_history_destroy(&ackm
->tx_history
[i
]);
1427 rx_pkt_history_destroy(&ackm
->rx_history
[i
]);
1433 int ossl_ackm_on_tx_packet(OSSL_ACKM
*ackm
, OSSL_ACKM_TX_PKT
*pkt
)
1435 struct tx_pkt_history_st
*h
= get_tx_history(ackm
, pkt
->pkt_space
);
1437 /* Time must be set and not move backwards. */
1438 if (ossl_time_is_zero(pkt
->time
)
1439 || ossl_time_compare(ackm
->time_of_last_ack_eliciting_pkt
[pkt
->pkt_space
],
1443 /* Must have non-zero number of bytes. */
1444 if (pkt
->num_bytes
== 0)
1447 if (tx_pkt_history_add(h
, pkt
) == 0)
1450 if (pkt
->is_inflight
) {
1451 if (pkt
->is_ack_eliciting
) {
1452 ackm
->time_of_last_ack_eliciting_pkt
[pkt
->pkt_space
] = pkt
->time
;
1453 ackm
->ack_eliciting_bytes_in_flight
[pkt
->pkt_space
]
1457 ackm
->bytes_in_flight
+= pkt
->num_bytes
;
1458 ackm_set_loss_detection_timer(ackm
);
1460 ackm
->cc_method
->on_data_sent(ackm
->cc_data
, pkt
->num_bytes
);
1466 int ossl_ackm_on_rx_datagram(OSSL_ACKM
*ackm
, size_t num_bytes
)
1468 /* No-op on the client. */
1472 static void ackm_on_congestion(OSSL_ACKM
*ackm
, OSSL_TIME send_time
)
1474 /* Not currently implemented. */
1477 static void ackm_process_ecn(OSSL_ACKM
*ackm
, const OSSL_QUIC_FRAME_ACK
*ack
,
1480 struct tx_pkt_history_st
*h
;
1481 OSSL_ACKM_TX_PKT
*pkt
;
1484 * If the ECN-CE counter reported by the peer has increased, this could
1485 * be a new congestion event.
1487 if (ack
->ecnce
> ackm
->peer_ecnce
[pkt_space
]) {
1488 ackm
->peer_ecnce
[pkt_space
] = ack
->ecnce
;
1490 h
= get_tx_history(ackm
, pkt_space
);
1491 pkt
= tx_pkt_history_by_pkt_num(h
, ack
->ack_ranges
[0].end
);
1495 ackm_on_congestion(ackm
, pkt
->time
);
1499 int ossl_ackm_on_rx_ack_frame(OSSL_ACKM
*ackm
, const OSSL_QUIC_FRAME_ACK
*ack
,
1500 int pkt_space
, OSSL_TIME rx_time
)
1502 OSSL_ACKM_TX_PKT
*na_pkts
, *lost_pkts
;
1503 int must_set_timer
= 0;
1505 if (ackm
->largest_acked_pkt
[pkt_space
] == QUIC_PN_INVALID
)
1506 ackm
->largest_acked_pkt
[pkt_space
] = ack
->ack_ranges
[0].end
;
1508 ackm
->largest_acked_pkt
[pkt_space
]
1509 = ossl_quic_pn_max(ackm
->largest_acked_pkt
[pkt_space
],
1510 ack
->ack_ranges
[0].end
);
1513 * If we get an ACK in the handshake space, address validation is completed.
1514 * Make sure we update the timer, even if no packets were ACK'd.
1516 if (!ackm
->peer_completed_addr_validation
1517 && pkt_space
== QUIC_PN_SPACE_HANDSHAKE
) {
1518 ackm
->peer_completed_addr_validation
= 1;
1523 * Find packets that are newly acknowledged and remove them from the list.
1525 na_pkts
= ackm_detect_and_remove_newly_acked_pkts(ackm
, ack
, pkt_space
);
1526 if (na_pkts
== NULL
) {
1528 ackm_set_loss_detection_timer(ackm
);
1534 * Update the RTT if the largest acknowledged is newly acked and at least
1535 * one ACK-eliciting packet was newly acked.
1537 * First packet in the list is always the one with the largest PN.
1539 if (na_pkts
->pkt_num
== ack
->ack_ranges
[0].end
&&
1540 ack_includes_ack_eliciting(na_pkts
)) {
1541 OSSL_TIME now
= ackm
->now(ackm
->now_arg
), ack_delay
;
1542 if (ossl_time_is_zero(ackm
->first_rtt_sample
))
1543 ackm
->first_rtt_sample
= now
;
1545 /* Enforce maximum ACK delay. */
1546 ack_delay
= ack
->delay_time
;
1547 if (ackm
->handshake_confirmed
) {
1550 ossl_statm_get_rtt_info(ackm
->statm
, &rtt
);
1551 ack_delay
= ossl_time_min(ack_delay
, rtt
.max_ack_delay
);
1554 ossl_statm_update_rtt(ackm
->statm
, ack_delay
,
1555 ossl_time_subtract(now
, na_pkts
->time
));
1558 /* Process ECN information if present. */
1559 if (ack
->ecn_present
)
1560 ackm_process_ecn(ackm
, ack
, pkt_space
);
1562 /* Handle inferred loss. */
1563 lost_pkts
= ackm_detect_and_remove_lost_pkts(ackm
, pkt_space
);
1564 if (lost_pkts
!= NULL
)
1565 ackm_on_pkts_lost(ackm
, pkt_space
, lost_pkts
);
1567 ackm_on_pkts_acked(ackm
, na_pkts
);
1570 * Reset pto_count unless the client is unsure if the server validated the
1573 if (ackm
->peer_completed_addr_validation
)
1574 ackm
->pto_count
= 0;
1576 ackm_set_loss_detection_timer(ackm
);
1580 int ossl_ackm_on_pkt_space_discarded(OSSL_ACKM
*ackm
, int pkt_space
)
1582 OSSL_ACKM_TX_PKT
*pkt
, *pnext
;
1583 uint64_t num_bytes_invalidated
= 0;
1585 assert(pkt_space
< QUIC_PN_SPACE_APP
);
1587 if (ackm
->discarded
[pkt_space
])
1590 if (pkt_space
== QUIC_PN_SPACE_HANDSHAKE
)
1591 ackm
->peer_completed_addr_validation
= 1;
1593 for (pkt
= get_tx_history(ackm
, pkt_space
)->head
; pkt
!= NULL
; pkt
= pnext
) {
1595 if (pkt
->is_inflight
) {
1596 ackm
->bytes_in_flight
-= pkt
->num_bytes
;
1597 num_bytes_invalidated
+= pkt
->num_bytes
;
1600 pkt
->on_discarded(pkt
->cb_arg
); /* may free pkt */
1603 tx_pkt_history_destroy(&ackm
->tx_history
[pkt_space
]);
1604 rx_pkt_history_destroy(&ackm
->rx_history
[pkt_space
]);
1606 if (num_bytes_invalidated
> 0)
1607 ackm
->cc_method
->on_data_invalidated(ackm
->cc_data
,
1608 num_bytes_invalidated
);
1610 ackm
->time_of_last_ack_eliciting_pkt
[pkt_space
] = OSSL_TIME_ZERO
;
1611 ackm
->loss_time
[pkt_space
] = OSSL_TIME_ZERO
;
1612 ackm
->pto_count
= 0;
1613 ackm
->discarded
[pkt_space
] = 1;
1614 ackm
->ack_eliciting_bytes_in_flight
[pkt_space
] = 0;
1615 ackm_set_loss_detection_timer(ackm
);
1619 int ossl_ackm_on_handshake_confirmed(OSSL_ACKM
*ackm
)
1621 ackm
->handshake_confirmed
= 1;
1622 ackm
->peer_completed_addr_validation
= 1;
1623 ackm_set_loss_detection_timer(ackm
);
1627 static void ackm_queue_probe_handshake(OSSL_ACKM
*ackm
)
1629 ++ackm
->pending_probe
.handshake
;
1632 static void ackm_queue_probe_padded_initial(OSSL_ACKM
*ackm
)
1634 ++ackm
->pending_probe
.padded_initial
;
1637 static void ackm_queue_probe(OSSL_ACKM
*ackm
, int pkt_space
)
1639 ++ackm
->pending_probe
.pto
[pkt_space
];
1642 int ossl_ackm_on_timeout(OSSL_ACKM
*ackm
)
1645 OSSL_TIME earliest_loss_time
;
1646 OSSL_ACKM_TX_PKT
*lost_pkts
;
1648 earliest_loss_time
= ackm_get_loss_time_and_space(ackm
, &pkt_space
);
1649 if (!ossl_time_is_zero(earliest_loss_time
)) {
1650 /* Time threshold loss detection. */
1651 lost_pkts
= ackm_detect_and_remove_lost_pkts(ackm
, pkt_space
);
1652 assert(lost_pkts
!= NULL
);
1653 ackm_on_pkts_lost(ackm
, pkt_space
, lost_pkts
);
1654 ackm_set_loss_detection_timer(ackm
);
1658 if (ackm_ack_eliciting_bytes_in_flight(ackm
) == 0) {
1659 assert(!ackm
->peer_completed_addr_validation
);
1661 * Client sends an anti-deadlock packet: Initial is padded to earn more
1662 * anti-amplification credit. A handshake packet proves address
1665 if (ackm
->discarded
[QUIC_PN_SPACE_INITIAL
])
1666 ackm_queue_probe_handshake(ackm
);
1668 ackm_queue_probe_padded_initial(ackm
);
1671 * PTO. The user of the ACKM should send new data if available, else
1672 * retransmit old data, or if neither is available, send a single PING
1675 ackm_get_pto_time_and_space(ackm
, &pkt_space
);
1676 ackm_queue_probe(ackm
, pkt_space
);
1680 ackm_set_loss_detection_timer(ackm
);
1684 OSSL_TIME
ossl_ackm_get_loss_detection_deadline(OSSL_ACKM
*ackm
)
1686 return ackm
->loss_detection_deadline
;
1689 int ossl_ackm_get_probe_request(OSSL_ACKM
*ackm
, int clear
,
1690 OSSL_ACKM_PROBE_INFO
*info
)
1692 *info
= ackm
->pending_probe
;
1695 memset(&ackm
->pending_probe
, 0, sizeof(ackm
->pending_probe
));
1700 int ossl_ackm_get_largest_unacked(OSSL_ACKM
*ackm
, int pkt_space
, QUIC_PN
*pn
)
1702 struct tx_pkt_history_st
*h
;
1704 h
= get_tx_history(ackm
, pkt_space
);
1705 if (h
->tail
!= NULL
) {
1706 *pn
= h
->tail
->pkt_num
;
1713 /* Number of ACK-eliciting packets RX'd before we always emit an ACK. */
1714 #define PKTS_BEFORE_ACK 2
1715 /* Maximum amount of time to leave an ACK-eliciting packet un-ACK'd. */
1716 #define MAX_ACK_DELAY (ossl_time_multiply(OSSL_TIME_MS, 25))
1719 * Return 1 if emission of an ACK frame is currently desired.
1721 * This occurs when one or more of the following conditions occurs:
1723 * - We have flagged that we want to send an ACK frame
1724 * (for example, due to the packet threshold count being exceeded), or
1726 * - We have exceeded the ACK flush deadline, meaning that
1727 * we have received at least one ACK-eliciting packet, but held off on
1728 * sending an ACK frame immediately in the hope that more ACK-eliciting
1729 * packets might come in, but not enough did and we are now requesting
1730 * transmission of an ACK frame anyway.
1733 int ossl_ackm_is_ack_desired(OSSL_ACKM
*ackm
, int pkt_space
)
1735 return ackm
->rx_ack_desired
[pkt_space
]
1736 || (!ossl_time_is_infinity(ackm
->rx_ack_flush_deadline
[pkt_space
])
1737 && ossl_time_compare(ackm
->now(ackm
->now_arg
),
1738 ackm
->rx_ack_flush_deadline
[pkt_space
]) >= 0);
1742 * Returns 1 if an ACK frame matches a given packet number.
1744 static int ack_contains(const OSSL_QUIC_FRAME_ACK
*ack
, QUIC_PN pkt_num
)
1748 for (i
= 0; i
< ack
->num_ack_ranges
; ++i
)
1749 if (range_contains(&ack
->ack_ranges
[i
], pkt_num
))
1756 * Returns 1 iff a PN (which we have just received) was previously reported as
1757 * implied missing (by us, in an ACK frame we previously generated).
1759 static int ackm_is_missing(OSSL_ACKM
*ackm
, int pkt_space
, QUIC_PN pkt_num
)
1762 * A PN is implied missing if it is not greater than the highest PN in our
1763 * generated ACK frame, but is not matched by the frame.
1765 return ackm
->ack
[pkt_space
].num_ack_ranges
> 0
1766 && pkt_num
<= ackm
->ack
[pkt_space
].ack_ranges
[0].end
1767 && !ack_contains(&ackm
->ack
[pkt_space
], pkt_num
);
1771 * Returns 1 iff our RX of a PN newly establishes the implication of missing
1774 static int ackm_has_newly_missing(OSSL_ACKM
*ackm
, int pkt_space
)
1776 struct rx_pkt_history_st
*h
;
1778 h
= get_rx_history(ackm
, pkt_space
);
1780 if (h
->set
.tail
== NULL
)
1784 * The second condition here establishes that the highest PN range in our RX
1785 * history comprises only a single PN. If there is more than one, then this
1786 * function will have returned 1 during a previous call to
1787 * ossl_ackm_on_rx_packet assuming the third condition below was met. Thus
1788 * we only return 1 when the missing PN condition is newly established.
1790 * The third condition here establishes that the highest PN range in our RX
1791 * history is beyond (and does not border) the highest PN we have yet
1792 * reported in any ACK frame. Thus there is a gap of at least one PN between
1793 * the PNs we have ACK'd previously and the PN we have just received.
1795 return ackm
->ack
[pkt_space
].num_ack_ranges
> 0
1796 && h
->set
.tail
->range
.start
== h
->set
.tail
->range
.end
1797 && h
->set
.tail
->range
.start
1798 > ackm
->ack
[pkt_space
].ack_ranges
[0].end
+ 1;
1801 static void ackm_set_flush_deadline(OSSL_ACKM
*ackm
, int pkt_space
,
1804 ackm
->rx_ack_flush_deadline
[pkt_space
] = deadline
;
1806 if (ackm
->ack_deadline_cb
!= NULL
)
1807 ackm
->ack_deadline_cb(ossl_ackm_get_ack_deadline(ackm
, pkt_space
),
1808 pkt_space
, ackm
->ack_deadline_cb_arg
);
1811 /* Explicitly flags that we want to generate an ACK frame. */
1812 static void ackm_queue_ack(OSSL_ACKM
*ackm
, int pkt_space
)
1814 ackm
->rx_ack_desired
[pkt_space
] = 1;
1816 /* Cancel deadline. */
1817 ackm_set_flush_deadline(ackm
, pkt_space
, OSSL_TIME_INFINITY
);
1820 static void ackm_on_rx_ack_eliciting(OSSL_ACKM
*ackm
,
1821 OSSL_TIME rx_time
, int pkt_space
,
1824 if (ackm
->rx_ack_desired
[pkt_space
])
1825 /* ACK generation already requested so nothing to do. */
1828 ++ackm
->rx_ack_eliciting_pkts_since_last_ack
[pkt_space
];
1830 if (!ackm
->rx_ack_generated
[pkt_space
]
1832 || ackm
->rx_ack_eliciting_pkts_since_last_ack
[pkt_space
]
1834 || ackm_has_newly_missing(ackm
, pkt_space
)) {
1838 * - We have never yet generated an ACK frame, meaning that this
1839 * is the first ever packet received, which we should always
1840 * acknowledge immediately, or
1842 * - We previously reported the PN that we have just received as
1843 * missing in a previous ACK frame (meaning that we should report
1844 * the fact that we now have it to the peer immediately), or
1846 * - We have exceeded the ACK-eliciting packet threshold count
1847 * for the purposes of ACK coalescing, so request transmission
1848 * of an ACK frame, or
1850 * - The PN we just received and added to our PN RX history
1851 * newly implies one or more missing PNs, in which case we should
1852 * inform the peer by sending an ACK frame immediately.
1854 * We do not test the ACK flush deadline here because it is tested
1855 * separately in ossl_ackm_is_ack_desired.
1857 ackm_queue_ack(ackm
, pkt_space
);
1862 * Not emitting an ACK yet.
1864 * Update the ACK flush deadline.
1866 if (ossl_time_is_infinity(ackm
->rx_ack_flush_deadline
[pkt_space
]))
1867 ackm_set_flush_deadline(ackm
, pkt_space
,
1868 ossl_time_add(rx_time
, MAX_ACK_DELAY
));
1870 ackm_set_flush_deadline(ackm
, pkt_space
,
1871 ossl_time_min(ackm
->rx_ack_flush_deadline
[pkt_space
],
1872 ossl_time_add(rx_time
,
1876 int ossl_ackm_on_rx_packet(OSSL_ACKM
*ackm
, const OSSL_ACKM_RX_PKT
*pkt
)
1878 struct rx_pkt_history_st
*h
= get_rx_history(ackm
, pkt
->pkt_space
);
1881 if (ossl_ackm_is_rx_pn_processable(ackm
, pkt
->pkt_num
, pkt
->pkt_space
) != 1)
1882 /* PN has already been processed or written off, no-op. */
1886 * Record the largest PN we have RX'd and the time we received it.
1887 * We use this to calculate the ACK delay field of ACK frames.
1889 if (pkt
->pkt_num
> ackm
->rx_largest_pn
[pkt
->pkt_space
]) {
1890 ackm
->rx_largest_pn
[pkt
->pkt_space
] = pkt
->pkt_num
;
1891 ackm
->rx_largest_time
[pkt
->pkt_space
] = pkt
->time
;
1895 * If the PN we just received was previously implied missing by virtue of
1896 * being omitted from a previous ACK frame generated, we skip any packet
1897 * count thresholds or coalescing delays and emit a new ACK frame
1900 was_missing
= ackm_is_missing(ackm
, pkt
->pkt_space
, pkt
->pkt_num
);
1903 * Add the packet number to our history list of PNs we have not yet provably
1906 if (rx_pkt_history_add_pn(h
, pkt
->pkt_num
) != 1)
1910 * Receiving this packet may or may not cause us to emit an ACK frame.
1911 * We may not emit an ACK frame yet if we have not yet received a threshold
1912 * number of packets.
1914 if (pkt
->is_ack_eliciting
)
1915 ackm_on_rx_ack_eliciting(ackm
, pkt
->time
, pkt
->pkt_space
, was_missing
);
1917 /* Update the ECN counters according to which ECN signal we got, if any. */
1919 case OSSL_ACKM_ECN_ECT0
:
1920 ++ackm
->rx_ect0
[pkt
->pkt_space
];
1922 case OSSL_ACKM_ECN_ECT1
:
1923 ++ackm
->rx_ect1
[pkt
->pkt_space
];
1925 case OSSL_ACKM_ECN_ECNCE
:
1926 ++ackm
->rx_ecnce
[pkt
->pkt_space
];
1935 static void ackm_fill_rx_ack_ranges(OSSL_ACKM
*ackm
, int pkt_space
,
1936 OSSL_QUIC_FRAME_ACK
*ack
)
1938 struct rx_pkt_history_st
*h
= get_rx_history(ackm
, pkt_space
);
1939 struct pn_set_item_st
*x
;
1943 * Copy out ranges from the PN set, starting at the end, until we reach our
1944 * maximum number of ranges.
1946 for (x
= h
->set
.tail
;
1947 x
!= NULL
&& i
< OSSL_NELEM(ackm
->ack_ranges
);
1949 ackm
->ack_ranges
[pkt_space
][i
] = x
->range
;
1951 ack
->ack_ranges
= ackm
->ack_ranges
[pkt_space
];
1952 ack
->num_ack_ranges
= i
;
1955 const OSSL_QUIC_FRAME_ACK
*ossl_ackm_get_ack_frame(OSSL_ACKM
*ackm
,
1958 OSSL_QUIC_FRAME_ACK
*ack
= &ackm
->ack
[pkt_space
];
1959 OSSL_TIME now
= ackm
->now(ackm
->now_arg
);
1961 ackm_fill_rx_ack_ranges(ackm
, pkt_space
, ack
);
1963 if (!ossl_time_is_zero(ackm
->rx_largest_time
[pkt_space
])
1964 && ossl_time_compare(now
, ackm
->rx_largest_time
[pkt_space
]) > 0
1965 && pkt_space
== QUIC_PN_SPACE_APP
)
1967 ossl_time_subtract(now
, ackm
->rx_largest_time
[pkt_space
]);
1969 ack
->delay_time
= OSSL_TIME_ZERO
;
1971 ack
->ect0
= ackm
->rx_ect0
[pkt_space
];
1972 ack
->ect1
= ackm
->rx_ect1
[pkt_space
];
1973 ack
->ecnce
= ackm
->rx_ecnce
[pkt_space
];
1974 ack
->ecn_present
= 1;
1976 ackm
->rx_ack_eliciting_pkts_since_last_ack
[pkt_space
] = 0;
1978 ackm
->rx_ack_generated
[pkt_space
] = 1;
1979 ackm
->rx_ack_desired
[pkt_space
] = 0;
1980 ackm_set_flush_deadline(ackm
, pkt_space
, OSSL_TIME_INFINITY
);
1985 OSSL_TIME
ossl_ackm_get_ack_deadline(OSSL_ACKM
*ackm
, int pkt_space
)
1987 if (ackm
->rx_ack_desired
[pkt_space
])
1988 /* Already desired, deadline is now. */
1989 return OSSL_TIME_ZERO
;
1991 return ackm
->rx_ack_flush_deadline
[pkt_space
];
1994 int ossl_ackm_is_rx_pn_processable(OSSL_ACKM
*ackm
, QUIC_PN pn
, int pkt_space
)
1996 struct rx_pkt_history_st
*h
= get_rx_history(ackm
, pkt_space
);
1998 return pn
>= h
->watermark
&& pn_set_query(&h
->set
, pn
) == 0;
2001 void ossl_ackm_set_loss_detection_deadline_callback(OSSL_ACKM
*ackm
,
2002 void (*fn
)(OSSL_TIME deadline
,
2006 ackm
->loss_detection_deadline_cb
= fn
;
2007 ackm
->loss_detection_deadline_cb_arg
= arg
;
2010 void ossl_ackm_set_ack_deadline_callback(OSSL_ACKM
*ackm
,
2011 void (*fn
)(OSSL_TIME deadline
,
2016 ackm
->ack_deadline_cb
= fn
;
2017 ackm
->ack_deadline_cb_arg
= arg
;