Two flags for congestion control (CC) module are added in this patch
related to AccECN negotiation. First, a new flag (TCP_CONG_NEEDS_ACCECN)
defines that the CC expects to negotiate AccECN functionality using the
ECE, CWR and AE flags in the TCP header.
Second, during ECN negotiation, ECT(0) in the IP header is used. This
patch enables CC to control whether ECT(0) or ECT(1) should be used on
a per-segment basis. A new flag (TCP_CONG_ECT_1_NEGOTIATION) defines the
expected ECT value in the IP header by the CA when not-yet initialized
for the connection.
The detailed AccECN negotiaotn can be found in IETF RFC9768.
Co-developed-by: Olivier Tilmans <olivier.tilmans@nokia.com>
Signed-off-by: Olivier Tilmans <olivier.tilmans@nokia.com>
Signed-off-by: Ilpo Järvinen <ij@kernel.org>
Signed-off-by: Chia-Yu Chang <chia-yu.chang@nokia-bell-labs.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260131222515.8485-5-chia-yu.chang@nokia-bell-labs.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
return outer;
}
+/* Apply either ECT(0) or ECT(1) */
+static inline void __INET_ECN_xmit(struct sock *sk, bool use_ect_1)
+{
+ __u8 ect = use_ect_1 ? INET_ECN_ECT_1 : INET_ECN_ECT_0;
+
+ /* Mask the complete byte in case the connection alternates between
+ * ECT(0) and ECT(1).
+ */
+ inet_sk(sk)->tos &= ~INET_ECN_MASK;
+ inet_sk(sk)->tos |= ect;
+ if (inet6_sk(sk)) {
+ inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
+ inet6_sk(sk)->tclass |= ect;
+ }
+}
+
static inline void INET_ECN_xmit(struct sock *sk)
{
- inet_sk(sk)->tos |= INET_ECN_ECT_0;
- if (inet6_sk(sk) != NULL)
- inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+ __INET_ECN_xmit(sk, false);
}
static inline void INET_ECN_dontxmit(struct sock *sk)
#define TCP_CONG_NON_RESTRICTED BIT(0)
/* Requires ECN/ECT set on all packets */
#define TCP_CONG_NEEDS_ECN BIT(1)
-#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN)
+/* Require successfully negotiated AccECN capability */
+#define TCP_CONG_NEEDS_ACCECN BIT(2)
+/* Use ECT(1) instead of ECT(0) while the CA is uninitialized */
+#define TCP_CONG_ECT_1_NEGOTIATION BIT(3)
+#define TCP_CONG_MASK (TCP_CONG_NON_RESTRICTED | TCP_CONG_NEEDS_ECN | \
+ TCP_CONG_NEEDS_ACCECN | TCP_CONG_ECT_1_NEGOTIATION)
union tcp_cc_info;
return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN;
}
+static inline bool tcp_ca_needs_accecn(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ACCECN;
+}
+
+static inline bool tcp_ca_ect_1_negotiation(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ return icsk->icsk_ca_ops->flags & TCP_CONG_ECT_1_NEGOTIATION;
+}
+
static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
TCP_ACCECN_OPTION_FULL = 2,
};
+/* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */
+static inline void INET_ECN_xmit_ect_1_negotiation(struct sock *sk)
+{
+ __INET_ECN_xmit(sk, tcp_ca_ect_1_negotiation(sk));
+}
+
static inline void tcp_ecn_queue_cwr(struct tcp_sock *tp)
{
/* Do not set CWR if in AccECN mode! */
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
else if (tcp_ca_needs_ecn(sk) ||
tcp_bpf_ca_needs_ecn(sk))
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
if (tp->ecn_flags & TCP_ECN_MODE_ACCECN) {
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ACE;
bool use_ecn, use_accecn;
u8 tcp_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
- use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN;
+ use_accecn = tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ACCECN ||
+ tcp_ca_needs_accecn(sk);
use_ecn = tcp_ecn == TCP_ECN_IN_ECN_OUT_ECN ||
tcp_ecn == TCP_ECN_IN_ACCECN_OUT_ECN ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn || use_accecn;
if (use_ecn) {
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
if (use_accecn) {
#include <linux/gfp.h>
#include <linux/jhash.h>
#include <net/tcp.h>
+#include <net/tcp_ecn.h>
#include <trace/events/tcp.h>
static DEFINE_SPINLOCK(tcp_cong_list_lock);
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
else
INET_ECN_dontxmit(sk);
}
memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
if (ca->flags & TCP_CONG_NEEDS_ECN)
- INET_ECN_xmit(sk);
+ INET_ECN_xmit_ect_1_negotiation(sk);
else
INET_ECN_dontxmit(sk);
u32 ecn_ok_dst;
if (tcp_accecn_syn_requested(th) &&
- READ_ONCE(net->ipv4.sysctl_tcp_ecn) >= 3) {
+ (READ_ONCE(net->ipv4.sysctl_tcp_ecn) >= 3 ||
+ tcp_ca_needs_accecn(listen_sk))) {
inet_rsk(req)->ecn_ok = 1;
tcp_rsk(req)->accecn_ok = 1;
tcp_rsk(req)->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield &