u8 syn_ect_snt: 2,
syn_ect_rcv: 2,
accecn_fail_mode:4;
+ u8 saw_accecn_opt :2;
#ifdef CONFIG_TCP_AO
u8 ao_keyid;
u8 ao_rcv_next;
syn_fastopen_child:1; /* created TFO passive child socket */
u8 keepalive_probes; /* num of allowed keep alive probes */
- u8 accecn_fail_mode:4; /* AccECN failure handling */
+ u8 accecn_fail_mode:4, /* AccECN failure handling */
+ saw_accecn_opt:2; /* An AccECN option was seen */
u32 tcp_tx_delay; /* delay (in usec) added to TX packets */
/* RTT measurement */
tp->accecn_fail_mode |= mode;
}
+#define TCP_ACCECN_OPT_NOT_SEEN 0x0
+#define TCP_ACCECN_OPT_EMPTY_SEEN 0x1
+#define TCP_ACCECN_OPT_COUNTER_SEEN 0x2
+#define TCP_ACCECN_OPT_FAIL_SEEN 0x3
+
static inline u8 tcp_accecn_ace(const struct tcphdr *th)
{
return (th->ae << 2) | (th->cwr << 1) | th->ece;
return true;
}
+static inline void tcp_accecn_saw_opt_fail_recv(struct tcp_sock *tp,
+ u8 saw_opt)
+{
+ tp->saw_accecn_opt = saw_opt;
+ if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN)
+ tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV);
+}
+
/* Validate the 3rd ACK based on the ACE field, see Table 4 of AccECN spec */
static inline void tcp_accecn_third_ack(struct sock *sk,
const struct sk_buff *skb, u8 sent_ect)
}
}
+static inline u8 tcp_accecn_option_init(const struct sk_buff *skb,
+ u8 opt_offset)
+{
+ u8 *ptr = skb_transport_header(skb) + opt_offset;
+ unsigned int optlen = ptr[1] - 2;
+
+ if (WARN_ON_ONCE(ptr[0] != TCPOPT_ACCECN0 && ptr[0] != TCPOPT_ACCECN1))
+ return TCP_ACCECN_OPT_FAIL_SEEN;
+ ptr += 2;
+
+ /* Detect option zeroing: an AccECN connection "MAY check that the
+ * initial value of the EE0B field or the EE1B field is non-zero"
+ */
+ if (optlen < TCPOLEN_ACCECN_PERFIELD)
+ return TCP_ACCECN_OPT_EMPTY_SEEN;
+ if (get_unaligned_be24(ptr) == 0)
+ return TCP_ACCECN_OPT_FAIL_SEEN;
+ if (optlen < TCPOLEN_ACCECN_PERFIELD * 3)
+ return TCP_ACCECN_OPT_COUNTER_SEEN;
+ ptr += TCPOLEN_ACCECN_PERFIELD * 2;
+ if (get_unaligned_be24(ptr) == 0)
+ return TCP_ACCECN_OPT_FAIL_SEEN;
+
+ return TCP_ACCECN_OPT_COUNTER_SEEN;
+}
+
/* See Table 2 of the AccECN draft */
-static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct tcphdr *th,
- u8 ip_dsfield)
+static inline void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
+ const struct tcphdr *th, u8 ip_dsfield)
{
struct tcp_sock *tp = tcp_sk(sk);
u8 ace = tcp_accecn_ace(th);
default:
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
tp->syn_ect_rcv = ip_dsfield & INET_ECN_MASK;
- tp->accecn_opt_demand = 2;
+ if (tp->rx_opt.accecn &&
+ tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
+ u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
+
+ tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
+ tp->accecn_opt_demand = 2;
+ }
if (INET_ECN_is_ce(ip_dsfield) &&
tcp_accecn_validate_syn_feedback(sk, ace,
tp->syn_ect_snt)) {
__u32 tcpi_received_e1_bytes;
__u32 tcpi_received_e0_bytes;
__u32 tcpi_received_ce_bytes;
+ __u16 tcpi_accecn_fail_mode;
+ __u16 tcpi_accecn_opt_seen;
};
/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
tp->delivered = 0;
tp->delivered_ce = 0;
tp->accecn_fail_mode = 0;
+ tp->saw_accecn_opt = TCP_ACCECN_OPT_NOT_SEEN;
tcp_accecn_init_counters(tp);
tp->prev_ecnfield = 0;
tp->accecn_opt_tstamp = 0;
if (tp->rto_stamp)
info->tcpi_total_rto_time += tcp_clock_ms() - tp->rto_stamp;
+ info->tcpi_accecn_fail_mode = tp->accecn_fail_mode;
+ info->tcpi_accecn_opt_seen = tp->saw_accecn_opt;
info->tcpi_received_ce = tp->received_ce;
info->tcpi_delivered_e1_bytes = tp->delivered_ecn_bytes[ect1_idx];
info->tcpi_delivered_e0_bytes = tp->delivered_ecn_bytes[ect0_idx];
unsigned int i;
u8 *ptr;
+ if (tcp_accecn_opt_fail_recv(tp))
+ return false;
+
if (!(flag & FLAG_SLOWPATH) || !tp->rx_opt.accecn) {
+ if (!tp->saw_accecn_opt) {
+ /* Too late to enable after this point due to
+ * potential counter wraps
+ */
+ if (tp->bytes_sent >= (1 << 23) - 1) {
+ u8 saw_opt = TCP_ACCECN_OPT_FAIL_SEEN;
+
+ tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
+ }
+ return false;
+ }
+
if (estimate_ecnfield) {
u8 ecnfield = estimate_ecnfield - 1;
order1 = (ptr[0] == TCPOPT_ACCECN1);
ptr += 2;
+ if (tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
+ tp->saw_accecn_opt = tcp_accecn_option_init(skb,
+ tp->rx_opt.accecn);
+ if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN)
+ tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_RECV);
+ }
+
res = !!estimate_ecnfield;
for (i = 0; i < 3; i++) {
u32 init_offset;
if (th->syn) {
if (tcp_ecn_mode_accecn(tp)) {
accecn_reflector = true;
- tcp_accecn_opt_demand_min(sk, 1);
+ if (tp->rx_opt.accecn &&
+ tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
+ u8 saw_opt = tcp_accecn_option_init(skb, tp->rx_opt.accecn);
+
+ tcp_accecn_saw_opt_fail_recv(tp, saw_opt);
+ tcp_accecn_opt_demand_min(sk, 1);
+ }
}
if (sk->sk_state == TCP_SYN_RECV && sk->sk_socket && th->ack &&
TCP_SKB_CB(skb)->seq + 1 == TCP_SKB_CB(skb)->end_seq &&
*/
if (tcp_ecn_mode_any(tp))
- tcp_ecn_rcv_synack(sk, th, TCP_SKB_CB(skb)->ip_dsfield);
+ tcp_ecn_rcv_synack(sk, skb, th,
+ TCP_SKB_CB(skb)->ip_dsfield);
tcp_init_wl(tp, TCP_SKB_CB(skb)->seq);
tcp_try_undo_spurious_syn(sk);
tcp_rsk(req)->snt_tsval_first = 0;
tcp_rsk(req)->last_oow_ack_time = 0;
tcp_rsk(req)->accecn_ok = 0;
+ tcp_rsk(req)->saw_accecn_opt = TCP_ACCECN_OPT_NOT_SEEN;
+ tcp_rsk(req)->accecn_fail_mode = 0;
tcp_rsk(req)->syn_ect_rcv = 0;
tcp_rsk(req)->syn_ect_snt = 0;
req->mss = rx_opt->mss_clamp;
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
tp->syn_ect_snt = treq->syn_ect_snt;
tcp_accecn_third_ack(sk, skb, treq->syn_ect_snt);
+ tp->saw_accecn_opt = treq->saw_accecn_opt;
tp->prev_ecnfield = treq->syn_ect_rcv;
tp->accecn_opt_demand = 1;
tcp_ecn_received_counters_payload(sk, skb);
bool own_req;
tmp_opt.saw_tstamp = 0;
+ tmp_opt.accecn = 0;
if (th->doff > (sizeof(struct tcphdr)>>2)) {
tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
if (!(flg & TCP_FLAG_ACK))
return NULL;
+ if (tcp_rsk(req)->accecn_ok && tmp_opt.accecn &&
+ tcp_rsk(req)->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
+ u8 saw_opt = tcp_accecn_option_init(skb, tmp_opt.accecn);
+
+ tcp_rsk(req)->saw_accecn_opt = saw_opt;
+ if (tcp_rsk(req)->saw_accecn_opt == TCP_ACCECN_OPT_FAIL_SEEN) {
+ u8 fail_mode = TCP_ACCECN_OPT_FAIL_RECV;
+
+ tcp_rsk(req)->accecn_fail_mode |= fail_mode;
+ }
+ }
+
/* For Fast Open no more processing is needed (sk is the
* child socket).
*/
}
}
- /* Simultaneous open SYN/ACK needs AccECN option but not SYN */
+ /* Simultaneous open SYN/ACK needs AccECN option but not SYN.
+ * It is attempted to negotiate the use of AccECN also on the first
+ * retransmitted SYN, as mentioned in "3.1.4.1. Retransmitted SYNs"
+ * of AccECN draft.
+ */
if (unlikely((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK) &&
tcp_ecn_mode_accecn(tp) &&
+ inet_csk(sk)->icsk_retransmits < 2 &&
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option) &&
remaining >= TCPOLEN_ACCECN_BASE)) {
opts->use_synack_ecn_bytes = 1;
if (treq->accecn_ok &&
READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option) &&
- remaining >= TCPOLEN_ACCECN_BASE) {
+ req->num_timeout < 1 && remaining >= TCPOLEN_ACCECN_BASE) {
opts->use_synack_ecn_bytes = 1;
remaining -= tcp_options_fit_accecn(opts, 0, remaining);
}
if (tcp_ecn_mode_accecn(tp)) {
int ecn_opt = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option);
- if (ecn_opt &&
+ if (ecn_opt && tp->saw_accecn_opt && !tcp_accecn_opt_fail_send(tp) &&
(ecn_opt >= TCP_ACCECN_OPTION_FULL || tp->accecn_opt_demand ||
tcp_accecn_option_beacon_check(sk))) {
opts->use_synack_ecn_bytes = 0;