From 1dfc29be4d743bba9c249b50acf8e44cb5477624 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Mon, 7 Aug 2023 14:48:10 +0100 Subject: [PATCH] sfc: conntrack state matches in TC rules Parse ct_state trk/est, mark and zone out of flower keys, and plumb them through to the hardware, performing some minor translations. Nothing can actually hit them yet as we're not offloading any DO_CT actions. Reviewed-by: Pieter Jansen van Vuuren Reviewed-by: Simon Horman Signed-off-by: Edward Cree Signed-off-by: David S. Miller --- drivers/net/ethernet/sfc/mae.c | 33 +++++++++++++++-- drivers/net/ethernet/sfc/mcdi.h | 5 +++ drivers/net/ethernet/sfc/tc.c | 66 +++++++++++++++++++++++++++++++++ drivers/net/ethernet/sfc/tc.h | 5 +++ 4 files changed, 105 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/sfc/mae.c b/drivers/net/ethernet/sfc/mae.c index 8ebf71a54bf99..1fa0958d52628 100644 --- a/drivers/net/ethernet/sfc/mae.c +++ b/drivers/net/ethernet/sfc/mae.c @@ -695,8 +695,13 @@ int efx_mae_match_check_caps(struct efx_nic *efx, CHECK(L4_SPORT, l4_sport) || CHECK(L4_DPORT, l4_dport) || CHECK(TCP_FLAGS, tcp_flags) || + CHECK_BIT(TCP_SYN_FIN_RST, tcp_syn_fin_rst) || CHECK_BIT(IS_IP_FRAG, ip_frag) || CHECK_BIT(IP_FIRST_FRAG, ip_firstfrag) || + CHECK_BIT(DO_CT, ct_state_trk) || + CHECK_BIT(CT_HIT, ct_state_est) || + CHECK(CT_MARK, ct_mark) || + CHECK(CT_DOMAIN, ct_zone) || CHECK(RECIRC_ID, recirc_id)) return rc; /* Matches on outer fields are done in a separate hardware table, @@ -1672,20 +1677,40 @@ static int efx_mae_populate_match_criteria(MCDI_DECLARE_STRUCT_PTR(match_crit), } MCDI_STRUCT_SET_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_INGRESS_MPORT_SELECTOR_MASK, match->mask.ingress_port); - EFX_POPULATE_DWORD_2(*_MCDI_STRUCT_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_FLAGS), + EFX_POPULATE_DWORD_5(*_MCDI_STRUCT_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_FLAGS), + MAE_FIELD_MASK_VALUE_PAIRS_V2_DO_CT, + match->value.ct_state_trk, + MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_HIT, + match->value.ct_state_est, MAE_FIELD_MASK_VALUE_PAIRS_V2_IS_IP_FRAG, match->value.ip_frag, MAE_FIELD_MASK_VALUE_PAIRS_V2_IP_FIRST_FRAG, - match->value.ip_firstfrag); - EFX_POPULATE_DWORD_2(*_MCDI_STRUCT_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_FLAGS_MASK), + match->value.ip_firstfrag, + MAE_FIELD_MASK_VALUE_PAIRS_V2_TCP_SYN_FIN_RST, + match->value.tcp_syn_fin_rst); + EFX_POPULATE_DWORD_5(*_MCDI_STRUCT_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_FLAGS_MASK), + MAE_FIELD_MASK_VALUE_PAIRS_V2_DO_CT, + match->mask.ct_state_trk, + MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_HIT, + match->mask.ct_state_est, MAE_FIELD_MASK_VALUE_PAIRS_V2_IS_IP_FRAG, match->mask.ip_frag, MAE_FIELD_MASK_VALUE_PAIRS_V2_IP_FIRST_FRAG, - match->mask.ip_firstfrag); + match->mask.ip_firstfrag, + MAE_FIELD_MASK_VALUE_PAIRS_V2_TCP_SYN_FIN_RST, + match->mask.tcp_syn_fin_rst); MCDI_STRUCT_SET_BYTE(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_RECIRC_ID, match->value.recirc_id); MCDI_STRUCT_SET_BYTE(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_RECIRC_ID_MASK, match->mask.recirc_id); + MCDI_STRUCT_SET_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_MARK, + match->value.ct_mark); + MCDI_STRUCT_SET_DWORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_MARK_MASK, + match->mask.ct_mark); + MCDI_STRUCT_SET_WORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_DOMAIN, + match->value.ct_zone); + MCDI_STRUCT_SET_WORD(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_CT_DOMAIN_MASK, + match->mask.ct_zone); MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_ETHER_TYPE_BE, match->value.eth_proto); MCDI_STRUCT_SET_WORD_BE(match_crit, MAE_FIELD_MASK_VALUE_PAIRS_V2_ETHER_TYPE_BE_MASK, diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h index 995a26686fd82..700d0252aebdc 100644 --- a/drivers/net/ethernet/sfc/mcdi.h +++ b/drivers/net/ethernet/sfc/mcdi.h @@ -229,6 +229,11 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev); BUILD_BUG_ON(MC_CMD_ ## _field ## _OFST & 1); \ *(__force __le16 *)MCDI_PTR(_buf, _field) = cpu_to_le16(_value);\ } while (0) +#define MCDI_STRUCT_SET_WORD(_buf, _field, _value) do { \ + BUILD_BUG_ON(_field ## _LEN != 2); \ + BUILD_BUG_ON(_field ## _OFST & 1); \ + *(__force __le16 *)MCDI_STRUCT_PTR(_buf, _field) = cpu_to_le16(_value);\ + } while (0) #define MCDI_WORD(_buf, _field) \ ((u16)BUILD_BUG_ON_ZERO(MC_CMD_ ## _field ## _LEN != 2) + \ le16_to_cpu(*(__force const __le16 *)MCDI_PTR(_buf, _field))) diff --git a/drivers/net/ethernet/sfc/tc.c b/drivers/net/ethernet/sfc/tc.c index 181636d07024a..a9f4bfaacac31 100644 --- a/drivers/net/ethernet/sfc/tc.c +++ b/drivers/net/ethernet/sfc/tc.c @@ -222,6 +222,7 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) | BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) | BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) | + BIT_ULL(FLOW_DISSECTOR_KEY_CT) | BIT_ULL(FLOW_DISSECTOR_KEY_TCP) | BIT_ULL(FLOW_DISSECTOR_KEY_IP))) { NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx", @@ -363,6 +364,31 @@ static int efx_tc_flower_parse_match(struct efx_nic *efx, dissector->used_keys); return -EOPNOTSUPP; } + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) { + struct flow_match_ct fm; + + flow_rule_match_ct(rule, &fm); + match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED); + match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED); + match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED); + match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED); + if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED | + TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Unsupported ct_state match %#x", + fm.mask->ct_state); + return -EOPNOTSUPP; + } + match->value.ct_mark = fm.key->ct_mark; + match->mask.ct_mark = fm.mask->ct_mark; + match->value.ct_zone = fm.key->ct_zone; + match->mask.ct_zone = fm.mask->ct_zone; + + if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) { + NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported"); + return -EOPNOTSUPP; + } + } return 0; } @@ -758,6 +784,26 @@ static int efx_tc_flower_replace_foreign(struct efx_nic *efx, } match.mask.recirc_id = 0xff; + /* AR table can't match on DO_CT (+trk). But a commonly used pattern is + * +trk+est, which is strictly implied by +est, so rewrite it to that. + */ + if (match.mask.ct_state_trk && match.value.ct_state_trk && + match.mask.ct_state_est && match.value.ct_state_est) + match.mask.ct_state_trk = 0; + /* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could + * match +trk-est (CT_HIT=0) despite being on an established connection. + * So make -est imply -tcp_syn_fin_rst match to ensure these packets + * still hit the software path. + */ + if (match.mask.ct_state_est && !match.value.ct_state_est) { + if (match.value.tcp_syn_fin_rst) { + /* Can't offload this combination */ + rc = -EOPNOTSUPP; + goto release; + } + match.mask.tcp_syn_fin_rst = true; + } + flow_action_for_each(i, fa, &fr->action) { switch (fa->id) { case FLOW_ACTION_REDIRECT: @@ -1089,6 +1135,26 @@ static int efx_tc_flower_replace(struct efx_nic *efx, } match.mask.recirc_id = 0xff; + /* AR table can't match on DO_CT (+trk). But a commonly used pattern is + * +trk+est, which is strictly implied by +est, so rewrite it to that. + */ + if (match.mask.ct_state_trk && match.value.ct_state_trk && + match.mask.ct_state_est && match.value.ct_state_est) + match.mask.ct_state_trk = 0; + /* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could + * match +trk-est (CT_HIT=0) despite being on an established connection. + * So make -est imply -tcp_syn_fin_rst match to ensure these packets + * still hit the software path. + */ + if (match.mask.ct_state_est && !match.value.ct_state_est) { + if (match.value.tcp_syn_fin_rst) { + /* Can't offload this combination */ + rc = -EOPNOTSUPP; + goto release; + } + match.mask.tcp_syn_fin_rst = true; + } + rc = efx_mae_match_check_caps(efx, &match.mask, extack); if (rc) goto release; diff --git a/drivers/net/ethernet/sfc/tc.h b/drivers/net/ethernet/sfc/tc.h index af15020c8da7e..ce8e30743a3a9 100644 --- a/drivers/net/ethernet/sfc/tc.h +++ b/drivers/net/ethernet/sfc/tc.h @@ -60,6 +60,7 @@ struct efx_tc_match_fields { /* L4 */ __be16 l4_sport, l4_dport; /* Ports (UDP, TCP) */ __be16 tcp_flags; + bool tcp_syn_fin_rst; /* true if ANY of SYN/FIN/RST are set */ /* Encap. The following are *outer* fields. Note that there are no * outer eth (L2) fields; this is because TC doesn't have them. */ @@ -68,6 +69,10 @@ struct efx_tc_match_fields { u8 enc_ip_tos, enc_ip_ttl; __be16 enc_sport, enc_dport; __be32 enc_keyid; /* e.g. VNI, VSID */ + /* Conntrack. */ + u16 ct_state_trk:1, ct_state_est:1; + u32 ct_mark; + u16 ct_zone; }; static inline bool efx_tc_match_is_encap(const struct efx_tc_match_fields *mask) -- 2.47.3