/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <net/flow_dissector.h>
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include <net/dst_metadata.h>
#include "en/tc/post_act.h"
#include "en/tc/act_stats.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "en/mod_hdr.h"
#include "en/tc_tun_encap.h"
#include "en/tc/sample.h"
#include "en/tc/act/act.h"
#include "en/tc/post_meter.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"
#include <asm/div64.h>

#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)

struct mlx5e_tc_table {
        /* Protects the dynamic assignment of the t parameter
         * which is the nic tc root table.
         */
        struct mutex t_lock;
        struct mlx5e_priv *priv;
        struct mlx5_flow_table *t;
        struct mlx5_flow_table *miss_t;
        struct mlx5_fs_chains *chains;
        struct mlx5e_post_act *post_act;

        struct rhashtable ht;

        struct mod_hdr_tbl mod_hdr;
        struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
        DECLARE_HASHTABLE(hairpin_tbl, 8);

        struct notifier_block netdevice_nb;
        struct netdev_net_notifier netdevice_nn;

        struct mlx5_tc_ct_priv *ct;
        struct mapping_ctx *mapping;
        struct dentry *dfs_root;

        /* tc action stats */
        struct mlx5e_tc_act_stats_handle *action_stats_handle;
};

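/* Mapping between SW metadata "registers" and device modify-header fields:
 * each entry records the modify-header field (mfield), the bit offset and
 * length within it (moffset/mlen) and, where matching is also needed, the
 * byte offset of the corresponding field in the match parameters (soffset).
 */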
struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
        [MAPPED_OBJ_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
                .moffset = 0,
                .mlen = 16,
        },
        [VPORT_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
                .moffset = 16,
                .mlen = 16,
        },
        [TUNNEL_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
                .moffset = 8,
                .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
                .soffset = MLX5_BYTE_OFF(fte_match_param,
                                         misc_parameters_2.metadata_reg_c_1),
        },
        [ZONE_TO_REG] = zone_to_reg_ct,
        [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
        [CTSTATE_TO_REG] = ctstate_to_reg_ct,
        [MARK_TO_REG] = mark_to_reg_ct,
        [LABELS_TO_REG] = labels_to_reg_ct,
        [FTEID_TO_REG] = fteid_to_reg_ct,
        /* For NIC rules we store the restore metadata directly
         * into reg_b that is passed to SW since we don't
         * jump between steering domains.
         */
        [NIC_MAPPED_OBJ_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
                .moffset = 0,
                .mlen = 16,
        },
        [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
        [PACKET_COLOR_TO_REG] = packet_color_to_reg,
};

struct mlx5e_tc_jump_state {
        u32 jump_count;
        bool jump_target;
        struct mlx5_flow_attr *jumping_attr;

        enum flow_action_id last_id;
        u32 last_index;
};

struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
{
        struct mlx5e_tc_table *tc;

        tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
        return tc ? tc : ERR_PTR(-ENOMEM);
}

void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
{
        kvfree(tc);
}

static struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
{
        return tc->chains;
}

/* To avoid a false lock dependency warning, set the tc_ht lock class
 * different from the lock class of the ht being used when deleting the last
 * flow from a group and then deleting the group: we get into
 * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash.
 * That takes ht->mutex, but it is a different ht->mutex than the one here.
 */
static struct lock_class_key tc_ht_lock_key;
static struct lock_class_key tc_ht_wq_key;

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow,
                                        struct mlx5_flow_attr *attr);

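/* Add a match on the metadata register described by @type to @spec, merging
 * the new value/mask into the 32-bit word already present in the match
 * criteria/value at the mapping's byte offset.
 */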
void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
                            enum mlx5e_tc_attr_to_reg type,
                            u32 val,
                            u32 mask)
{
        void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
        int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        u32 max_mask = GENMASK(match_len - 1, 0);
        __be32 curr_mask_be, curr_val_be;
        u32 curr_mask, curr_val;

        fmask = headers_c + soffset;
        fval = headers_v + soffset;

        memcpy(&curr_mask_be, fmask, 4);
        memcpy(&curr_val_be, fval, 4);

        curr_mask = be32_to_cpu(curr_mask_be);
        curr_val = be32_to_cpu(curr_val_be);

        //move to correct offset
        WARN_ON(mask > max_mask);
        mask <<= moffset;
        val <<= moffset;
        max_mask <<= moffset;

        //zero val and mask
        curr_mask &= ~max_mask;
        curr_val &= ~max_mask;

        //add current to mask
        curr_mask |= mask;
        curr_val |= val;

        //back to be32 and write
        curr_mask_be = cpu_to_be32(curr_mask);
        curr_val_be = cpu_to_be32(curr_val);

        memcpy(fmask, &curr_mask_be, 4);
        memcpy(fval, &curr_val_be, 4);

        spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}

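/* Read back the value/mask currently programmed in @spec for the metadata
 * register described by @type (the inverse of mlx5e_tc_match_to_reg_match()).
 */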
void
mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
                                enum mlx5e_tc_attr_to_reg type,
                                u32 *val,
                                u32 *mask)
{
        void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
        int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        u32 max_mask = GENMASK(match_len - 1, 0);
        __be32 curr_mask_be, curr_val_be;
        u32 curr_mask, curr_val;

        fmask = headers_c + soffset;
        fval = headers_v + soffset;

        memcpy(&curr_mask_be, fmask, 4);
        memcpy(&curr_val_be, fval, 4);

        curr_mask = be32_to_cpu(curr_mask_be);
        curr_val = be32_to_cpu(curr_val_be);

        *mask = (curr_mask >> moffset) & max_mask;
        *val = (curr_val >> moffset) & max_mask;
}

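/* Append a set_action_in modify-header action that writes @data into the
 * metadata register described by @type, and return the index of the new
 * action so it can later be rewritten via
 * mlx5e_tc_match_to_reg_mod_hdr_change().
 */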
int
mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
                                     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                                     enum mlx5_flow_namespace_type ns,
                                     enum mlx5e_tc_attr_to_reg type,
                                     u32 data)
{
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
        int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        char *modact;
        int err;

        modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
        if (IS_ERR(modact))
                return PTR_ERR(modact);

        /* Firmware has 5bit length field and 0 means 32bits */
        if (mlen == 32)
                mlen = 0;

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, field, mfield);
        MLX5_SET(set_action_in, modact, offset, moffset);
        MLX5_SET(set_action_in, modact, length, mlen);
        MLX5_SET(set_action_in, modact, data, data);
        err = mod_hdr_acts->num_actions;
        mod_hdr_acts->num_actions++;

        return err;
}

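/* The getters below share one pattern: in switchdev mode the resource lives
 * in the uplink representor's private data; otherwise it lives in the
 * per-netdev tc table, or does not exist at all.
 */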
static struct mlx5e_tc_act_stats_handle *
get_act_stats_handle(struct mlx5e_priv *priv)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->action_stats_handle;
        }

        return tc->action_stats_handle;
}

struct mlx5e_tc_int_port_priv *
mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->int_port_priv;
        }

        return NULL;
}

struct mlx5e_flow_meters *
mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
{
        struct mlx5_eswitch *esw = dev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct mlx5e_priv *priv;

        if (is_mdev_switchdev_mode(dev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;
                priv = netdev_priv(uplink_rpriv->netdev);
                if (!uplink_priv->flow_meters)
                        uplink_priv->flow_meters =
                                mlx5e_flow_meters_init(priv,
                                                       MLX5_FLOW_NAMESPACE_FDB,
                                                       uplink_priv->post_act);
                if (!IS_ERR(uplink_priv->flow_meters))
                        return uplink_priv->flow_meters;
        }

        return NULL;
}

static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->ct_priv;
        }

        return tc->ct;
}

static struct mlx5e_tc_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->tc_psample;
        }

        return NULL;
}

static struct mlx5e_post_act *
get_post_action(struct mlx5e_priv *priv)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->post_act;
        }

        return tc->post_act;
}

struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
                    struct mlx5_flow_spec *spec,
                    struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (is_mdev_switchdev_mode(priv->mdev))
                return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

        return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
}

void
mlx5_tc_rule_delete(struct mlx5e_priv *priv,
                    struct mlx5_flow_handle *rule,
                    struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
                return;
        }

        mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}

static bool
is_flow_meter_action(struct mlx5_flow_attr *attr)
{
        return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
                 (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) ||
                attr->flags & MLX5_ATTR_FLAG_MTU);
}

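/* Set up a flow meter and its post-meter tables for @attr: the rule is
 * redirected to the post-meter flow table, which applies the rate/MTU
 * policing verdict (and the branch_true/branch_false attributes) after the
 * ASO meter has colored the packet.
 */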
static int
mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
                        struct mlx5_flow_attr *attr)
{
        struct mlx5e_post_act *post_act = get_post_action(priv);
        struct mlx5e_post_meter_priv *post_meter;
        enum mlx5_flow_namespace_type ns_type;
        struct mlx5e_flow_meter_handle *meter;
        enum mlx5e_post_meter_type type;

        meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
        if (IS_ERR(meter)) {
                mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
                return PTR_ERR(meter);
        }

        ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
        type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE;
        post_meter = mlx5e_post_meter_init(priv, ns_type, post_act,
                                           type,
                                           meter->act_counter, meter->drop_counter,
                                           attr->branch_true, attr->branch_false);
        if (IS_ERR(post_meter)) {
                mlx5_core_err(priv->mdev, "Failed to init post meter\n");
                goto err_meter_init;
        }

        attr->meter_attr.meter = meter;
        attr->meter_attr.post_meter = post_meter;
        attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
        attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;

        return 0;

err_meter_init:
        mlx5e_tc_meter_put(meter);
        return PTR_ERR(post_meter);
}

static void
mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
{
        mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter);
        mlx5e_tc_meter_put(attr->meter_attr.meter);
}

struct mlx5_flow_handle *
mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
                      struct mlx5_flow_spec *spec,
                      struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        int err;

        if (!is_mdev_switchdev_mode(priv->mdev))
                return mlx5e_add_offloaded_nic_rule(priv, spec, attr);

        if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
                return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);

        if (is_flow_meter_action(attr)) {
                err = mlx5e_tc_add_flow_meter(priv, attr);
                if (err)
                        return ERR_PTR(err);
        }

        return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}

void
mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
                        struct mlx5_flow_handle *rule,
                        struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!is_mdev_switchdev_mode(priv->mdev)) {
                mlx5e_del_offloaded_nic_rule(priv, rule, attr);
                return;
        }

        if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
                mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
                return;
        }

        mlx5_eswitch_del_offloaded_rule(esw, rule, attr);

        if (attr->meter_attr.meter)
                mlx5e_tc_del_flow_meter(esw, attr);
}

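/* Thin wrapper around mlx5e_tc_match_to_reg_set_and_get_id() for callers
 * that don't care about the returned action index. A typical pairing
 * (sketch, mirroring the slow-path code further below):
 *
 *	err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts,
 *					MLX5_FLOW_NAMESPACE_FDB,
 *					MAPPED_OBJ_TO_REG, chain_mapping);
 *
 * with mlx5e_tc_match_to_reg_match() matching the same register in a later
 * table.
 */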
int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                          enum mlx5_flow_namespace_type ns,
                          enum mlx5e_tc_attr_to_reg type,
                          u32 data)
{
        int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);

        return ret < 0 ? ret : 0;
}

void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
                                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                                          enum mlx5e_tc_attr_to_reg type,
                                          int act_id, u32 data)
{
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
        int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        char *modact;

        modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);

        /* Firmware has 5bit length field and 0 means 32bits */
        if (mlen == 32)
                mlen = 0;

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, field, mfield);
        MLX5_SET(set_action_in, modact, offset, moffset);
        MLX5_SET(set_action_in, modact, length, mlen);
        MLX5_SET(set_action_in, modact, data, data);
}

struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;

        struct mlx5_core_dev *func_mdev;
        struct mlx5e_priv *func_priv;
        u32 tdn;
        struct mlx5e_tir direct_tir;

        int num_channels;
        u8 log_num_packets;
        struct mlx5e_rqt indir_rqt;
        struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
        struct mlx5_ttc_table *ttc;
};

struct mlx5e_hairpin_entry {
        /* a node of a hash table which keeps all the hairpin entries */
        struct hlist_node hairpin_hlist;

        /* protects flows list */
        spinlock_t flows_lock;
        /* flows sharing the same hairpin */
        struct list_head flows;
        /* hpe's that were not fully initialized when dead peer update event
         * function traversed them.
         */
        struct list_head dead_peer_wait_list;

        u16 peer_vhca_id;
        u8 prio;
        struct mlx5e_hairpin *hp;
        refcount_t refcnt;
        struct completion res_ready;
};

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow);

struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
        if (!flow || !refcount_inc_not_zero(&flow->refcnt))
                return ERR_PTR(-EINVAL);

        return flow;
}

void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
        if (refcount_dec_and_test(&flow->refcnt)) {
                mlx5e_tc_del_flow(priv, flow);
                kfree_rcu(flow, rcu_head);
        }
}

bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, ESWITCH);
}

bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, FT);
}

bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, OFFLOADED);
}

int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
{
        return mlx5e_is_eswitch_flow(flow) ?
                MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}

static struct mlx5_core_dev *
get_flow_counter_dev(struct mlx5e_tc_flow *flow)
{
        return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
}

static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
                &esw->offloads.mod_hdr :
                &tc->mod_hdr;
}

int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv,
                            struct mlx5e_tc_flow *flow,
                            struct mlx5_flow_attr *attr)
{
        struct mlx5e_mod_hdr_handle *mh;

        mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
                                  mlx5e_get_flow_namespace(flow),
                                  &attr->parse_attr->mod_hdr_acts);
        if (IS_ERR(mh))
                return PTR_ERR(mh);

        WARN_ON(attr->modify_hdr);
        attr->modify_hdr = mlx5e_mod_hdr_get(mh);
        attr->mh = mh;

        return 0;
}

void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv,
                             struct mlx5e_tc_flow *flow,
                             struct mlx5_flow_attr *attr)
{
        /* flow wasn't fully initialized */
        if (!attr->mh)
                return;

        mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
                             attr->mh);
        attr->mh = NULL;
}

static struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
        struct mlx5_core_dev *mdev;
        struct net_device *netdev;
        struct mlx5e_priv *priv;

        netdev = dev_get_by_index(net, ifindex);
        if (!netdev)
                return ERR_PTR(-ENODEV);

        priv = netdev_priv(netdev);
        mdev = priv->mdev;
        dev_put(netdev);

        /* Mirred tc action holds a refcount on the ifindex net_device (see
         * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
         * after dev_put(netdev), while we're in the context of adding a tc flow.
         *
         * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
         * stored in a hairpin object, which exists until all flows, that refer to it, get
         * removed.
         *
         * On the other hand, after a hairpin object has been created, the peer net_device may
         * be removed/unbound while there are still some hairpin flows that are using it. This
         * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
         * NETDEV_UNREGISTER event of the peer net_device.
         */
        return mdev;
}

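/* A hairpin needs its own transport domain and a TIR pointing at the first
 * RQ of the hairpin pair; RSS hairpin (num_channels > 1) additionally builds
 * an indirection table, per-traffic-type TIRs and a TTC table further below.
 */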
static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
        struct mlx5e_tir_builder *builder;
        int err;

        builder = mlx5e_tir_builder_alloc(false);
        if (!builder)
                return -ENOMEM;

        err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
        if (err)
                goto out;

        mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
        err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
        if (err)
                goto create_tir_err;

out:
        mlx5e_tir_builder_free(builder);
        return err;

create_tir_err:
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);

        goto out;
}

static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
        mlx5e_tir_destroy(&hp->direct_tir);
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}

static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_rss_params_indir *indir;
        int err;

        indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
        if (!indir)
                return -ENOMEM;

        mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
        err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
                                   mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
                                   indir);

        kvfree(indir);
        return err;
}

static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5e_rss_params_hash rss_hash;
        enum mlx5_traffic_types tt, max_tt;
        struct mlx5e_tir_builder *builder;
        int err = 0;

        builder = mlx5e_tir_builder_alloc(false);
        if (!builder)
                return -ENOMEM;

        rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                struct mlx5e_rss_params_traffic_type rss_tt;

                rss_tt = mlx5e_rss_get_default_tt_config(tt);

                mlx5e_tir_builder_build_rqt(builder, hp->tdn,
                                            mlx5e_rqt_get_rqtn(&hp->indir_rqt),
                                            false);
                mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);

                err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
                if (err) {
                        mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
                        goto err_destroy_tirs;
                }

                mlx5e_tir_builder_clear(builder);
        }

out:
        mlx5e_tir_builder_free(builder);
        return err;

err_destroy_tirs:
        max_tt = tt;
        for (tt = 0; tt < max_tt; tt++)
                mlx5e_tir_destroy(&hp->indir_tir[tt]);

        goto out;
}

static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
        int tt;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                mlx5e_tir_destroy(&hp->indir_tir[tt]);
}

static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
                                         struct ttc_params *ttc_params)
{
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
        int tt;

        memset(ttc_params, 0, sizeof(*ttc_params));

        ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
                                                 MLX5_FLOW_NAMESPACE_KERNEL);
        for (tt = 0; tt < MLX5_NUM_TT; tt++) {
                ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                ttc_params->dests[tt].tir_num =
                        tt == MLX5_TT_ANY ?
                                mlx5e_tir_get_tirn(&hp->direct_tir) :
                                mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
        }

        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
}

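/* RSS hairpin setup: indirect RQT over the pair's RQs -> per-traffic-type
 * indirect TIRs -> a TTC table that steers each traffic type to its TIR.
 */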
static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct ttc_params ttc_params;
        struct mlx5_ttc_table *ttc;
        int err;

        err = mlx5e_hairpin_create_indirect_rqt(hp);
        if (err)
                return err;

        err = mlx5e_hairpin_create_indirect_tirs(hp);
        if (err)
                goto err_create_indirect_tirs;

        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
        hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
        if (IS_ERR(hp->ttc)) {
                err = PTR_ERR(hp->ttc);
                goto err_create_ttc_table;
        }

        ttc = mlx5e_fs_get_ttc(priv->fs, false);
        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
                   hp->num_channels,
                   mlx5_get_ttc_flow_table(ttc)->id);

        return 0;

err_create_ttc_table:
        mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
        mlx5e_rqt_destroy(&hp->indir_rqt);

        return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
        mlx5_destroy_ttc_table(hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_rqt_destroy(&hp->indir_rqt);
}

static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
                     int peer_ifindex)
{
        struct mlx5_core_dev *func_mdev, *peer_mdev;
        struct mlx5e_hairpin *hp;
        struct mlx5_hairpin *pair;
        int err;

        hp = kzalloc(sizeof(*hp), GFP_KERNEL);
        if (!hp)
                return ERR_PTR(-ENOMEM);

        func_mdev = priv->mdev;
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (IS_ERR(peer_mdev)) {
                err = PTR_ERR(peer_mdev);
                goto create_pair_err;
        }

        pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
        if (IS_ERR(pair)) {
                err = PTR_ERR(pair);
                goto create_pair_err;
        }

        hp->pair = pair;
        hp->func_mdev = func_mdev;
        hp->func_priv = priv;
        hp->num_channels = params->num_channels;
        hp->log_num_packets = params->log_num_packets;

        err = mlx5e_hairpin_create_transport(hp);
        if (err)
                goto create_transport_err;

        if (hp->num_channels > 1) {
                err = mlx5e_hairpin_rss_init(hp);
                if (err)
                        goto rss_init_err;
        }

        return hp;

rss_init_err:
        mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
        mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
        kfree(hp);
        return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
        if (hp->num_channels > 1)
                mlx5e_hairpin_rss_cleanup(hp);
        mlx5e_hairpin_destroy_transport(hp);
        mlx5_core_hairpin_destroy(hp->pair);
        kvfree(hp);
}

static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
        return (peer_vhca_id << 16 | prio);
}

static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
                                                     u16 peer_vhca_id, u8 prio)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct mlx5e_hairpin_entry *hpe;
        u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

        hash_for_each_possible(tc->hairpin_tbl, hpe,
                               hairpin_hlist, hash_key) {
                if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
                        refcount_inc(&hpe->refcnt);
                        return hpe;
                }
        }

        return NULL;
}

static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
                              struct mlx5e_hairpin_entry *hpe)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        /* no more hairpin flows for us, release the hairpin pair */
        if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
                return;
        hash_del(&hpe->hairpin_hlist);
        mutex_unlock(&tc->hairpin_tbl_lock);

        if (!IS_ERR_OR_NULL(hpe->hp)) {
                netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
                           dev_name(hpe->hp->pair->peer_mdev->device));

                mlx5e_hairpin_destroy(hpe->hp);
        }

        WARN_ON(!list_empty(&hpe->flows));
        kfree(hpe);
}

#define UNKNOWN_MATCH_PRIO 8

static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
                                  struct mlx5_flow_spec *spec, u8 *match_prio,
                                  struct netlink_ext_ack *extack)
{
        void *headers_c, *headers_v;
        u8 prio_val, prio_mask = 0;
        bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
        if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only PCP trust state supported for hairpin");
                return -EOPNOTSUPP;
        }
#endif
        headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

        vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
        if (vlan_present) {
                prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
                prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
        }

        if (!vlan_present || !prio_mask) {
                prio_val = UNKNOWN_MATCH_PRIO;
        } else if (prio_mask != 0x7) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "masked priority match not supported for hairpin");
                return -EOPNOTSUPP;
        }

        *match_prio = prio_val;
        return 0;
}

static int debugfs_hairpin_num_active_get(void *data, u64 *val)
{
        struct mlx5e_tc_table *tc = data;
        struct mlx5e_hairpin_entry *hpe;
        u32 cnt = 0;
        u32 bkt;

        mutex_lock(&tc->hairpin_tbl_lock);
        hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
                cnt++;
        mutex_unlock(&tc->hairpin_tbl_lock);

        *val = cnt;

        return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active,
                         debugfs_hairpin_num_active_get, NULL, "%llu\n");

static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv)
{
        struct mlx5e_tc_table *tc = file->private;
        struct mlx5e_hairpin_entry *hpe;
        u32 bkt;

        mutex_lock(&tc->hairpin_tbl_lock);
        hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
                seq_printf(file,
                           "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n",
                           hpe->peer_vhca_id, hpe->prio,
                           refcount_read(&hpe->refcnt), hpe->hp->num_channels,
                           BIT(hpe->hp->log_num_packets));
        mutex_unlock(&tc->hairpin_tbl_lock);

        return 0;
}

DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump);

static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
                                  struct dentry *dfs_root)
{
        if (IS_ERR_OR_NULL(dfs_root))
                return;

        tc->dfs_root = debugfs_create_dir("tc", dfs_root);

        debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc,
                            &fops_hairpin_num_active);
        debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc,
                            &debugfs_hairpin_table_dump_fops);
}

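/* Attach @flow to a hairpin entry (hpe), creating the entry and the hairpin
 * pair on first use. Concurrent adders that find an existing hpe wait on its
 * res_ready completion; queue size and queue count are taken from devlink
 * parameters.
 */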
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
                                  struct netlink_ext_ack *extack)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct devlink *devlink = priv_to_devlink(priv->mdev);
        int peer_ifindex = parse_attr->mirred_ifindex[0];
        union devlink_param_value val = {};
        struct mlx5_hairpin_params params;
        struct mlx5_core_dev *peer_mdev;
        struct mlx5e_hairpin_entry *hpe;
        struct mlx5e_hairpin *hp;
        u8 match_prio;
        u16 peer_id;
        int err;

        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (IS_ERR(peer_mdev)) {
                NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
                return PTR_ERR(peer_mdev);
        }

        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
        }

        peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
        err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
                                     extack);
        if (err)
                return err;

        mutex_lock(&tc->hairpin_tbl_lock);
        hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
        if (hpe) {
                mutex_unlock(&tc->hairpin_tbl_lock);
                wait_for_completion(&hpe->res_ready);

                if (IS_ERR(hpe->hp)) {
                        err = -EREMOTEIO;
                        goto out_err;
                }
                goto attach_flow;
        }

        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
        if (!hpe) {
                mutex_unlock(&tc->hairpin_tbl_lock);
                return -ENOMEM;
        }

        spin_lock_init(&hpe->flows_lock);
        INIT_LIST_HEAD(&hpe->flows);
        INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
        hpe->peer_vhca_id = peer_id;
        hpe->prio = match_prio;
        refcount_set(&hpe->refcnt, 1);
        init_completion(&hpe->res_ready);

        hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
                 hash_hairpin_info(peer_id, match_prio));
        mutex_unlock(&tc->hairpin_tbl_lock);

        err = devl_param_driverinit_value_get(
                devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val);
        if (err) {
                err = -ENOMEM;
                goto out_err;
        }

        params.log_num_packets = ilog2(val.vu32);
        params.log_data_size =
                clamp_t(u32,
                        params.log_num_packets +
                                MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev),
                        MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz),
                        MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));

        params.q_counter = priv->q_counter;
        err = devl_param_driverinit_value_get(
                devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
        if (err) {
                err = -ENOMEM;
                goto out_err;
        }

        params.num_channels = val.vu32;

        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
        hpe->hp = hp;
        complete_all(&hpe->res_ready);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
                goto out_err;
        }

        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
                   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
                flow->attr->nic_attr->hairpin_ft =
                        mlx5_get_ttc_flow_table(hpe->hp->ttc);
        } else {
                flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
        }

        flow->hpe = hpe;
        spin_lock(&hpe->flows_lock);
        list_add(&flow->hairpin, &hpe->flows);
        spin_unlock(&hpe->flows_lock);

        return 0;

out_err:
        mlx5e_hairpin_put(priv, hpe);
        return err;
}

static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
                                   struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->hpe)
                return;

        spin_lock(&flow->hpe->flows_lock);
        list_del(&flow->hairpin);
        spin_unlock(&flow->hpe->flows_lock);

        mlx5e_hairpin_put(priv, flow->hpe);
        flow->hpe = NULL;
}

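/* Destinations are filled in priority order: an explicit dest_ft, then the
 * hairpin flow table or TIR, then the goto-chain table or the vlan table for
 * a plain forward; a flow counter, if any, is appended as a further
 * destination.
 */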
struct mlx5_flow_handle *
mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
                             struct mlx5_flow_spec *spec,
                             struct mlx5_flow_attr *attr)
{
        struct mlx5_flow_context *flow_context = &spec->flow_context;
        struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_fs_chains *nic_chains;
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .flags = FLOW_ACT_NO_APPEND,
        };
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_table *ft;
        int dest_ix = 0;

        nic_chains = mlx5e_nic_chains(tc);
        flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
        flow_context->flow_tag = nic_attr->flow_tag;

        if (attr->dest_ft) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = attr->dest_ft;
                dest_ix++;
        } else if (nic_attr->hairpin_ft) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = nic_attr->hairpin_ft;
                dest_ix++;
        } else if (nic_attr->hairpin_tirn) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
                dest_ix++;
        } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                if (attr->dest_chain) {
                        dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
                                                                 attr->dest_chain, 1,
                                                                 MLX5E_TC_FT_LEVEL);
                        if (IS_ERR(dest[dest_ix].ft))
                                return ERR_CAST(dest[dest_ix].ft);
                } else {
                        dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
                }
                dest_ix++;
        }

        if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
            MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
                flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;

        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
                dest_ix++;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                flow_act.modify_hdr = attr->modify_hdr;

        mutex_lock(&tc->t_lock);
        if (IS_ERR_OR_NULL(tc->t)) {
                /* Create the root table here if doesn't exist yet */
                tc->t =
                        mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);

                if (IS_ERR(tc->t)) {
                        mutex_unlock(&tc->t_lock);
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
                        rule = ERR_CAST(tc->t);
                        goto err_ft_get;
                }
        }
        mutex_unlock(&tc->t_lock);

        if (attr->chain || attr->prio)
                ft = mlx5_chains_get_table(nic_chains,
                                           attr->chain, attr->prio,
                                           MLX5E_TC_FT_LEVEL);
        else
                ft = attr->ft;

        if (IS_ERR(ft)) {
                rule = ERR_CAST(ft);
                goto err_ft_get;
        }

        if (attr->outer_match_level != MLX5_MATCH_NONE)
                spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

        rule = mlx5_add_flow_rules(ft, spec,
                                   &flow_act, dest, dest_ix);
        if (IS_ERR(rule))
                goto err_rule;

        return rule;

err_rule:
        if (attr->chain || attr->prio)
                mlx5_chains_put_table(nic_chains,
                                      attr->chain, attr->prio,
                                      MLX5E_TC_FT_LEVEL);
err_ft_get:
        if (attr->dest_chain)
                mlx5_chains_put_table(nic_chains,
                                      attr->dest_chain, 1,
                                      MLX5E_TC_FT_LEVEL);

        return ERR_CAST(rule);
}

static int
alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
                        struct mlx5_flow_attr *attr)
{
        struct mlx5_fc *counter;

        counter = mlx5_fc_create(counter_dev, true);
        if (IS_ERR(counter))
                return PTR_ERR(counter);

        attr->counter = counter;
        return 0;
}

static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_core_dev *dev = priv->mdev;
        int err;

        parse_attr = attr->parse_attr;

        if (flow_flag_test(flow, HAIRPIN)) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err)
                        return err;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                err = alloc_flow_attr_counter(dev, attr);
                if (err)
                        return err;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
                if (err)
                        return err;
        }

        flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr);
        return PTR_ERR_OR_ZERO(flow->rule[0]);
}

void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
                                  struct mlx5_flow_handle *rule,
                                  struct mlx5_flow_attr *attr)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct mlx5_fs_chains *nic_chains;

        nic_chains = mlx5e_nic_chains(tc);
        mlx5_del_flow_rules(rule);

        if (attr->chain || attr->prio)
                mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
                                      MLX5E_TC_FT_LEVEL);

        if (attr->dest_chain)
                mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
                                      MLX5E_TC_FT_LEVEL);
}

static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
        struct mlx5_flow_attr *attr = flow->attr;

        flow_flag_clear(flow, OFFLOADED);

        if (!IS_ERR_OR_NULL(flow->rule[0]))
                mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);

        /* Remove root table if no rules are left to avoid
         * extra steering hops.
         */
        mutex_lock(&tc->t_lock);
        if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
            !IS_ERR_OR_NULL(tc->t)) {
                mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
                tc->t = NULL;
        }
        mutex_unlock(&tc->t_lock);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
                mlx5e_tc_detach_mod_hdr(priv, flow, attr);
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
                mlx5_fc_destroy(priv->mdev, attr->counter);

        if (flow_flag_test(flow, HAIRPIN))
                mlx5e_hairpin_flow_del(priv, flow);

        free_flow_post_acts(flow);

        kvfree(attr->parse_attr);
        kfree(flow->attr);
}

struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5e_tc_flow *flow,
                           struct mlx5_flow_spec *spec,
                           struct mlx5_flow_attr *attr)
{
        struct mlx5_flow_handle *rule;

        if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
                return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

        rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);

        if (IS_ERR(rule))
                return rule;

        if (attr->esw_attr->split_count) {
                flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
                if (IS_ERR(flow->rule[1]))
                        goto err_rule1;
        }

        return rule;

err_rule1:
        mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
        return flow->rule[1];
}

void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5_flow_attr *attr)
{
        flow_flag_clear(flow, OFFLOADED);

        if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
                return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);

        if (attr->esw_attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

        mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
}

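/* Offload @flow to the slow path: the rule only forwards to the slow table,
 * and when firmware supports forward+modify, the chain id is written to the
 * chain-mapping register first so a software miss resumes handling from the
 * proper chain.
 */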
struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec)
{
        struct mlx5e_tc_mod_hdr_acts mod_acts = {};
        struct mlx5e_mod_hdr_handle *mh = NULL;
        struct mlx5_flow_attr *slow_attr;
        struct mlx5_flow_handle *rule;
        bool fwd_and_modify_cap;
        u32 chain_mapping = 0;
        int err;

        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
        if (!slow_attr)
                return ERR_PTR(-ENOMEM);

        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
        slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;

        fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
        if (!fwd_and_modify_cap)
                goto skip_restore;

        err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
        if (err)
                goto err_get_chain;

        err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
                                        MAPPED_OBJ_TO_REG, chain_mapping);
        if (err)
                goto err_reg_set;

        mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
                                  MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
        if (IS_ERR(mh)) {
                err = PTR_ERR(mh);
                goto err_attach;
        }

        slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);

skip_restore:
        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                goto err_offload;
        }

        flow->attr->slow_mh = mh;
        flow->chain_mapping = chain_mapping;
        flow_flag_set(flow, SLOW);

        mlx5e_mod_hdr_dealloc(&mod_acts);
        kfree(slow_attr);

        return rule;

err_offload:
        if (fwd_and_modify_cap)
                mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
err_attach:
err_reg_set:
        if (fwd_and_modify_cap)
                mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
err_get_chain:
        mlx5e_mod_hdr_dealloc(&mod_acts);
        kfree(slow_attr);
        return ERR_PTR(err);
}

void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
                                       struct mlx5e_tc_flow *flow)
{
        struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh;
        struct mlx5_flow_attr *slow_attr;

        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
        if (!slow_attr) {
                mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
                return;
        }

        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
        slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
        if (slow_mh) {
                slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
                slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh);
        }
        mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
        if (slow_mh) {
                mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh);
                mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
                flow->chain_mapping = 0;
                flow->attr->slow_mh = NULL;
        }
        flow_flag_clear(flow, SLOW);
        kfree(slow_attr);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
                             struct list_head *unready_flows)
{
        flow_flag_set(flow, NOT_READY);
        list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
        list_del(&flow->unready);
        flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_add(flow, &uplink_priv->unready_flows);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_del(flow);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}

bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
{
        struct mlx5_core_dev *out_mdev, *route_mdev;
        struct mlx5e_priv *out_priv, *route_priv;

        out_priv = netdev_priv(out_dev);
        out_mdev = out_priv->mdev;
        route_priv = netdev_priv(route_dev);
        route_mdev = route_priv->mdev;

        if (out_mdev->coredev_type != MLX5_COREDEV_PF)
                return false;

        if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
            route_mdev->coredev_type != MLX5_COREDEV_SF)
                return false;

        return mlx5e_same_hw_devs(out_priv, route_priv);
}

int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
        struct mlx5e_priv *out_priv, *route_priv;
        struct mlx5_devcom *devcom = NULL;
        struct mlx5_core_dev *route_mdev;
        struct mlx5_eswitch *esw;
        u16 vhca_id;
        int err;

        out_priv = netdev_priv(out_dev);
        esw = out_priv->mdev->priv.eswitch;
        route_priv = netdev_priv(route_dev);
        route_mdev = route_priv->mdev;

        vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
        if (mlx5_lag_is_active(out_priv->mdev)) {
                /* In lag case we may get devices from different eswitch instances.
                 * If we failed to get vport num, it means, mostly, that we on the wrong
                 * eswitch.
                 */
                err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
                if (err != -ENOENT)
                        return err;

                devcom = out_priv->mdev->priv.devcom;
                esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
                if (!esw)
                        return -ENODEV;
        }

        err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
        if (devcom)
                mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);

        return err;
}

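/* Resolve the encap destinations of an eswitch flow: for every dest marked
 * MLX5_ESW_DEST_ENCAP, attach the encap entry and record the resulting rep
 * and mdev; *vf_tun is set when a destination needs the
 * chain-with-source-port-change handling (VF tunnel) and is not an internal
 * port.
 */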
static int
set_encap_dests(struct mlx5e_priv *priv,
                struct mlx5e_tc_flow *flow,
                struct mlx5_flow_attr *attr,
                struct netlink_ext_ack *extack,
                bool *vf_tun)
{
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_esw_flow_attr *esw_attr;
        struct net_device *encap_dev = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
        int out_index;
        int err = 0;

        if (!mlx5e_is_eswitch_flow(flow))
                return 0;

        parse_attr = attr->parse_attr;
        esw_attr = attr->esw_attr;
        *vf_tun = false;

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                struct net_device *out_dev;
                int mirred_ifindex;

                if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                mirred_ifindex = parse_attr->mirred_ifindex[out_index];
                out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
                if (!out_dev) {
                        NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
                        err = -ENODEV;
                        goto out;
                }
                err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
                                         extack, &encap_dev);
                dev_put(out_dev);
                if (err)
                        goto out;

                if (esw_attr->dests[out_index].flags &
                    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
                    !esw_attr->dest_int_port)
                        *vf_tun = true;

                out_priv = netdev_priv(encap_dev);
                rpriv = out_priv->ppriv;
                esw_attr->dests[out_index].rep = rpriv->rep;
                esw_attr->dests[out_index].mdev = out_priv->mdev;
        }

        if (*vf_tun && esw_attr->out_count > 1) {
                NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
                err = -EOPNOTSUPP;
                goto out;
        }

out:
        return err;
}

static void
clean_encap_dests(struct mlx5e_priv *priv,
                  struct mlx5e_tc_flow *flow,
                  struct mlx5_flow_attr *attr)
{
        struct mlx5_esw_flow_attr *esw_attr;
        int out_index;

        if (!mlx5e_is_eswitch_flow(flow))
                return;

        esw_attr = attr->esw_attr;

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                mlx5e_detach_encap(priv, flow, attr, out_index);
                kfree(attr->parse_attr->tun_info[out_index]);
        }
}

static int
verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
{
        if (!(actions &
              (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
                NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
                return -EOPNOTSUPP;
        }

        if (!(~actions &
              (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
                NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
                return -EOPNOTSUPP;
        }

        if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
            actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
                NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
                return -EOPNOTSUPP;
        }

        return 0;
}

static int
post_process_attr(struct mlx5e_tc_flow *flow,
                  struct mlx5_flow_attr *attr,
                  struct netlink_ext_ack *extack)
{
        bool vf_tun;
        int err = 0;

        err = verify_attr_actions(attr->action, extack);
        if (err)
                goto err_out;

        err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
        if (err)
                goto err_out;

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
                if (err)
                        goto err_out;
        }

        if (attr->branch_true &&
            attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_true);
                if (err)
                        goto err_out;
        }

        if (attr->branch_false &&
            attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_false);
                if (err)
                        goto err_out;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
                if (err)
                        goto err_out;
        }

err_out:
        return err;
}

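/* FDB flow setup: validate chain/prio against the eswitch chain ranges,
 * attach decap routes and internal ports as needed, post-process the
 * attribute (encaps, mod_hdr, counter) and finally offload the rule, either
 * to the slow path (no valid neigh yet) or to the FDB tables.
 */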
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_esw_flow_attr *esw_attr;
        u32 max_prio, max_chain;
        int err = 0;

        parse_attr = attr->parse_attr;
        esw_attr = attr->esw_attr;

        /* We check chain range only for tc flows.
         * For ft flows, we checked attr->chain was originally 0 and set it to
         * FDB_FT_CHAIN which is outside tc range.
         * See mlx5e_rep_setup_ft_cb().
         */
        max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
        if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Requested chain is out of supported range");
                err = -EOPNOTSUPP;
                goto err_out;
        }

        max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
        if (attr->prio > max_prio) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Requested priority is out of supported range");
                err = -EOPNOTSUPP;
                goto err_out;
        }

        if (flow_flag_test(flow, TUN_RX)) {
                err = mlx5e_attach_decap_route(priv, flow);
                if (err)
                        goto err_out;

                if (!attr->chain && esw_attr->int_port &&
                    attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                        /* If decap route device is internal port, change the
                         * source vport value in reg_c0 back to uplink just in
                         * case the rule performs goto chain > 0. If we have a miss
                         * on chain > 0 we want the metadata regs to hold the
                         * chain id so SW will resume handling of this packet
                         * from the proper chain.
                         */
                        u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
                                                                esw_attr->in_rep->vport);

                        err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
                                                        MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
                                                        metadata);
                        if (err)
                                goto err_out;

                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
                }
        }

        if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
                err = mlx5e_attach_decap(priv, flow, extack);
                if (err)
                        goto err_out;
        }

        if (netif_is_ovs_master(parse_attr->filter_dev)) {
                struct mlx5e_tc_int_port *int_port;

                if (attr->chain) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Internal port rule is only supported on chain 0");
                        err = -EOPNOTSUPP;
                        goto err_out;
                }

                if (attr->dest_chain) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Internal port rule offload doesn't support goto action");
                        err = -EOPNOTSUPP;
                        goto err_out;
                }

                int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
                                                 parse_attr->filter_dev->ifindex,
                                                 flow_flag_test(flow, EGRESS) ?
                                                 MLX5E_TC_INT_PORT_EGRESS :
                                                 MLX5E_TC_INT_PORT_INGRESS);
                if (IS_ERR(int_port)) {
                        err = PTR_ERR(int_port);
                        goto err_out;
                }

                esw_attr->int_port = int_port;
        }

        err = post_process_attr(flow, attr, extack);
        if (err)
                goto err_out;

        err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow);
        if (err)
                goto err_out;

        /* we get here if one of the following takes place:
         * (1) there's no error
         * (2) there's an encap action and we don't have valid neigh
         */
        if (flow_flag_test(flow, SLOW))
                flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
        else
                flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);

        if (IS_ERR(flow->rule[0])) {
                err = PTR_ERR(flow->rule[0]);
                goto err_out;
        }
        flow_flag_set(flow, OFFLOADED);

        return 0;

err_out:
        flow_flag_set(flow, FAILED);
        return err;
}

static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
        void *headers_v = MLX5_ADDR_OF(fte_match_param,
                                       spec->match_value,
                                       misc_parameters_3);
        u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
                                             headers_v,
                                             geneve_tlv_option_0_data);

        return !!geneve_tlv_opt_0_data;
}

static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
{
        if (!attr)
                return;

        mlx5_free_flow_attr_actions(flow, attr);
        kvfree(attr->parse_attr);
        kfree(attr);
}

static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_esw_flow_attr *esw_attr;

        esw_attr = attr->esw_attr;
        mlx5e_put_flow_tunnel_id(flow);

        if (flow_flag_test(flow, NOT_READY))
                remove_unready_flow(flow);

        if (mlx5e_is_offloaded_flow(flow)) {
                if (flow_flag_test(flow, SLOW))
                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
                else
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
        }
        complete_all(&flow->del_hw_done);

        if (mlx5_flow_has_geneve_opt(flow))
                mlx5_geneve_tlv_option_del(priv->mdev->geneve);

        if (flow->decap_route)
                mlx5e_detach_decap_route(priv, flow);

        mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);

        if (esw_attr->int_port)
                mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);

        if (esw_attr->dest_int_port)
                mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);

        if (flow_flag_test(flow, L3_TO_L2_DECAP))
                mlx5e_detach_decap(priv, flow);

        mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow);

        free_flow_post_acts(flow);
        mlx5_free_flow_attr_actions(flow, attr);

        kvfree(attr->esw_attr->rx_tun_attr);
        kvfree(attr->parse_attr);
        kfree(flow->attr);
}

struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_attr *attr;

        attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
        return attr->counter;
}

/* Iterate over tmp_list of flows attached to flow_list head. */
void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
{
        struct mlx5e_tc_flow *flow, *tmp;

        list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
                mlx5e_flow_put(priv, flow);
}

static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;

        if (!flow_flag_test(flow, ESWITCH) ||
            !flow_flag_test(flow, DUP))
                return;

        mutex_lock(&esw->offloads.peer_mutex);
        list_del(&flow->peer);
        mutex_unlock(&esw->offloads.peer_mutex);

        flow_flag_clear(flow, DUP);

        if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
                mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
                kfree(flow->peer_flow);
        }

        flow->peer_flow = NULL;
}

static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_core_dev *dev = flow->priv->mdev;
        struct mlx5_devcom *devcom = dev->priv.devcom;
        struct mlx5_eswitch *peer_esw;

        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        if (!peer_esw)
                return;

        __mlx5e_tc_del_fdb_peer_flow(flow);
        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}

static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
{
        if (mlx5e_is_eswitch_flow(flow)) {
                mlx5e_tc_del_fdb_peer_flow(flow);
                mlx5e_tc_del_fdb_flow(priv, flow);
        } else {
                mlx5e_tc_del_nic_flow(priv, flow);
        }
}

static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct flow_action *flow_action = &rule->action;
        const struct flow_action_entry *act;
        int i;

        if (chain)
                return true;

        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_GOTO:
                        return true;
                case FLOW_ACTION_SAMPLE:
                        return true;
                default:
                        continue;
                }
        }

        return false;
}

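/* Tunnel options in chain > 0 can only be offloaded if they are either
 * completely don't-care or fully masked (option class mask 0xffff and type
 * mask 0xff), since the options are carried through the mapping table rather
 * than matched directly.
 */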
static int
enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
                                    struct flow_dissector_key_enc_opts *opts,
                                    struct netlink_ext_ack *extack,
                                    bool *dont_care)
{
        struct geneve_opt *opt;
        int off = 0;

        *dont_care = true;

        while (opts->len > off) {
                opt = (struct geneve_opt *)&opts->data[off];

                if (!(*dont_care) || opt->opt_class || opt->type ||
                    memchr_inv(opt->opt_data, 0, opt->length * 4)) {
                        *dont_care = false;

                        if (opt->opt_class != htons(U16_MAX) ||
                            opt->type != U8_MAX) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "Partial match of tunnel options in chain > 0 isn't supported");
                                netdev_warn(priv->netdev,
                                            "Partial match of tunnel options in chain > 0 isn't supported");
                                return -EOPNOTSUPP;
                        }
                }

                off += sizeof(struct geneve_opt) + opt->length * 4;
        }

        return 0;
}

#define COPY_DISSECTOR(rule, diss_key, dst)\
({ \
        struct flow_rule *__rule = (rule);\
        typeof(dst) __dst = dst;\
\
        memcpy(__dst,\
               skb_flow_dissector_target(__rule->match.dissector,\
                                         diss_key,\
                                         __rule->match.key),\
               sizeof(*__dst));\
})

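/* Register a flow's tunnel match with the mapping tables and build the
 * value/mask pair used to match (or set) the tunnel-id register:
 * value = tun_id << ENC_OPTS_BITS | enc_opts_id, so e.g. with no options
 * enc_opts_id is 0 and the options bits are masked out.
 */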
static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct flow_cls_offload *f,
				    struct net_device *filter_dev)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
	struct flow_match_enc_opts enc_opts_match;
	struct tunnel_match_enc_opts tun_enc_opts;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct tunnel_match_key tunnel_key;
	bool enc_opts_is_dont_care = true;
	u32 tun_id, enc_opts_id = 0;
	struct mlx5_eswitch *esw;
	u32 value, mask;
	int err;

	esw = priv->mdev->priv.eswitch;
	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	memset(&tunnel_key, 0, sizeof(tunnel_key));
	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
		       &tunnel_key.enc_control);
	if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
			       &tunnel_key.enc_ipv4);
	else
		COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
			       &tunnel_key.enc_ipv6);
	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
		       &tunnel_key.enc_tp);
	COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
		       &tunnel_key.enc_key_id);
	tunnel_key.filter_ifindex = filter_dev->ifindex;

	err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
	if (err)
		return err;

	flow_rule_match_enc_opts(rule, &enc_opts_match);
	err = enc_opts_is_dont_care_or_full_match(priv,
						  enc_opts_match.mask,
						  extack,
						  &enc_opts_is_dont_care);
	if (err)
		goto err_enc_opts;

	if (!enc_opts_is_dont_care) {
		memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
		memcpy(&tun_enc_opts.key, enc_opts_match.key,
		       sizeof(*enc_opts_match.key));
		memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
		       sizeof(*enc_opts_match.mask));

		err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
				  &tun_enc_opts, &enc_opts_id);
		if (err)
			goto err_enc_opts;
	}

	value = tun_id << ENC_OPTS_BITS | enc_opts_id;
	mask = enc_opts_id ? TUNNEL_ID_MASK :
			     (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);

	if (attr->chain) {
		mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
					    TUNNEL_TO_REG, value, mask);
	} else {
		mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
		err = mlx5e_tc_match_to_reg_set(priv->mdev,
						mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
						TUNNEL_TO_REG, value);
		if (err)
			goto err_set;

		attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	}

	flow->attr->tunnel_id = value;
	return 0;

err_set:
	if (enc_opts_id)
		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
			       enc_opts_id);
err_enc_opts:
	mapping_remove(uplink_priv->tunnel_mapping, tun_id);
	return err;
}
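/* The id programmed above packs the tunnel match mapping id in the high bits
 * and the tunnel options mapping id in the low ENC_OPTS_BITS:
 * value = tun_id << ENC_OPTS_BITS | enc_opts_id. mlx5e_put_flow_tunnel_id()
 * below undoes the same split using ENC_OPTS_BITS_MASK when releasing the
 * mappings.
 */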
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
{
	u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
	u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5_eswitch *esw;

	esw = flow->priv->mdev->priv.eswitch;
	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	if (tun_id)
		mapping_remove(uplink_priv->tunnel_mapping, tun_id);
	if (enc_opts_id)
		mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
			       enc_opts_id);
}
void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
			    struct flow_match_basic *match, bool outer,
			    void *headers_c, void *headers_v)
{
	bool ip_version_cap;

	ip_version_cap = outer ?
		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					  ft_field_support.outer_ip_version) :
		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
					  ft_field_support.inner_ip_version);

	if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
	    (match->key->n_proto == htons(ETH_P_IP) ||
	     match->key->n_proto == htons(ETH_P_IPV6))) {
		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
			 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
	} else {
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
			 ntohs(match->mask->n_proto));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
			 ntohs(match->key->n_proto));
	}
}
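/* When the device supports matching on ip_version and the rule fully masks
 * n_proto as IPv4/IPv6, the helper above uses the 4-bit ip_version field
 * rather than the 16-bit ethertype; for a full-mask IPv4/IPv6 match either
 * form selects the same packets.
 */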
u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
{
	void *headers_v;
	u16 ethertype;
	u8 ip_version;

	if (outer)
		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
	else
		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);

	ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
	/* Return ip_version converted from ethertype anyway */
	if (!ip_version) {
		ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
		if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
			ip_version = 4;
		else if (ethertype == ETH_P_IPV6)
			ip_version = 6;
	}

	return ip_version;
}
/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
 * It rewrites the inner ip_ecn depending on inner and outer ip_ecn as follows:
 *
 * +---------+----------------------------------------+
 * |Arriving |         Arriving Outer Header          |
 * |   Inner +---------+---------+---------+----------+
 * |  Header | Not-ECT | ECT(0)  | ECT(1)  |    CE    |
 * +---------+---------+---------+---------+----------+
 * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop>   |
 * |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
 * |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
 * |    CE   |   CE    |  CE     |  CE     |   CE     |
 * +---------+---------+---------+---------+----------+
 *
 * Tc matches on the inner header after decapsulation on the tunnel device,
 * but hw offload matches the inner ip_ecn value before the hardware decap
 * action.
 *
 * Cells marked with an asterisk are changed from the original inner packet
 * ip_ecn value during decap, so matching those values on inner ip_ecn before
 * decap will fail.
 *
 * The following helper allows offload when inner ip_ecn won't be changed by
 * outer ip_ecn, except for outer ip_ecn = CE, where in all cases inner ip_ecn
 * is changed to CE, and as such we can drop the inner ip_ecn = CE match.
 */
static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
				      struct flow_cls_offload *f,
				      bool *match_inner_ecn)
{
	u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct flow_match_ip match;

	*match_inner_ecn = true;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
		flow_rule_match_enc_ip(rule, &match);
		outer_ecn_key = match.key->tos & INET_ECN_MASK;
		outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
		flow_rule_match_ip(rule, &match);
		inner_ecn_key = match.key->tos & INET_ECN_MASK;
		inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
	}

	if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
		NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
		netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
		return -EOPNOTSUPP;
	}

	if (!outer_ecn_mask) {
		if (!inner_ecn_mask)
			return 0;

		NL_SET_ERR_MSG_MOD(extack,
				   "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
		netdev_warn(priv->netdev,
			    "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
		return -EOPNOTSUPP;
	}

	if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
		netdev_warn(priv->netdev,
			    "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
		return -EOPNOTSUPP;
	}

	if (!inner_ecn_mask)
		return 0;

	/* Both inner and outer have full mask on ecn */

	if (outer_ecn_key == INET_ECN_ECT_1) {
		/* inner ecn might change by DECAP action */
		NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
		netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
		return -EOPNOTSUPP;
	}

	if (outer_ecn_key != INET_ECN_CE)
		return 0;

	if (inner_ecn_key != INET_ECN_CE) {
		/* Can't happen in software, as packet ecn will be changed to CE after decap */
		NL_SET_ERR_MSG_MOD(extack,
				   "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
		netdev_warn(priv->netdev,
			    "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
		return -EOPNOTSUPP;
	}

	/* outer ecn = CE, inner ecn = CE, as decap will change inner ecn to CE
	 * in any case, drop the match on inner ecn
	 */
	*match_inner_ecn = false;

	return 0;
}
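/* Illustrative tc usage (not from this file) of the cases vetted above,
 * assuming a vxlan tunnel device named vxlan0 and a VF representor pf0vf1:
 *
 *   # offloadable: full-mask ECN match on both headers, outer CE, inner CE;
 *   # the inner ecn match is dropped since decap rewrites inner ecn to CE:
 *   tc filter add dev vxlan0 ingress flower enc_tos 0x3/0x3 ip_tos 0x3/0x3 \
 *           action mirred egress redirect dev pf0vf1
 *
 *   # rejected: inner ecn match without a matching enc_tos ecn match:
 *   tc filter add dev vxlan0 ingress flower ip_tos 0x3/0x3 action drop
 */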
static int parse_tunnel_attr(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec,
			     struct flow_cls_offload *f,
			     struct net_device *filter_dev,
			     u8 *match_level,
			     bool *match_inner)
{
	struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct netlink_ext_ack *extack = f->common.extack;
	bool needs_mapping, sets_mapping;
	int err;

	if (!mlx5e_is_eswitch_flow(flow)) {
		NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
		return -EOPNOTSUPP;
	}

	needs_mapping = !!flow->attr->chain;
	sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
	*match_inner = !needs_mapping;

	if ((needs_mapping || sets_mapping) &&
	    !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Chains on tunnel devices isn't supported without register loopback support");
		netdev_warn(priv->netdev,
			    "Chains on tunnel devices isn't supported without register loopback support");
		return -EOPNOTSUPP;
	}

	if (!flow->attr->chain) {
		err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
					 match_level);
		if (err) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Failed to parse tunnel attributes");
			netdev_warn(priv->netdev,
				    "Failed to parse tunnel attributes");
			return err;
		}

		/* With mpls over udp we decapsulate using packet reformat
		 * object
		 */
		if (!netif_is_bareudp(filter_dev))
			flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
		err = mlx5e_tc_set_attr_rx_tun(flow, spec);
		if (err)
			return err;
	} else if (tunnel) {
		struct mlx5_flow_spec *tmp_spec;

		tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
		if (!tmp_spec) {
			NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec");
			netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec");
			return -ENOMEM;
		}
		memcpy(tmp_spec, spec, sizeof(*tmp_spec));

		err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
		if (err) {
			kvfree(tmp_spec);
			NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
			netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
			return err;
		}
		err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
		kvfree(tmp_spec);
		if (err)
			return err;
	}

	if (!needs_mapping && !sets_mapping)
		return 0;

	return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
}
static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
{
	return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
			    inner_headers);
}

static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
{
	return MLX5_ADDR_OF(fte_match_param, spec->match_value,
			    inner_headers);
}

static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
{
	return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
			    outer_headers);
}

static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
{
	return MLX5_ADDR_OF(fte_match_param, spec->match_value,
			    outer_headers);
}

void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
{
	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
		get_match_inner_headers_value(spec) :
		get_match_outer_headers_value(spec);
}

void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
{
	return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
		get_match_inner_headers_criteria(spec) :
		get_match_outer_headers_criteria(spec);
}

static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
				   struct flow_cls_offload *f)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct net_device *ingress_dev;
	struct flow_match_meta match;

	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
		return 0;

	flow_rule_match_meta(rule, &match);
	if (!match.mask->ingress_ifindex)
		return 0;

	if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
		return -EOPNOTSUPP;
	}

	ingress_dev = __dev_get_by_index(dev_net(filter_dev),
					 match.key->ingress_ifindex);
	if (!ingress_dev) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Can't find the ingress port to match on");
		return -ENOENT;
	}

	if (ingress_dev != filter_dev) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Can't match on the ingress filter port");
		return -EOPNOTSUPP;
	}

	return 0;
}

static bool skip_key_basic(struct net_device *filter_dev,
			   struct flow_cls_offload *f)
{
	/* When doing mpls over udp decap, the user needs to provide
	 * MPLS_UC as the protocol in order to be able to match on mpls
	 * label fields.  However, the actual ethertype is IP so we want to
	 * avoid matching on this, otherwise we'll fail the match.
	 */
	if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
		return true;

	return false;
}
static int __parse_cls_flower(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow,
			      struct mlx5_flow_spec *spec,
			      struct flow_cls_offload *f,
			      struct net_device *filter_dev,
			      u8 *inner_match_level, u8 *outer_match_level)
{
	struct netlink_ext_ack *extack = f->common.extack;
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				    misc_parameters);
	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				    misc_parameters);
	void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				      misc_parameters_3);
	void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				      misc_parameters_3);
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_dissector *dissector = rule->match.dissector;
	enum fs_flow_table_type fs_type;
	bool match_inner_ecn = true;
	u16 addr_type = 0;
	u8 ip_proto = 0;
	u8 *match_level;
	int err;

	fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
	match_level = outer_match_level;

	if (dissector->used_keys &
	    ~(BIT(FLOW_DISSECTOR_KEY_META) |
	      BIT(FLOW_DISSECTOR_KEY_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
	      BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
	      BIT(FLOW_DISSECTOR_KEY_CVLAN) |
	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
	      BIT(FLOW_DISSECTOR_KEY_TCP) |
	      BIT(FLOW_DISSECTOR_KEY_IP)  |
	      BIT(FLOW_DISSECTOR_KEY_CT) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
	      BIT(FLOW_DISSECTOR_KEY_ICMP) |
	      BIT(FLOW_DISSECTOR_KEY_MPLS))) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
		netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
			   dissector->used_keys);
		return -EOPNOTSUPP;
	}

	if (mlx5e_get_tc_tun(filter_dev)) {
		bool match_inner = false;

		err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
					outer_match_level, &match_inner);
		if (err)
			return err;

		if (match_inner) {
			/* header pointers should point to the inner headers
			 * if the packet was decapsulated already.
			 * outer headers are set by parse_tunnel_attr.
			 */
			match_level = inner_match_level;
			headers_c = get_match_inner_headers_criteria(spec);
			headers_v = get_match_inner_headers_value(spec);
		}

		err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
		if (err)
			return err;
	}

	err = mlx5e_flower_parse_meta(filter_dev, f);
	if (err)
		return err;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
	    !skip_key_basic(filter_dev, f)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);
		mlx5e_tc_set_ethertype(priv->mdev, &match,
				       match_level == outer_match_level,
				       headers_c, headers_v);

		if (match.mask->n_proto)
			*match_level = MLX5_MATCH_L2;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
	    is_vlan_dev(filter_dev)) {
		struct flow_dissector_key_vlan filter_dev_mask;
		struct flow_dissector_key_vlan filter_dev_key;
		struct flow_match_vlan match;

		if (is_vlan_dev(filter_dev)) {
			match.key = &filter_dev_key;
			match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
			match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
			match.key->vlan_priority = 0;
			match.mask = &filter_dev_mask;
			memset(match.mask, 0xff, sizeof(*match.mask));
			match.mask->vlan_priority = 0;
		} else {
			flow_rule_match_vlan(rule, &match);
		}
		if (match.mask->vlan_id ||
		    match.mask->vlan_priority ||
		    match.mask->vlan_tpid) {
			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
					 svlan_tag, 1);
				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
					 svlan_tag, 1);
			} else {
				MLX5_SET(fte_match_set_lyr_2_4, headers_c,
					 cvlan_tag, 1);
				MLX5_SET(fte_match_set_lyr_2_4, headers_v,
					 cvlan_tag, 1);
			}

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
				 match.mask->vlan_id);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
				 match.key->vlan_id);

			MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
				 match.mask->vlan_priority);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
				 match.key->vlan_priority);

			*match_level = MLX5_MATCH_L2;

			if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
			    match.mask->vlan_eth_type &&
			    MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
						    ft_field_support.outer_second_vid,
						    fs_type)) {
				MLX5_SET(fte_match_set_misc, misc_c,
					 outer_second_cvlan_tag, 1);
				spec->match_criteria_enable |=
					MLX5_MATCH_MISC_PARAMETERS;
			}
		}
	} else if (*match_level != MLX5_MATCH_NONE) {
		/* cvlan_tag enabled in match criteria and
		 * disabled in match value means both S & C tags
		 * don't exist (untagged of both)
		 */
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
		*match_level = MLX5_MATCH_L2;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
		struct flow_match_vlan match;

		flow_rule_match_cvlan(rule, &match);
		if (match.mask->vlan_id ||
		    match.mask->vlan_priority ||
		    match.mask->vlan_tpid) {
			if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
						     fs_type)) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Matching on CVLAN is not supported");
				return -EOPNOTSUPP;
			}

			if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
				MLX5_SET(fte_match_set_misc, misc_c,
					 outer_second_svlan_tag, 1);
				MLX5_SET(fte_match_set_misc, misc_v,
					 outer_second_svlan_tag, 1);
			} else {
				MLX5_SET(fte_match_set_misc, misc_c,
					 outer_second_cvlan_tag, 1);
				MLX5_SET(fte_match_set_misc, misc_v,
					 outer_second_cvlan_tag, 1);
			}

			MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
				 match.mask->vlan_id);
			MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
				 match.key->vlan_id);
			MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
				 match.mask->vlan_priority);
			MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
				 match.key->vlan_priority);

			*match_level = MLX5_MATCH_L2;
			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
		}
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
		struct flow_match_eth_addrs match;

		flow_rule_match_eth_addrs(rule, &match);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     dmac_47_16),
				match.mask->dst);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     dmac_47_16),
				match.key->dst);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
					     smac_47_16),
				match.mask->src);
		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
					     smac_47_16),
				match.key->src);

		if (!is_zero_ether_addr(match.mask->src) ||
		    !is_zero_ether_addr(match.mask->dst))
			*match_level = MLX5_MATCH_L2;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);
		addr_type = match.key->addr_type;

		/* the HW doesn't support frag first/later */
		if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
			NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
			return -EOPNOTSUPP;
		}

		if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
				 match.key->flags & FLOW_DIS_IS_FRAGMENT);

			/* the HW doesn't need L3 inline to match on frag=no */
			if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
				*match_level = MLX5_MATCH_L2;
	/* ***  L2 attributes parsing up to here *** */
			else
				*match_level = MLX5_MATCH_L3;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);
		ip_proto = match.key->ip_proto;

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 match.mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 match.key->ip_proto);

		if (match.mask->ip_proto)
			*match_level = MLX5_MATCH_L3;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.key->dst, sizeof(match.key->dst));

		if (match.mask->src || match.mask->dst)
			*match_level = MLX5_MATCH_L3;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));

		if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
		    ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
			*match_level = MLX5_MATCH_L3;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
		struct flow_match_ip match;

		flow_rule_match_ip(rule, &match);
		if (match_inner_ecn) {
			MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
				 match.mask->tos & 0x3);
			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
				 match.key->tos & 0x3);
		}

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
			 match.mask->tos >> 2);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
			 match.key->tos >> 2);

		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
			 match.mask->ttl);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
			 match.key->ttl);

		if (match.mask->ttl &&
		    !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
						ft_field_support.outer_ipv4_ttl)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Matching on TTL is not supported");
			return -EOPNOTSUPP;
		}

		if (match.mask->tos || match.mask->ttl)
			*match_level = MLX5_MATCH_L3;
	}

	/* ***  L3 attributes parsing up to here *** */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;
		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack,
					   "Only UDP and TCP transports are supported for L4 matching");
			netdev_err(priv->netdev,
				   "Only UDP and TCP transport are supported\n");
			return -EINVAL;
		}

		if (match.mask->src || match.mask->dst)
			*match_level = MLX5_MATCH_L4;
	}
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));

		if (match.mask->flags)
			*match_level = MLX5_MATCH_L4;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
		struct flow_match_icmp match;

		flow_rule_match_icmp(rule, &match);
		switch (ip_proto) {
		case IPPROTO_ICMP:
			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
			      MLX5_FLEX_PROTO_ICMP)) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Match on Flex protocols for ICMP is not supported");
				return -EOPNOTSUPP;
			}
			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
				 match.mask->type);
			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
				 match.key->type);
			MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
				 match.mask->code);
			MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
				 match.key->code);
			break;
		case IPPROTO_ICMPV6:
			if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
			      MLX5_FLEX_PROTO_ICMPV6)) {
				NL_SET_ERR_MSG_MOD(extack,
						   "Match on Flex protocols for ICMPV6 is not supported");
				return -EOPNOTSUPP;
			}
			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
				 match.mask->type);
			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
				 match.key->type);
			MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
				 match.mask->code);
			MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
				 match.key->code);
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack,
					   "Code and type matching only with ICMP and ICMPv6");
			netdev_err(priv->netdev,
				   "Code and type matching only with ICMP and ICMPv6\n");
			return -EINVAL;
		}
		if (match.mask->code || match.mask->type) {
			*match_level = MLX5_MATCH_L4;
			spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
		}
	}

	/* Currently supported only for MPLS over UDP */
	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
	    !netif_is_bareudp(filter_dev)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Matching on MPLS is supported only for MPLS over UDP");
		netdev_err(priv->netdev,
			   "Matching on MPLS is supported only for MPLS over UDP\n");
		return -EOPNOTSUPP;
	}

	return 0;
}
static int parse_cls_flower(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct mlx5_flow_spec *spec,
			    struct flow_cls_offload *f,
			    struct net_device *filter_dev)
{
	u8 inner_match_level, outer_match_level, non_tunnel_match_level;
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *rep;
	bool is_eswitch_flow;
	int err;

	inner_match_level = MLX5_MATCH_NONE;
	outer_match_level = MLX5_MATCH_NONE;

	err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
				 &inner_match_level, &outer_match_level);
	non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
				 outer_match_level : inner_match_level;

	is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
	if (!err && is_eswitch_flow) {
		rep = rpriv->rep;
		if (rep->vport != MLX5_VPORT_UPLINK &&
		    (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
		    esw->offloads.inline_mode < non_tunnel_match_level)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "Flow is not offloaded due to min inline setting");
			netdev_warn(priv->netdev,
				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
				    non_tunnel_match_level, esw->offloads.inline_mode);
			return -EOPNOTSUPP;
		}
	}

	flow->attr->inner_match_level = inner_match_level;
	flow->attr->outer_match_level = outer_match_level;

	return err;
}
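/* Note: for non-uplink vports the eswitch min-inline mode must cover the
 * deepest header layer the rule matches on (L2/L3/L4); otherwise the rule
 * is rejected above rather than silently offloaded with a partial match.
 */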
struct mlx5_fields {
	u8  field;
	u8  field_bsize;
	u32 field_mask;
	u32 offset;
	u32 match_offset;
};

#define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
		{MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
		 offsetof(struct pedit_headers, field) + (off), \
		 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}

/* masked values are the same and there are no rewrites that do not have a
 * match.
 */
#define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
	type matchmaskx = *(type *)(matchmaskp); \
	type matchvalx = *(type *)(matchvalp); \
	type maskx = *(type *)(maskp); \
	type valx = *(type *)(valp); \
	\
	(valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
								 matchmaskx)); \
})

static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
			 void *matchmaskp, u8 bsize)
{
	bool same = false;

	switch (bsize) {
	case 8:
		same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
		break;
	case 16:
		same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
		break;
	case 32:
		same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
		break;
	}

	return same;
}
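/* Worked example of SAME_VAL_MASK for u8: match value/mask 0x0a/0xff with
 * rewrite value/mask 0x0a/0x0f - the masked values agree and no rewrite bit
 * falls outside the match mask, so the rewrite is a no-op and the field is
 * skipped in offload_pedit_fields() below.
 */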
static struct mlx5_fields fields[] = {
	OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
	OFFLOAD(DMAC_15_0,  16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
	OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
	OFFLOAD(SMAC_15_0,  16, U16_MAX, eth.h_source[4], 0, smac_15_0),
	OFFLOAD(ETHERTYPE,  16, U16_MAX, eth.h_proto, 0, ethertype),
	OFFLOAD(FIRST_VID,  16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),

	OFFLOAD(IP_DSCP, 8,    0xfc, ip4.tos,   0, ip_dscp),
	OFFLOAD(IP_TTL,  8,  U8_MAX, ip4.ttl,   0, ttl_hoplimit),
	OFFLOAD(SIPV4,  32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
	OFFLOAD(DIPV4,  32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),

	OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
	OFFLOAD(SIPV6_95_64,  32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
	OFFLOAD(SIPV6_63_32,  32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
	OFFLOAD(SIPV6_31_0,   32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
		src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
	OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
	OFFLOAD(DIPV6_95_64,  32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
	OFFLOAD(DIPV6_63_32,  32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
	OFFLOAD(DIPV6_31_0,   32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
		dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
	OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
	OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),

	OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
	OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest,   0, tcp_dport),
	/* in linux iphdr tcp_flags is 8 bits long */
	OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),

	OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
	OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest,   0, udp_dport),
};
static unsigned long mask_to_le(unsigned long mask, int size)
{
	__be32 mask_be32;
	__be16 mask_be16;

	if (size == 32) {
		mask_be32 = (__force __be32)(mask);
		mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
	} else if (size == 16) {
		mask_be32 = (__force __be32)(mask);
		mask_be16 = *(__be16 *)&mask_be32;
		mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
	}

	return mask;
}
static int offload_pedit_fields(struct mlx5e_priv *priv,
				int namespace,
				struct mlx5e_tc_flow_parse_attr *parse_attr,
				u32 *action_flags,
				struct netlink_ext_ack *extack)
{
	struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
	struct pedit_headers_action *hdrs = parse_attr->hdrs;
	void *headers_c, *headers_v, *action, *vals_p;
	u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
	struct mlx5e_tc_mod_hdr_acts *mod_acts;
	unsigned long mask, field_mask;
	int i, first, last, next_z;
	struct mlx5_fields *f;
	u8 cmd;

	mod_acts = &parse_attr->mod_hdr_acts;
	headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
	headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);

	set_masks = &hdrs[0].masks;
	add_masks = &hdrs[1].masks;
	set_vals = &hdrs[0].vals;
	add_vals = &hdrs[1].vals;

	for (i = 0; i < ARRAY_SIZE(fields); i++) {
		bool skip;

		f = &fields[i];
		/* avoid seeing bits set from previous iterations */
		s_mask = 0;
		a_mask = 0;

		s_masks_p = (void *)set_masks + f->offset;
		a_masks_p = (void *)add_masks + f->offset;

		s_mask = *s_masks_p & f->field_mask;
		a_mask = *a_masks_p & f->field_mask;

		if (!s_mask && !a_mask) /* nothing to offload here */
			continue;

		if (s_mask && a_mask) {
			NL_SET_ERR_MSG_MOD(extack,
					   "can't set and add to the same HW field");
			netdev_warn(priv->netdev,
				    "mlx5: can't set and add to the same HW field (%x)\n",
				    f->field);
			return -EOPNOTSUPP;
		}

		skip = false;
		if (s_mask) {
			void *match_mask = headers_c + f->match_offset;
			void *match_val = headers_v + f->match_offset;

			cmd  = MLX5_ACTION_TYPE_SET;
			mask = s_mask;
			vals_p = (void *)set_vals + f->offset;
			/* don't rewrite if we have a match on the same value */
			if (cmp_val_mask(vals_p, s_masks_p, match_val,
					 match_mask, f->field_bsize))
				skip = true;
			/* clear to denote we consumed this field */
			*s_masks_p &= ~f->field_mask;
		} else {
			cmd  = MLX5_ACTION_TYPE_ADD;
			mask = a_mask;
			vals_p = (void *)add_vals + f->offset;
			/* add 0 is no change */
			if ((*(u32 *)vals_p & f->field_mask) == 0)
				skip = true;
			/* clear to denote we consumed this field */
			*a_masks_p &= ~f->field_mask;
		}
		if (skip)
			continue;

		mask = mask_to_le(mask, f->field_bsize);

		first = find_first_bit(&mask, f->field_bsize);
		next_z = find_next_zero_bit(&mask, f->field_bsize, first);
		last  = find_last_bit(&mask, f->field_bsize);
		if (first < next_z && next_z < last) {
			NL_SET_ERR_MSG_MOD(extack,
					   "rewrite of few sub-fields isn't supported");
			netdev_warn(priv->netdev,
				    "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
				    mask);
			return -EOPNOTSUPP;
		}

		action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
		if (IS_ERR(action)) {
			NL_SET_ERR_MSG_MOD(extack,
					   "too many pedit actions, can't offload");
			mlx5_core_warn(priv->mdev,
				       "mlx5: parsed %d pedit actions, can't do more\n",
				       mod_acts->num_actions);
			return PTR_ERR(action);
		}

		MLX5_SET(set_action_in, action, action_type, cmd);
		MLX5_SET(set_action_in, action, field, f->field);

		if (cmd == MLX5_ACTION_TYPE_SET) {
			int start;

			field_mask = mask_to_le(f->field_mask, f->field_bsize);

			/* if field is bit sized it can start not from first bit */
			start = find_first_bit(&field_mask, f->field_bsize);

			MLX5_SET(set_action_in, action, offset, first - start);
			/* length is num of bits to be written, zero means length of 32 */
			MLX5_SET(set_action_in, action, length, (last - first + 1));
		}

		if (f->field_bsize == 32)
			MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
		else if (f->field_bsize == 16)
			MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
		else if (f->field_bsize == 8)
			MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);

		++mod_acts->num_actions;
	}

	return 0;
}
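/* The first/next_z/last walk above restricts every offloaded rewrite mask to
 * one contiguous run of set bits: e.g. mask 0x0ff0 is accepted (offset 4,
 * length 8) while 0x0f0f would be a rewrite of disjoint sub-fields and is
 * rejected.
 */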
static const struct pedit_headers zero_masks = {};

static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
				       struct mlx5e_tc_flow_parse_attr *parse_attr,
				       struct netlink_ext_ack *extack)
{
	struct pedit_headers *cmd_masks;
	u8 cmd;

	for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
		cmd_masks = &parse_attr->hdrs[cmd].masks;
		if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
			NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
			netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
			print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
				       16, 1, cmd_masks, sizeof(zero_masks), true);
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
				 struct mlx5e_tc_flow_parse_attr *parse_attr,
				 u32 *action_flags,
				 struct netlink_ext_ack *extack)
{
	int err;

	err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
	if (err)
		goto out_dealloc_parsed_actions;

	err = verify_offload_pedit_fields(priv, parse_attr, extack);
	if (err)
		goto out_dealloc_parsed_actions;

	return 0;

out_dealloc_parsed_actions:
	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
	return err;
}
struct ip_ttl_word {
	__u8	ttl;
	__u8	protocol;
	__sum16	check;
};

struct ipv6_hoplimit_word {
	__be16	payload_len;
	__u8	nexthdr;
	__u8	hop_limit;
};

static bool
is_flow_action_modify_ip_header(struct flow_action *flow_action)
{
	const struct flow_action_entry *act;
	u32 mask, offset;
	u8 htype;
	int i;

	/* For IPv4 & IPv6 header check 4 byte word,
	 * to determine that modified fields
	 * are NOT ttl & hop_limit only.
	 */
	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE &&
		    act->id != FLOW_ACTION_ADD)
			continue;

		htype = act->mangle.htype;
		offset = act->mangle.offset;
		mask = ~act->mangle.mask;

		if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
			struct ip_ttl_word *ttl_word =
				(struct ip_ttl_word *)&mask;

			if (offset != offsetof(struct iphdr, ttl) ||
			    ttl_word->protocol ||
			    ttl_word->check)
				return true;
		} else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
			struct ipv6_hoplimit_word *hoplimit_word =
				(struct ipv6_hoplimit_word *)&mask;

			if (offset != offsetof(struct ipv6hdr, payload_len) ||
			    hoplimit_word->payload_len ||
			    hoplimit_word->nexthdr)
				return true;
		}
	}

	return false;
}
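/* The casts above rely on pedit masks covering an aligned 4-byte word: the
 * inverted mask is overlaid with ip_ttl_word/ipv6_hoplimit_word, so any
 * modified bit in that word other than ttl/hop_limit marks the action as a
 * real IP header rewrite.
 */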
static bool modify_header_match_supported(struct mlx5e_priv *priv,
					  struct mlx5_flow_spec *spec,
					  struct flow_action *flow_action,
					  u32 actions,
					  struct netlink_ext_ack *extack)
{
	bool modify_ip_header;
	void *headers_c;
	void *headers_v;
	u16 ethertype;
	u8 ip_proto;

	headers_c = mlx5e_get_match_headers_criteria(actions, spec);
	headers_v = mlx5e_get_match_headers_value(actions, spec);
	ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);

	/* for non-IP we only re-write MACs, so we're okay */
	if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
	    ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
		goto out_ok;

	modify_ip_header = is_flow_action_modify_ip_header(flow_action);
	ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
	if (modify_ip_header && ip_proto != IPPROTO_TCP &&
	    ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "can't offload re-write of non TCP/UDP");
		netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
			    ip_proto);
		return false;
	}

out_ok:
	return true;
}

static bool
actions_match_supported_fdb(struct mlx5e_priv *priv,
			    struct mlx5e_tc_flow *flow,
			    struct netlink_ext_ack *extack)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;

	if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "current firmware doesn't support split rule for port mirroring");
		netdev_warn_once(priv->netdev,
				 "current firmware doesn't support split rule for port mirroring\n");
		return false;
	}

	return true;
}

static bool
actions_match_supported(struct mlx5e_priv *priv,
			struct flow_action *flow_action,
			u32 actions,
			struct mlx5e_tc_flow_parse_attr *parse_attr,
			struct mlx5e_tc_flow *flow,
			struct netlink_ext_ack *extack)
{
	if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
	    !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions,
					   extack))
		return false;

	if (mlx5e_is_eswitch_flow(flow) &&
	    !actions_match_supported_fdb(priv, flow, extack))
		return false;

	return true;
}
static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
{
	return priv->mdev == peer_priv->mdev;
}

bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
{
	struct mlx5_core_dev *fmdev, *pmdev;
	u64 fsystem_guid, psystem_guid;

	fmdev = priv->mdev;
	pmdev = peer_priv->mdev;

	fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
	psystem_guid = mlx5_query_nic_system_image_guid(pmdev);

	return (fsystem_guid == psystem_guid);
}

static int
actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
				struct mlx5e_tc_flow *flow,
				struct mlx5_flow_attr *attr,
				struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
	struct pedit_headers_action *hdrs = parse_attr->hdrs;
	enum mlx5_flow_namespace_type ns_type;
	int err;

	if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
	    !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
		return 0;

	ns_type = mlx5e_get_flow_namespace(flow);

	err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
	if (err)
		return err;

	if (parse_attr->mod_hdr_acts.num_actions > 0)
		return 0;

	/* In case all pedit actions are skipped, remove the MOD_HDR flag. */
	attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);

	if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
		return 0;

	if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
	      (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
		attr->esw_attr->split_count = 0;

	return 0;
}
static struct mlx5_flow_attr *
mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
				   enum mlx5_flow_namespace_type ns_type)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	u32 attr_sz = ns_to_attr_sz(ns_type);
	struct mlx5_flow_attr *attr2;

	attr2 = mlx5_alloc_flow_attr(ns_type);
	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
	if (!attr2 || !parse_attr) {
		kvfree(parse_attr);
		kfree(attr2);
		return NULL;
	}

	memcpy(attr2, attr, attr_sz);
	INIT_LIST_HEAD(&attr2->list);
	parse_attr->filter_dev = attr->parse_attr->filter_dev;
	attr2->action = 0;
	attr2->counter = NULL;
	attr2->tc_act_cookies_count = 0;
	attr2->flags = 0;
	attr2->parse_attr = parse_attr;
	attr2->dest_chain = 0;
	attr2->dest_ft = NULL;
	attr2->act_id_restore_rule = NULL;
	memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr));

	if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
		attr2->esw_attr->out_count = 0;
		attr2->esw_attr->split_count = 0;
	}

	attr2->branch_true = NULL;
	attr2->branch_false = NULL;
	attr2->jumping_attr = NULL;
	return attr2;
}

struct mlx5_flow_attr *
mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
{
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_attr *attr;
	int i;

	list_for_each_entry(attr, &flow->attrs, list) {
		esw_attr = attr->esw_attr;
		for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
			if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
				return attr;
		}
	}

	return NULL;
}
void
mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
{
	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
	struct mlx5_flow_attr *attr;

	list_for_each_entry(attr, &flow->attrs, list) {
		if (list_is_last(&attr->list, &flow->attrs))
			break;

		mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
	}
}

static void
free_flow_post_acts(struct mlx5e_tc_flow *flow)
{
	struct mlx5_flow_attr *attr, *tmp;

	list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
		if (list_is_last(&attr->list, &flow->attrs))
			break;

		mlx5_free_flow_attr_actions(flow, attr);

		list_del(&attr->list);
		kvfree(attr->parse_attr);
		kfree(attr);
	}
}

int
mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
{
	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
	struct mlx5_flow_attr *attr;
	int err = 0;

	list_for_each_entry(attr, &flow->attrs, list) {
		if (list_is_last(&attr->list, &flow->attrs))
			break;

		err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
		if (err)
			break;
	}

	return err;
}
/* TC filter rule HW translation:
 *
 * +---------------------+
 * + ft prio (tc chain)  +
 * + original match      +
 * +---------------------+
 *           |
 *           | if multi table action
 *           v
 * +---------------------+
 * + post act ft         |<----.
 * + match fte id        |     | split on multi table action
 * + do actions          |-----'
 * +---------------------+
 *           |
 *           v
 * Do rest of the actions after last multi table action.
 */
static int
alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
{
	struct mlx5e_post_act *post_act = get_post_action(flow->priv);
	struct mlx5_flow_attr *attr, *next_attr = NULL;
	struct mlx5e_post_act_handle *handle;
	int err;

	/* This is going in reverse order as needed.
	 * The first entry is the last attribute.
	 */
	list_for_each_entry(attr, &flow->attrs, list) {
		if (!next_attr) {
			/* Set counter action on last post act rule. */
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
		}

		if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) {
			err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
			if (err)
				goto out_free;
		}

		/* Don't add post_act rule for first attr (last in the list).
		 * It's being handled by the caller.
		 */
		if (list_is_last(&attr->list, &flow->attrs))
			break;

		err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
		if (err)
			goto out_free;

		err = post_process_attr(flow, attr, extack);
		if (err)
			goto out_free;

		handle = mlx5e_tc_post_act_add(post_act, attr);
		if (IS_ERR(handle)) {
			err = PTR_ERR(handle);
			goto out_free;
		}

		attr->post_act_handle = handle;

		if (attr->jumping_attr) {
			err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr);
			if (err)
				goto out_free;
		}

		next_attr = attr;
	}

	if (flow_flag_test(flow, SLOW))
		goto out;

	err = mlx5e_tc_offload_flow_post_acts(flow);
	if (err)
		goto out_free;

out:
	return 0;

out_free:
	free_flow_post_acts(flow);
	return err;
}
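/* flow->attrs is built with list_add(), so the walk above visits attributes
 * in reverse parse order: the counter lands on the final rule, every post-act
 * rule is chained to its successor, and the first attribute (last in the
 * list) is left for the caller to offload.
 */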
static int
alloc_branch_attr(struct mlx5e_tc_flow *flow,
		  struct mlx5e_tc_act_branch_ctrl *cond,
		  struct mlx5_flow_attr **cond_attr,
		  u32 *jump_count,
		  struct netlink_ext_ack *extack)
{
	struct mlx5_flow_attr *attr;
	int err = 0;

	*cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr,
							mlx5e_get_flow_namespace(flow));
	if (!(*cond_attr))
		return -ENOMEM;

	attr = *cond_attr;

	switch (cond->act_id) {
	case FLOW_ACTION_DROP:
		attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
		break;
	case FLOW_ACTION_ACCEPT:
	case FLOW_ACTION_PIPE:
		attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
		attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
		break;
	case FLOW_ACTION_JUMP:
		if (*jump_count) {
			NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps");
			err = -EOPNOTSUPP;
			goto out_err;
		}
		*jump_count = cond->extval;
		attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
		attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
		break;
	default:
		err = -EOPNOTSUPP;
		goto out_err;
	}

	return err;
out_err:
	kfree(*cond_attr);
	*cond_attr = NULL;
	return err;
}
static void
dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
	       struct mlx5_flow_attr *attr, struct mlx5e_priv *priv,
	       struct mlx5e_tc_jump_state *jump_state)
{
	if (!jump_state->jump_count)
		return;

	/* Single tc action can instantiate multiple offload actions (e.g. pedit)
	 * Jump only over a tc action
	 */
	if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index)
		return;

	jump_state->last_id = act->id;
	jump_state->last_index = act->hw_index;

	/* nothing to do for intermediate actions */
	if (--jump_state->jump_count > 1)
		return;

	if (jump_state->jump_count == 1) { /* last action in the jump action list */

		/* create a new attribute after this action */
		jump_state->jump_target = true;

		if (tc_act->is_terminating_action) { /* the branch ends here */
			attr->flags |= MLX5_ATTR_FLAG_TERMINATING;
			attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
		} else { /* the branch continues executing the rest of the actions */
			struct mlx5e_post_act *post_act;

			attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
			post_act = get_post_action(priv);
			attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
		}
	} else if (jump_state->jump_count == 0) { /* first attr after the jump action list */
		/* This is the post action for the jumping attribute (either red or green)
		 * Use the stored jumping_attr to set the post act id on the jumping attribute
		 */
		attr->jumping_attr = jump_state->jumping_attr;
	}
}

static int
parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
		  struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr,
		  struct mlx5e_tc_jump_state *jump_state,
		  struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_act_branch_ctrl cond_true, cond_false;
	u32 jump_count = jump_state->jump_count;
	int err;

	if (!tc_act->get_branch_ctrl)
		return 0;

	tc_act->get_branch_ctrl(act, &cond_true, &cond_false);

	err = alloc_branch_attr(flow, &cond_true,
				&attr->branch_true, &jump_count, extack);
	if (err)
		goto out_err;

	if (jump_count)
		jump_state->jumping_attr = attr->branch_true;

	err = alloc_branch_attr(flow, &cond_false,
				&attr->branch_false, &jump_count, extack);
	if (err)
		goto err_branch_false;

	if (jump_count && !jump_state->jumping_attr)
		jump_state->jumping_attr = attr->branch_false;

	jump_state->jump_count = jump_count;

	/* branching action requires its own counter */
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
	flow_flag_set(flow, USE_ACT_STATS);

	return 0;

err_branch_false:
	free_branch_attr(flow, attr->branch_true);
out_err:
	return err;
}
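/* Illustrative branching action (not from this file) that this parser
 * handles, assuming a police action whose conform/exceed verdicts are
 * reported through the act's get_branch_ctrl() callback:
 *
 *   tc filter add dev pf0vf0 ingress flower \
 *           action police rate 100mbit burst 12k conform-exceed drop/pipe \
 *           action mirred egress redirect dev pf0vf1
 *
 * The conform (pipe) branch continues to the post action table while the
 * exceed (drop) branch terminates; each branch gets its own cloned attr and
 * its own counter.
 */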
static int
parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
		 struct flow_action *flow_action)
{
	struct netlink_ext_ack *extack = parse_state->extack;
	struct mlx5e_tc_flow *flow = parse_state->flow;
	struct mlx5e_tc_jump_state jump_state = {};
	struct mlx5_flow_attr *attr = flow->attr;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5e_priv *priv = flow->priv;
	struct mlx5_flow_attr *prev_attr;
	struct flow_action_entry *act;
	struct mlx5e_tc_act *tc_act;
	bool is_missable;
	int err, i;

	ns_type = mlx5e_get_flow_namespace(flow);
	list_add(&attr->list, &flow->attrs);

	flow_action_for_each(i, act, flow_action) {
		jump_state.jump_target = false;
		is_missable = false;
		prev_attr = attr;

		tc_act = mlx5e_tc_act_get(act->id, ns_type);
		if (!tc_act) {
			NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
			err = -EOPNOTSUPP;
			goto out_free_post_acts;
		}

		if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) {
			err = -EOPNOTSUPP;
			goto out_free_post_acts;
		}

		err = tc_act->parse_action(parse_state, act, priv, attr);
		if (err)
			goto out_free_post_acts;

		dec_jump_count(act, tc_act, attr, priv, &jump_state);

		err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack);
		if (err)
			goto out_free_post_acts;

		parse_state->actions |= attr->action;

		/* Split attr for multi table act if not the last act. */
		if (jump_state.jump_target ||
		    (tc_act->is_multi_table_act &&
		    tc_act->is_multi_table_act(priv, act, attr) &&
		    i < flow_action->num_entries - 1)) {
			is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false;

			err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
			if (err)
				goto out_free_post_acts;

			attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
			if (!attr) {
				err = -ENOMEM;
				goto out_free_post_acts;
			}

			list_add(&attr->list, &flow->attrs);
		}

		if (is_missable) {
			/* Add counter to prev, and assign act to new (next) attr */
			prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
			flow_flag_set(flow, USE_ACT_STATS);

			attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
		} else if (!tc_act->stats_action) {
			prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie;
		}
	}

	err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
	if (err)
		goto out_free_post_acts;

	err = alloc_flow_post_acts(flow, extack);
	if (err)
		goto out_free_post_acts;

	return 0;

out_free_post_acts:
	free_flow_post_acts(flow);

	return err;
}
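/* Each multi-table action (and each jump target) above splits the flow into
 * a fresh attribute linked into flow->attrs; alloc_flow_post_acts() then
 * instantiates one post-action table rule per non-first attribute.
 */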
static int
flow_action_supported(struct flow_action *flow_action,
		      struct netlink_ext_ack *extack)
{
	if (!flow_action_has_entries(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
		return -EINVAL;
	}

	if (!flow_action_hw_stats_check(flow_action, extack,
					FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
		return -EOPNOTSUPP;
	}

	return 0;
}

static int
parse_tc_nic_actions(struct mlx5e_priv *priv,
		     struct flow_action *flow_action,
		     struct mlx5e_tc_flow *flow,
		     struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_act_parse_state *parse_state;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	int err;

	err = flow_action_supported(flow_action, extack);
	if (err)
		return err;

	attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
	parse_attr = attr->parse_attr;
	parse_state = &parse_attr->parse_state;
	mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
	parse_state->ct_priv = get_ct_priv(priv);

	err = parse_tc_actions(parse_state, flow_action);
	if (err)
		return err;

	err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
	if (err)
		return err;

	err = verify_attr_actions(attr->action, extack);
	if (err)
		return err;

	if (!actions_match_supported(priv, flow_action, parse_state->actions,
				     parse_attr, flow, extack))
		return -EOPNOTSUPP;

	return 0;
}
static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
				  struct net_device *peer_netdev)
{
	struct mlx5e_priv *peer_priv;

	peer_priv = netdev_priv(peer_netdev);

	return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
		mlx5e_eswitch_vf_rep(priv->netdev) &&
		mlx5e_eswitch_vf_rep(peer_netdev) &&
		mlx5e_same_hw_devs(priv, peer_priv));
}

static bool same_hw_reps(struct mlx5e_priv *priv,
			 struct net_device *peer_netdev)
{
	struct mlx5e_priv *peer_priv;

	peer_priv = netdev_priv(peer_netdev);

	return mlx5e_eswitch_rep(priv->netdev) &&
	       mlx5e_eswitch_rep(peer_netdev) &&
	       mlx5e_same_hw_devs(priv, peer_priv);
}

static bool is_lag_dev(struct mlx5e_priv *priv,
		       struct net_device *peer_netdev)
{
	return ((mlx5_lag_is_sriov(priv->mdev) ||
		 mlx5_lag_is_multipath(priv->mdev)) &&
		 same_hw_reps(priv, peer_netdev));
}

static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
{
	return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev);
}

bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
				    struct net_device *out_dev)
{
	if (is_merged_eswitch_vfs(priv, out_dev))
		return true;

	if (is_multiport_eligible(priv, out_dev))
		return true;

	if (is_lag_dev(priv, out_dev))
		return true;

	return mlx5e_eswitch_rep(out_dev) &&
	       same_port_devs(priv, netdev_priv(out_dev));
}
int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
				      struct mlx5_flow_attr *attr,
				      int ifindex,
				      enum mlx5e_tc_int_port_type type,
				      u32 *action,
				      int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct mlx5e_tc_int_port_priv *int_port_priv;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_int_port *dest_int_port;
	int err;

	parse_attr = attr->parse_attr;
	int_port_priv = mlx5e_get_int_port_priv(priv);

	dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
	if (IS_ERR(dest_int_port))
		return PTR_ERR(dest_int_port);

	err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
					MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
					mlx5e_tc_int_port_get_metadata(dest_int_port));
	if (err) {
		mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
		return err;
	}

	*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	esw_attr->dest_int_port = dest_int_port;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	esw_attr->split_count = out_index;

	/* Forward to root fdb for matching against the new source vport */
	attr->dest_chain = 0;

	return 0;
}
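/* The VPORT_TO_REG rewrite programmed above stamps the internal port
 * metadata into the source-port register before the packet is re-steered to
 * chain 0 of the root FDB, where rules match on the new source vport.
 */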
static int
parse_tc_fdb_actions(struct mlx5e_priv *priv,
		     struct flow_action *flow_action,
		     struct mlx5e_tc_flow *flow,
		     struct netlink_ext_ack *extack)
{
	struct mlx5e_tc_act_parse_state *parse_state;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct net_device *filter_dev;
	int err;

	err = flow_action_supported(flow_action, extack);
	if (err)
		return err;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	filter_dev = parse_attr->filter_dev;
	parse_state = &parse_attr->parse_state;
	mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
	parse_state->ct_priv = get_ct_priv(priv);

	err = parse_tc_actions(parse_state, flow_action);
	if (err)
		return err;

	/* Forward to/from internal port can only have 1 dest */
	if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
	    esw_attr->out_count > 1) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Rules with internal port can have only one destination");
		return -EOPNOTSUPP;
	}

	/* Forward from tunnel/internal port to internal port is not supported */
	if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
	    esw_attr->dest_int_port) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Forwarding from tunnel/internal port to internal port is not supported");
		return -EOPNOTSUPP;
	}

	err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
	if (err)
		return err;

	if (!actions_match_supported(priv, flow_action, parse_state->actions,
				     parse_attr, flow, extack))
		return -EOPNOTSUPP;

	return 0;
}
static void get_flags(int flags, unsigned long *flow_flags)
{
	unsigned long __flow_flags = 0;

	if (flags & MLX5_TC_FLAG(INGRESS))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
	if (flags & MLX5_TC_FLAG(EGRESS))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);

	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
	if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
	if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
		__flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);

	*flow_flags = __flow_flags;
}
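/* Offloaded flows live in an rhashtable keyed by the tc flow cookie, so
 * replace/destroy/stats requests can locate a flow from the
 * flow_cls_offload handle alone.
 */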
static const struct rhashtable_params tc_ht_params = {
	.head_offset = offsetof(struct mlx5e_tc_flow, node),
	.key_offset = offsetof(struct mlx5e_tc_flow, cookie),
	.key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
	.automatic_shrinking = true,
};
static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
				    unsigned long flags)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5e_rep_priv *rpriv;

	if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
		rpriv = priv->ppriv;
		return &rpriv->tc_ht;
	} else /* NIC offload */
		return &tc->ht;
}
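/* A flow needs a duplicate ("peer") rule on the other eswitch when the two
 * devices are paired through devcom (e.g. VF LAG or multipath), so traffic
 * arriving on either physical port hits an equivalent rule.
 */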
static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
		flow_flag_test(flow, INGRESS);
	bool act_is_encap = !!(attr->action &
			       MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
	bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
						MLX5_DEVCOM_ESW_OFFLOADS);

	if (!esw_paired)
		return false;

	if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
	     mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
	    (is_rep_ingress || act_is_encap))
		return true;

	if (mlx5_lag_is_mpesw(esw_attr->in_mdev))
		return true;

	return false;
}
struct mlx5_flow_attr *
mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
{
	u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB)  ?
				sizeof(struct mlx5_esw_flow_attr) :
				sizeof(struct mlx5_nic_flow_attr);
	struct mlx5_flow_attr *attr;

	attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
	if (!attr)
		return attr;

	INIT_LIST_HEAD(&attr->list);
	return attr;
}
void
mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
{
	struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);

	if (attr->post_act_handle)
		mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);

	clean_encap_dests(flow->priv, flow, attr);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
		mlx5_fc_destroy(counter_dev, attr->counter);

	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
		mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
		mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
	}

	mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);

	free_branch_attr(flow, attr->branch_true);
	free_branch_attr(flow, attr->branch_false);
}
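/* Flow allocation is split in two: the refcounted, long-lived flow object
 * (kzalloc) and the parse-time attributes (kvzalloc, since the match spec
 * is large and only needed while the rule is parsed and installed).
 */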
static int
mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
		 struct flow_cls_offload *f, unsigned long flow_flags,
		 struct mlx5e_tc_flow_parse_attr **__parse_attr,
		 struct mlx5e_tc_flow **__flow)
{
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr;
	struct mlx5e_tc_flow *flow;
	int err = -ENOMEM;
	int out_index;

	flow = kzalloc(sizeof(*flow), GFP_KERNEL);
	parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
	if (!parse_attr || !flow)
		goto err_free;

	flow->flags = flow_flags;
	flow->cookie = f->cookie;
	flow->priv = priv;

	attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
	if (!attr)
		goto err_free;

	flow->attr = attr;

	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
		INIT_LIST_HEAD(&flow->encaps[out_index].list);
	INIT_LIST_HEAD(&flow->hairpin);
	INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
	INIT_LIST_HEAD(&flow->attrs);
	refcount_set(&flow->refcnt, 1);
	init_completion(&flow->init_done);
	init_completion(&flow->del_hw_done);

	*__flow = flow;
	*__parse_attr = parse_attr;

	return 0;

err_free:
	kfree(flow);
	kvfree(parse_attr);
	return err;
}
static void
mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
		     struct mlx5e_tc_flow_parse_attr *parse_attr,
		     struct flow_cls_offload *f)
{
	attr->parse_attr = parse_attr;
	attr->chain = f->common.chain_index;
	attr->prio = f->common.prio;
}
static void
mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
			 struct mlx5e_priv *priv,
			 struct mlx5e_tc_flow_parse_attr *parse_attr,
			 struct flow_cls_offload *f,
			 struct mlx5_eswitch_rep *in_rep,
			 struct mlx5_core_dev *in_mdev)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;

	mlx5e_flow_attr_init(attr, parse_attr, f);

	esw_attr->in_rep = in_rep;
	esw_attr->in_mdev = in_mdev;

	if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
	    MLX5_COUNTER_SOURCE_ESWITCH)
		esw_attr->counter_dev = in_mdev;
	else
		esw_attr->counter_dev = priv->mdev;
}
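/* FDB offload pipeline: allocate the flow, parse the flower match, add the
 * CT match state (actions validation depends on it), parse the actions and
 * install the rule. A multipath -ENETUNREACH result is not fatal: the flow
 * is parked on the unready list and retried later from
 * mlx5e_tc_reoffload_flows_work().
 */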
static struct mlx5e_tc_flow *
__mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		     struct flow_cls_offload *f,
		     unsigned long flow_flags,
		     struct net_device *filter_dev,
		     struct mlx5_eswitch_rep *in_rep,
		     struct mlx5_core_dev *in_mdev)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int attr_size, err;

	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
	attr_size  = sizeof(struct mlx5_esw_flow_attr);
	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
			       &parse_attr, &flow);
	if (err)
		goto out;

	parse_attr->filter_dev = filter_dev;
	mlx5e_flow_esw_attr_init(flow->attr,
				 priv, parse_attr,
				 f, in_rep, in_mdev);

	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
			       f, filter_dev);
	if (err)
		goto err_free;

	/* actions validation depends on parsing the ct matches first */
	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
				   &flow->attr->ct_attr, extack);
	if (err)
		goto err_free;

	err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
	if (err)
		goto err_free;

	err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
	complete_all(&flow->init_done);
	if (err) {
		if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
			goto err_free;

		add_unready_flow(flow);
	}

	return flow;

err_free:
	mlx5e_flow_put(priv, flow);
out:
	return ERR_PTR(err);
}
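/* Duplicate a flow on the paired eswitch. The in_mdev choice follows where
 * packets can physically arrive: uplink-rep rules are instantiated with the
 * peer mdev, except in multiport eswitch mode which keeps the original one
 * (see the comment in the function body).
 */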
static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
				      struct mlx5e_tc_flow *flow,
				      unsigned long flow_flags)
{
	struct mlx5e_priv *priv = flow->priv, *peer_priv;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_rep_priv *peer_urpriv;
	struct mlx5e_tc_flow *peer_flow;
	struct mlx5_core_dev *in_mdev;
	int err = 0;

	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		return -ENODEV;

	peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
	peer_priv = netdev_priv(peer_urpriv->netdev);

	/* in_mdev is assigned of which the packet originated from.
	 * So packets redirected to uplink use the same mdev of the
	 * original flow and packets redirected from uplink use the
	 * peer mdev.
	 * In multiport eswitch it's a special case that we need to
	 * keep the original mdev.
	 */
	if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev))
		in_mdev = peer_priv->mdev;
	else
		in_mdev = priv->mdev;

	parse_attr = flow->attr->parse_attr;
	peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
					 parse_attr->filter_dev,
					 attr->in_rep, in_mdev);
	if (IS_ERR(peer_flow)) {
		err = PTR_ERR(peer_flow);
		goto out;
	}

	flow->peer_flow = peer_flow;
	flow_flag_set(flow, DUP);
	mutex_lock(&esw->offloads.peer_mutex);
	list_add_tail(&flow->peer, &esw->offloads.peer_flows);
	mutex_unlock(&esw->offloads.peer_mutex);

out:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	return err;
}
static int
mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch_rep *in_rep = rpriv->rep;
	struct mlx5_core_dev *in_mdev = priv->mdev;
	struct mlx5e_tc_flow *flow;
	int err;

	flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
				    in_mdev);
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (is_peer_flow_needed(flow)) {
		err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
		if (err) {
			mlx5e_tc_del_fdb_flow(priv, flow);
			goto out;
		}
	}

	*__flow = flow;

	return 0;

out:
	return err;
}
static int
mlx5e_add_nic_flow(struct mlx5e_priv *priv,
		   struct flow_cls_offload *f,
		   unsigned long flow_flags,
		   struct net_device *filter_dev,
		   struct mlx5e_tc_flow **__flow)
{
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct netlink_ext_ack *extack = f->common.extack;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_tc_flow *flow;
	int attr_size, err;

	if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
		if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
			return -EOPNOTSUPP;
	} else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
		return -EOPNOTSUPP;
	}

	flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
	attr_size  = sizeof(struct mlx5_nic_flow_attr);
	err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
			       &parse_attr, &flow);
	if (err)
		goto out;

	parse_attr->filter_dev = filter_dev;
	mlx5e_flow_attr_init(flow->attr, parse_attr, f);

	err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
			       f, filter_dev);
	if (err)
		goto err_free;

	err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
				   &flow->attr->ct_attr, extack);
	if (err)
		goto err_free;

	err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
	if (err)
		goto err_free;

	err = mlx5e_tc_add_nic_flow(priv, flow, extack);
	if (err)
		goto err_free;

	flow_flag_set(flow, OFFLOADED);
	*__flow = flow;

	return 0;

err_free:
	flow_flag_set(flow, FAILED);
	mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
	mlx5e_flow_put(priv, flow);
out:
	return err;
}
static int
mlx5e_tc_add_flow(struct mlx5e_priv *priv,
		  struct flow_cls_offload *f,
		  unsigned long flags,
		  struct net_device *filter_dev,
		  struct mlx5e_tc_flow **flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long flow_flags;
	int err;

	get_flags(flags, &flow_flags);

	if (!tc_can_offload_extack(priv->netdev, f->common.extack))
		return -EOPNOTSUPP;

	if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
		err = mlx5e_add_fdb_flow(priv, f, flow_flags,
					 filter_dev, flow);
	else
		err = mlx5e_add_nic_flow(priv, f, flow_flags,
					 filter_dev, flow);

	return err;
}
static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
					   struct mlx5e_rep_priv *rpriv)
{
	/* Offloaded flow rule is allowed to duplicate on non-uplink representor
	 * sharing tc block with other slaves of a lag device. Rpriv can be NULL if this
	 * function is called from NIC mode.
	 */
	return netif_is_lag_port(dev) && rpriv &&
	       rpriv->rep->vport != MLX5_VPORT_UPLINK;
}
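/* FLOW_CLS_REPLACE entry point. The cookie lookup runs under RCU; a
 * duplicate cookie is tolerated only for lag ports sharing a tc block
 * (see is_flow_rule_duplicate_allowed() above), otherwise the add is
 * rejected.
 */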
int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
			   struct flow_cls_offload *f, unsigned long flags)
{
	struct netlink_ext_ack *extack = f->common.extack;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5e_tc_flow *flow;
	int err = 0;

	if (!mlx5_esw_hold(priv->mdev))
		return -EBUSY;

	mlx5_esw_get(priv->mdev);

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	if (flow) {
		/* Same flow rule offloaded to non-uplink representor sharing tc block,
		 * just return 0.
		 */
		if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
			goto rcu_unlock;

		NL_SET_ERR_MSG_MOD(extack,
				   "flow cookie already exists, ignoring");
		netdev_warn_once(priv->netdev,
				 "flow cookie %lx already exists, ignoring\n",
				 f->cookie);
		err = -EEXIST;
		goto rcu_unlock;
	}
rcu_unlock:
	rcu_read_unlock();
	if (flow)
		goto out;

	trace_mlx5e_configure_flower(f);
	err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
	if (err)
		goto out;

	/* Flow rule offloaded to non-uplink representor sharing tc block,
	 * set the flow's owner dev.
	 */
	if (is_flow_rule_duplicate_allowed(dev, rpriv))
		flow->orig_dev = dev;

	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
	if (err)
		goto err_free;

	mlx5_esw_release(priv->mdev);
	return 0;

err_free:
	mlx5e_flow_put(priv, flow);
out:
	mlx5_esw_put(priv->mdev);
	mlx5_esw_release(priv->mdev);
	return err;
}
static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
{
	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));

	return flow_flag_test(flow, INGRESS) == dir_ingress &&
		flow_flag_test(flow, EGRESS) == dir_egress;
}
int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
			struct flow_cls_offload *f, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err;

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	if (!flow || !same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	/* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
	 * set.
	 */
	if (flow_flag_test_and_set(flow, DELETED)) {
		err = -EINVAL;
		goto errout;
	}
	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
	rcu_read_unlock();

	trace_mlx5e_delete_flower(f);
	mlx5e_flow_put(priv, flow);

	mlx5_esw_put(priv->mdev);
	return 0;

errout:
	rcu_read_unlock();
	return err;
}
int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv,
			       struct flow_offload_action *fl_act)
{
	return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv), fl_act);
}
int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
		       struct flow_cls_offload *f, unsigned long flags)
{
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5_eswitch *peer_esw;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	u64 lastuse = 0;
	u64 packets = 0;
	u64 bytes = 0;
	int err = 0;

	rcu_read_lock();
	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
						tc_ht_params));
	rcu_read_unlock();
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (!same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	if (mlx5e_is_offloaded_flow(flow)) {
		if (flow_flag_test(flow, USE_ACT_STATS)) {
			f->use_act_stats = true;
		} else {
			counter = mlx5e_tc_get_counter(flow);
			if (!counter)
				goto errout;

			mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
		}
	}

	/* Under multipath it's possible for one rule to be currently
	 * un-offloaded while the other rule is offloaded.
	 */
	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		goto out;

	if (flow_flag_test(flow, DUP) &&
	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
		u64 bytes2;
		u64 packets2;
		u64 lastuse2;

		if (flow_flag_test(flow, USE_ACT_STATS)) {
			f->use_act_stats = true;
		} else {
			counter = mlx5e_tc_get_counter(flow->peer_flow);
			if (!counter)
				goto no_peer_counter;
			mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);

			bytes += bytes2;
			packets += packets2;
			lastuse = max_t(u64, lastuse, lastuse2);
		}
	}

no_peer_counter:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);
	trace_mlx5e_stats_flower(f);
errout:
	mlx5e_flow_put(priv, flow);
	return err;
}
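/* Matchall policing. The police rate below is converted from bytes/sec to
 * Mbit/sec with round-to-nearest: adding half the divisor (500,000) before
 * do_div() implements the rounding, e.g. 1,250,000 B/s * 8 = 10,000,000
 * bit/s, + 500,000 -> 10,500,000, / 1,000,000 -> 10 Mbit/s.
 */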
static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
			       struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch *esw;
	u32 rate_mbps = 0;
	u16 vport_num;
	int err;

	vport_num = rpriv->rep->vport;
	if (vport_num >= MLX5_VPORT_ECPF) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
		return -EOPNOTSUPP;
	}

	esw = priv->mdev->priv.eswitch;
	/* rate is given in bytes/sec.
	 * First convert to bits/sec and then round to the nearest mbit/secs.
	 * mbit means million bits.
	 * Moreover, if rate is non zero we choose to configure to a minimum of
	 * 1 mbit/sec.
	 */
	if (rate) {
		rate = (rate * BITS_PER_BYTE) + 500000;
		do_div(rate, 1000000);
		rate_mbps = max_t(u32, rate, 1);
	}

	err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");

	return err;
}
static int
tc_matchall_police_validate(const struct flow_action *action,
			    const struct flow_action_entry *act,
			    struct netlink_ext_ack *extack)
{
	if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when conform action is not continue");
		return -EOPNOTSUPP;
	}

	if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when exceed action is not drop");
		return -EOPNOTSUPP;
	}

	if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
	    !flow_action_is_last_entry(action, act)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when conform action is ok, but action is not last");
		return -EOPNOTSUPP;
	}

	if (act->police.peakrate_bytes_ps ||
	    act->police.avrate || act->police.overhead) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when peakrate/avrate/overhead is configured");
		return -EOPNOTSUPP;
	}

	return 0;
}
static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
					struct flow_action *flow_action,
					struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	const struct flow_action_entry *act;
	int err;
	int i;

	if (!flow_action_has_entries(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
		return -EINVAL;
	}

	if (!flow_offload_has_one_action(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
		return -EOPNOTSUPP;
	}

	if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
		return -EOPNOTSUPP;
	}

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_POLICE:
			err = tc_matchall_police_validate(flow_action, act, extack);
			if (err)
				return err;

			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
			if (err)
				return err;

			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}
int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
				struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	if (ma->common.prio != 1) {
		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
		return -EINVAL;
	}

	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
}
int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	return apply_police_params(priv, 0, extack);
}
void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct rtnl_link_stats64 cur_stats;
	u64 dbytes;
	u64 dpkts;

	cur_stats = priv->stats.vf_vport;
	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
	rpriv->prev_vf_vport_stats = cur_stats;
	flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
			  FLOW_ACTION_HW_STATS_DELAYED);
}
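/* When a peer device that participates in hairpin goes away, walk the
 * hairpin table and clear the dead peer from every pair matching its vhca
 * id, taking a reference first so entries cannot vanish while waiting on
 * res_ready.
 */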
static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
					      struct mlx5e_priv *peer_priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
	struct mlx5e_hairpin_entry *hpe, *tmp;
	LIST_HEAD(init_wait_list);
	u16 peer_vhca_id;
	int bkt;

	if (!mlx5e_same_hw_devs(priv, peer_priv))
		return;

	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);

	mutex_lock(&tc->hairpin_tbl_lock);
	hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
		if (refcount_inc_not_zero(&hpe->refcnt))
			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
	mutex_unlock(&tc->hairpin_tbl_lock);

	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
		wait_for_completion(&hpe->res_ready);
		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
			mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);

		mlx5e_hairpin_put(priv, hpe);
	}
}
static int mlx5e_tc_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_priv *peer_priv;
	struct mlx5e_tc_table *tc;
	struct mlx5e_priv *priv;

	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
	    event != NETDEV_UNREGISTER ||
	    ndev->reg_state == NETREG_REGISTERED)
		return NOTIFY_DONE;

	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
	priv = tc->priv;
	peer_priv = netdev_priv(ndev);
	if (priv == peer_priv ||
	    !(priv->netdev->features & NETIF_F_HW_TC))
		return NOTIFY_DONE;

	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);

	return NOTIFY_DONE;
}
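/* NIC-mode tc initialization. The chain mapping and the one-FTE miss table
 * are created first because mlx5_chains_create() uses them (the miss table
 * is the chains' default destination); the nic_init error path unwinds in
 * reverse order.
 */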
static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_flow_table **ft = &tc->miss_t;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	int err = 0;

	ft_attr.max_fte = 1;
	ft_attr.autogroup.max_num_groups = 1;
	ft_attr.level = MLX5E_TC_MISS_LEVEL;
	ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);

	*ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(*ft)) {
		err = PTR_ERR(*ft);
		netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
	}

	return err;
}

static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);

	mlx5_destroy_flow_table(tc->miss_t);
}

int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
	struct mlx5_core_dev *dev = priv->mdev;
	struct mapping_ctx *chains_mapping;
	struct mlx5_chains_attr attr = {};
	u64 mapping_id;
	int err;

	mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
	mutex_init(&tc->t_lock);
	mutex_init(&tc->hairpin_tbl_lock);
	hash_init(tc->hairpin_tbl);
	tc->priv = priv;

	err = rhashtable_init(&tc->ht, &tc_ht_params);
	if (err)
		return err;

	lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
	lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);

	mapping_id = mlx5_query_nic_system_image_guid(dev);

	chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
					       sizeof(struct mlx5_mapped_obj),
					       MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);

	if (IS_ERR(chains_mapping)) {
		err = PTR_ERR(chains_mapping);
		goto err_mapping;
	}
	tc->mapping = chains_mapping;

	err = mlx5e_tc_nic_create_miss_table(priv);
	if (err)
		goto err_chains;

	if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
		attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
			MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
	attr.default_ft = tc->miss_t;
	attr.mapping = chains_mapping;
	attr.fs_base_prio = MLX5E_TC_PRIO;

	tc->chains = mlx5_chains_create(dev, &attr);
	if (IS_ERR(tc->chains)) {
		err = PTR_ERR(tc->chains);
		goto err_miss;
	}

	mlx5_chains_print_info(tc->chains);

	tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
				 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);

	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
	err = register_netdevice_notifier_dev_net(priv->netdev,
						  &tc->netdevice_nb,
						  &tc->netdevice_nn);
	if (err) {
		tc->netdevice_nb.notifier_call = NULL;
		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
		goto err_reg;
	}

	mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs));

	tc->action_stats_handle = mlx5e_tc_act_stats_create();
	if (IS_ERR(tc->action_stats_handle)) {
		err = PTR_ERR(tc->action_stats_handle);
		goto err_act_stats;
	}

	return 0;

err_act_stats:
	unregister_netdevice_notifier_dev_net(priv->netdev,
					      &tc->netdevice_nb,
					      &tc->netdevice_nn);
err_reg:
	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mlx5_chains_destroy(tc->chains);
err_miss:
	mlx5e_tc_nic_destroy_miss_table(priv);
err_chains:
	mapping_destroy(chains_mapping);
err_mapping:
	rhashtable_destroy(&tc->ht);
	return err;
}
static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = flow->priv;

	mlx5e_tc_del_flow(priv, flow);
	mlx5e_flow_put(priv, flow);
}
void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);

	debugfs_remove_recursive(tc->dfs_root);

	if (tc->netdevice_nb.notifier_call)
		unregister_netdevice_notifier_dev_net(priv->netdev,
						      &tc->netdevice_nb,
						      &tc->netdevice_nn);

	mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
	mutex_destroy(&tc->hairpin_tbl_lock);

	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
		tc->t = NULL;
	}
	mutex_destroy(&tc->t_lock);

	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mapping_destroy(tc->mapping);
	mlx5_chains_destroy(tc->chains);
	mlx5e_tc_nic_destroy_miss_table(priv);
	mlx5e_tc_act_stats_free(tc->action_stats_handle);
}
int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
{
	int err;

	err = rhashtable_init(tc_ht, &tc_ht_params);
	if (err)
		return err;

	lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
	lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);

	return 0;
}
void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
{
	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}
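/* Eswitch-mode tc initialization. Two mappings translate packet metadata
 * back to software state on miss: one for the tunnel match key and one for
 * tunnel encap options, whose id space is shrunk by 2 for the reserved
 * slow-path and bridge ingress push marks (see the comment further down).
 */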
int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
{
	const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
	struct mlx5e_rep_priv *rpriv;
	struct mapping_ctx *mapping;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;
	u64 mapping_id;
	int err = 0;

	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
						       MLX5_FLOW_NAMESPACE_FDB);
	uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
					       esw_chains(esw),
					       &esw->offloads.mod_hdr,
					       MLX5_FLOW_NAMESPACE_FDB,
					       uplink_priv->post_act);

	uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));

	uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);

	mapping_id = mlx5_query_nic_system_image_guid(esw->dev);

	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
					sizeof(struct tunnel_match_key),
					TUNNEL_INFO_BITS_MASK, true);

	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_tun_mapping;
	}
	uplink_priv->tunnel_mapping = mapping;

	/* Two last values are reserved for stack devices slow path table mark
	 * and bridge ingress push mark.
	 */
	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
					sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_enc_opts_mapping;
	}
	uplink_priv->tunnel_enc_opts_mapping = mapping;

	uplink_priv->encap = mlx5e_tc_tun_init(priv);
	if (IS_ERR(uplink_priv->encap)) {
		err = PTR_ERR(uplink_priv->encap);
		goto err_register_fib_notifier;
	}

	uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create();
	if (IS_ERR(uplink_priv->action_stats_handle)) {
		err = PTR_ERR(uplink_priv->action_stats_handle);
		goto err_action_counter;
	}

	return 0;

err_action_counter:
	mlx5e_tc_tun_cleanup(uplink_priv->encap);
err_register_fib_notifier:
	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
	mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	netdev_warn(priv->netdev,
		    "Failed to initialize tc (eswitch), err: %d", err);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
	return err;
}
void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
{
	struct mlx5e_rep_priv *rpriv;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;

	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	mlx5e_tc_clean_fdb_peer_flows(esw);

	mlx5e_tc_tun_cleanup(uplink_priv->encap);

	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
	mapping_destroy(uplink_priv->tunnel_mapping);

	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
	mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle);
}
int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);

	return atomic_read(&tc_ht->nelems);
}
void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}
void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	mutex_lock(&rpriv->unready_flows_lock);
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			unready_flow_del(flow);
	}
	mutex_unlock(&rpriv->unready_flows_lock);
}
static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
				     struct flow_cls_offload *cls_flower,
				     unsigned long flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}
int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
			    void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS);
	struct mlx5e_priv *priv = cb_priv;

	if (!priv->netdev || !netif_device_present(priv->netdev))
		return -EOPNOTSUPP;

	if (mlx5e_is_uplink_rep(priv))
		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
	else
		flags |= MLX5_TC_FLAG(NIC_OFFLOAD);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
	default:
		return -EOPNOTSUPP;
	}
}
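/* Restore path: tunnel_id packs two mapping ids - the low ENC_OPTS_BITS
 * carry the enc-opts id, the remaining high bits the tunnel key id:
 *
 *	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
 *	tun_id	    = tunnel_id >> ENC_OPTS_BITS;
 *
 * Both ids are then looked up in the uplink mappings to rebuild a metadata
 * dst for the skb.
 */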
static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
				    struct mlx5e_tc_update_priv *tc_priv,
				    u32 tunnel_id)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct tunnel_match_enc_opts enc_opts = {};
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct metadata_dst *tun_dst;
	struct tunnel_match_key key;
	u32 tun_id, enc_opts_id;
	struct net_device *dev;
	int err;

	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
	tun_id = tunnel_id >> ENC_OPTS_BITS;

	if (!tun_id)
		return true;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
			   tun_id, err);
		return false;
	}

	if (enc_opts_id) {
		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
				   enc_opts_id, &enc_opts);
		if (err) {
			netdev_dbg(priv->netdev,
				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
				   enc_opts_id, err);
			return false;
		}
	}

	switch (key.enc_control.addr_type) {
	case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
		tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
					   key.enc_ip.tos, key.enc_ip.ttl,
					   key.enc_tp.dst, TUNNEL_KEY,
					   key32_to_tunnel_id(key.enc_key_id.keyid),
					   enc_opts.key.len);
		break;
	case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
		tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
					     key.enc_ip.tos, key.enc_ip.ttl,
					     key.enc_tp.dst, 0, TUNNEL_KEY,
					     key32_to_tunnel_id(key.enc_key_id.keyid),
					     enc_opts.key.len);
		break;
	default:
		netdev_dbg(priv->netdev,
			   "Couldn't restore tunnel, unsupported addr_type: %d\n",
			   key.enc_control.addr_type);
		return false;
	}

	if (!tun_dst) {
		netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
		return false;
	}

	tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;

	if (enc_opts.key.len)
		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
					enc_opts.key.data,
					enc_opts.key.len,
					enc_opts.key.dst_opt_type);

	skb_dst_set(skb, (struct dst_entry *)tun_dst);
	dev = dev_get_by_index(&init_net, key.filter_ifindex);
	if (!dev) {
		netdev_dbg(priv->netdev,
			   "Couldn't find tunnel device with ifindex: %d\n",
			   key.filter_ifindex);
		return false;
	}

	/* Set fwd_dev so we do dev_put() after datapath */
	tc_priv->fwd_dev = dev;

	skb->dev = dev;

	return true;
}
static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv,
					 struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id,
					 u32 tunnel_id, struct mlx5e_tc_update_priv *tc_priv)
{
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	struct tc_skb_ext *tc_skb_ext;
	u64 act_miss_cookie;
	u32 chain;

	chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0;
	act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ?
			  mapped_obj->act_miss_cookie : 0;
	if (chain || act_miss_cookie) {
		if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id))
			return false;

		tc_skb_ext = tc_skb_ext_alloc(skb);
		if (!tc_skb_ext)
			return false;

		if (act_miss_cookie) {
			tc_skb_ext->act_miss_cookie = act_miss_cookie;
			tc_skb_ext->act_miss = 1;
		} else {
			tc_skb_ext->chain = chain;
		}
	}

	if (tc_priv)
		return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);

	return true;
}
static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
					struct mlx5_mapped_obj *mapped_obj,
					struct mlx5e_tc_update_priv *tc_priv)
{
	if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
		netdev_dbg(priv->netdev,
			   "Failed to restore tunnel info for sampled packet\n");
		return;
	}
	mlx5e_tc_sample_skb(skb, mapped_obj);
}
static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
					  struct mlx5_mapped_obj *mapped_obj,
					  struct mlx5e_tc_update_priv *tc_priv,
					  u32 tunnel_id)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	bool forward_tx = false;

	/* Tunnel restore takes precedence over int port restore */
	if (tunnel_id)
		return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;

	if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
				      mapped_obj->int_port_metadata, &forward_tx)) {
		/* Set fwd_dev for future dev_put */
		tc_priv->fwd_dev = skb->dev;
		tc_priv->forward_tx = forward_tx;

		return true;
	}

	return false;
}
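/* mlx5e_tc_update_skb() dispatches on the object type recovered from the
 * reg_c0 mapping: chain/act-miss objects restore tc metadata, sampled
 * packets are consumed by the psample path (skb_done), and int-port
 * metadata forwards the skb to the internal port device.
 */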
bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
			 struct mapping_ctx *mapping_ctx, u32 mapped_obj_id,
			 struct mlx5_tc_ct_priv *ct_priv,
			 u32 zone_restore_id, u32 tunnel_id,
			 struct mlx5e_tc_update_priv *tc_priv)
{
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	struct mlx5_mapped_obj mapped_obj;
	int err;

	err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj);
	if (err) {
		netdev_dbg(skb->dev,
			   "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n",
			   mapped_obj_id, err);
		return false;
	}

	switch (mapped_obj.type) {
	case MLX5_MAPPED_OBJ_CHAIN:
	case MLX5_MAPPED_OBJ_ACT_MISS:
		return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id,
						    tunnel_id, tc_priv);
	case MLX5_MAPPED_OBJ_SAMPLE:
		mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv);
		tc_priv->skb_done = true;
		return true;
	case MLX5_MAPPED_OBJ_INT_PORT_METADATA:
		return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id);
	default:
		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
		return false;
	}
}
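/* NIC RX variant: the mapped object id and the CT zone restore id are both
 * carried in the CQE ft_metadata (reg_b); tunnel restore does not apply on
 * the NIC path, hence tunnel_id 0 and no tc_priv.
 */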
bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
{
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	u32 mapped_obj_id, reg_b, zone_restore_id;
	struct mlx5_tc_ct_priv *ct_priv;
	struct mapping_ctx *mapping_ctx;
	struct mlx5e_tc_table *tc;

	reg_b = be32_to_cpu(cqe->ft_metadata);
	tc = mlx5e_fs_get_tc(priv->fs);
	mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
	zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
			  ESW_ZONE_ID_MASK;
	ct_priv = tc->ct;
	mapping_ctx = tc->mapping;

	return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id,
				   0, NULL);
}
int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
				     u64 act_miss_cookie, u32 *act_miss_mapping)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_mapped_obj mapped_obj = {};
	struct mapping_ctx *ctx;
	int err;

	ctx = esw->offloads.reg_c0_obj_pool;

	mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
	mapped_obj.act_miss_cookie = act_miss_cookie;
	err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
	if (err)
		return err;

	attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
	if (IS_ERR(attr->act_id_restore_rule)) {
		err = PTR_ERR(attr->act_id_restore_rule);
		goto err_rule;
	}

	return 0;

err_rule:
	mapping_remove(ctx, *act_miss_mapping);
	return err;
}
void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
				      u32 act_miss_mapping)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mapping_ctx *ctx;

	ctx = esw->offloads.reg_c0_obj_pool;
	mlx5_del_flow_rules(attr->act_id_restore_rule);
	mapping_remove(ctx, act_miss_mapping);
}