1 /*
2 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33 #include <net/flow_dissector.h>
34 #include <net/flow_offload.h>
35 #include <net/sch_generic.h>
36 #include <net/pkt_cls.h>
37 #include <linux/mlx5/fs.h>
38 #include <linux/mlx5/device.h>
39 #include <linux/rhashtable.h>
40 #include <linux/refcount.h>
41 #include <linux/completion.h>
42 #include <net/arp.h>
43 #include <net/ipv6_stubs.h>
44 #include <net/bareudp.h>
45 #include <net/bonding.h>
46 #include <net/dst_metadata.h>
47 #include "devlink.h"
48 #include "en.h"
49 #include "en/tc/post_act.h"
50 #include "en/tc/act_stats.h"
51 #include "en_rep.h"
52 #include "en/rep/tc.h"
53 #include "en/rep/neigh.h"
54 #include "en_tc.h"
55 #include "eswitch.h"
56 #include "fs_core.h"
57 #include "en/port.h"
58 #include "en/tc_tun.h"
59 #include "en/mapping.h"
60 #include "en/tc_ct.h"
61 #include "en/mod_hdr.h"
62 #include "en/tc_tun_encap.h"
63 #include "en/tc/sample.h"
64 #include "en/tc/act/act.h"
65 #include "en/tc/post_meter.h"
66 #include "lib/devcom.h"
67 #include "lib/geneve.h"
68 #include "lib/fs_chains.h"
69 #include "diag/en_tc_tracepoint.h"
70 #include <asm/div64.h>
71 #include "lag/lag.h"
72 #include "lag/mp.h"
73
74 #define MLX5E_TC_TABLE_NUM_GROUPS 4
75 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
76
77 struct mlx5e_tc_table {
78 /* Protects the dynamic assignment of the t member below,
79  * which is the NIC TC root flow table.
80  */
81 struct mutex t_lock;
82 struct mlx5e_priv *priv;
83 struct mlx5_flow_table *t;
84 struct mlx5_flow_table *miss_t;
85 struct mlx5_fs_chains *chains;
86 struct mlx5e_post_act *post_act;
87
88 struct rhashtable ht;
89
90 struct mod_hdr_tbl mod_hdr;
91 struct mutex hairpin_tbl_lock; /* protects hairpin_tbl */
92 DECLARE_HASHTABLE(hairpin_tbl, 8);
93
94 struct notifier_block netdevice_nb;
95 struct netdev_net_notifier netdevice_nn;
96
97 struct mlx5_tc_ct_priv *ct;
98 struct mapping_ctx *mapping;
99 struct dentry *dfs_root;
100
101 /* tc action stats */
102 struct mlx5e_tc_act_stats_handle *action_stats_handle;
103 };
104
105 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
106 [MAPPED_OBJ_TO_REG] = {
107 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
108 .moffset = 0,
109 .mlen = 16,
110 },
111 [VPORT_TO_REG] = {
112 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
113 .moffset = 16,
114 .mlen = 16,
115 },
116 [TUNNEL_TO_REG] = {
117 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
118 .moffset = 8,
119 .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
120 .soffset = MLX5_BYTE_OFF(fte_match_param,
121 misc_parameters_2.metadata_reg_c_1),
122 },
123 [ZONE_TO_REG] = zone_to_reg_ct,
124 [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
125 [CTSTATE_TO_REG] = ctstate_to_reg_ct,
126 [MARK_TO_REG] = mark_to_reg_ct,
127 [LABELS_TO_REG] = labels_to_reg_ct,
128 [FTEID_TO_REG] = fteid_to_reg_ct,
129 /* For NIC rules we store the restore metadata directly
130 * into reg_b that is passed to SW since we don't
131 * jump between steering domains.
132 */
133 [NIC_MAPPED_OBJ_TO_REG] = {
134 .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
135 .moffset = 0,
136 .mlen = 16,
137 },
138 [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
139 [PACKET_COLOR_TO_REG] = packet_color_to_reg,
140 };
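/* A rough summary of the layout defined by the table above: reg_c_0 carries the
 * mapped object (e.g. chain) id in bits 0..15 and the source vport metadata in
 * bits 16..31, reg_c_1 carries the tunnel mapping starting at bit 8, and NIC
 * rules use reg_b instead since its value reaches software without crossing
 * steering domains.
 */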
141
142 struct mlx5e_tc_jump_state {
143 u32 jump_count;
144 bool jump_target;
145 struct mlx5_flow_attr *jumping_attr;
146
147 enum flow_action_id last_id;
148 u32 last_index;
149 };
150
151 struct mlx5e_tc_table *mlx5e_tc_table_alloc(void)
152 {
153 struct mlx5e_tc_table *tc;
154
155 tc = kvzalloc(sizeof(*tc), GFP_KERNEL);
156 return tc ? tc : ERR_PTR(-ENOMEM);
157 }
158
159 void mlx5e_tc_table_free(struct mlx5e_tc_table *tc)
160 {
161 kvfree(tc);
162 }
163
164 struct mlx5_fs_chains *mlx5e_nic_chains(struct mlx5e_tc_table *tc)
165 {
166 return tc->chains;
167 }
168
169 /* To avoid a false lock dependency warning, give the tc_ht lock a different
170  * class than the lock class of the ht used inside flow groups: deleting the
171  * last flow from a group and then the group itself reaches del_sw_flow_group(),
172  * which calls rhashtable_destroy() on fg->ftes_hash and takes that table's
173  * ht->mutex, a different mutex than the tc_ht one here.
174  */
175 static struct lock_class_key tc_ht_lock_key;
176 static struct lock_class_key tc_ht_wq_key;
177
178 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
179 static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
180 static void mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow,
181 struct mlx5_flow_attr *attr);
182
183 void
184 mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
185 enum mlx5e_tc_attr_to_reg type,
186 u32 val,
187 u32 mask)
188 {
189 void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
190 int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
191 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
192 int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
193 u32 max_mask = GENMASK(match_len - 1, 0);
194 __be32 curr_mask_be, curr_val_be;
195 u32 curr_mask, curr_val;
196
197 fmask = headers_c + soffset;
198 fval = headers_v + soffset;
199
200 memcpy(&curr_mask_be, fmask, 4);
201 memcpy(&curr_val_be, fval, 4);
202
203 curr_mask = be32_to_cpu(curr_mask_be);
204 curr_val = be32_to_cpu(curr_val_be);
205
206 /* shift the new value and mask to the field's offset */
207 WARN_ON(mask > max_mask);
208 mask <<= moffset;
209 val <<= moffset;
210 max_mask <<= moffset;
211
212 /* clear the field's current value and mask bits */
213 curr_mask &= ~max_mask;
214 curr_val &= ~max_mask;
215
216 /* OR the new value and mask into the current ones */
217 curr_mask |= mask;
218 curr_val |= val;
219
220 /* convert back to big endian and write out */
221 curr_mask_be = cpu_to_be32(curr_mask);
222 curr_val_be = cpu_to_be32(curr_val);
223
224 memcpy(fmask, &curr_mask_be, 4);
225 memcpy(fval, &curr_val_be, 4);
226
227 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
228 }
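/* Worked example for the helper above (illustrative): for a mapping with
 * moffset 16 and mlen 16, a call with val 0x5 and mask 0xffff updates only
 * bits 31:16 of the 32-bit register dword at soffset, leaving bits 15:0 of
 * that register untouched for whatever mapping already occupies them.
 */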
229
230 void
231 mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
232 enum mlx5e_tc_attr_to_reg type,
233 u32 *val,
234 u32 *mask)
235 {
236 void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
237 int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
238 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
239 int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
240 u32 max_mask = GENMASK(match_len - 1, 0);
241 __be32 curr_mask_be, curr_val_be;
242 u32 curr_mask, curr_val;
243
244 fmask = headers_c + soffset;
245 fval = headers_v + soffset;
246
247 memcpy(&curr_mask_be, fmask, 4);
248 memcpy(&curr_val_be, fval, 4);
249
250 curr_mask = be32_to_cpu(curr_mask_be);
251 curr_val = be32_to_cpu(curr_val_be);
252
253 *mask = (curr_mask >> moffset) & max_mask;
254 *val = (curr_val >> moffset) & max_mask;
255 }
256
257 int
258 mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
259 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
260 enum mlx5_flow_namespace_type ns,
261 enum mlx5e_tc_attr_to_reg type,
262 u32 data)
263 {
264 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
265 int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
266 int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
267 char *modact;
268 int err;
269
270 modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
271 if (IS_ERR(modact))
272 return PTR_ERR(modact);
273
274 /* Firmware has a 5-bit length field, where 0 means 32 bits */
275 if (mlen == 32)
276 mlen = 0;
277
278 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
279 MLX5_SET(set_action_in, modact, field, mfield);
280 MLX5_SET(set_action_in, modact, offset, moffset);
281 MLX5_SET(set_action_in, modact, length, mlen);
282 MLX5_SET(set_action_in, modact, data, data);
283 err = mod_hdr_acts->num_actions;
284 mod_hdr_acts->num_actions++;
285
286 return err;
287 }
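/* The value returned above is the index of the newly appended SET action inside
 * mod_hdr_acts. Callers that only need the write use mlx5e_tc_match_to_reg_set(),
 * which discards the index, while callers that may later rewrite the data keep
 * the index and pass it to mlx5e_tc_match_to_reg_mod_hdr_change().
 */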
288
289 static struct mlx5e_tc_act_stats_handle *
290 get_act_stats_handle(struct mlx5e_priv *priv)
291 {
292 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
293 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
294 struct mlx5_rep_uplink_priv *uplink_priv;
295 struct mlx5e_rep_priv *uplink_rpriv;
296
297 if (is_mdev_switchdev_mode(priv->mdev)) {
298 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
299 uplink_priv = &uplink_rpriv->uplink_priv;
300
301 return uplink_priv->action_stats_handle;
302 }
303
304 return tc->action_stats_handle;
305 }
306
307 struct mlx5e_tc_int_port_priv *
308 mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
309 {
310 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
311 struct mlx5_rep_uplink_priv *uplink_priv;
312 struct mlx5e_rep_priv *uplink_rpriv;
313
314 if (is_mdev_switchdev_mode(priv->mdev)) {
315 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
316 uplink_priv = &uplink_rpriv->uplink_priv;
317
318 return uplink_priv->int_port_priv;
319 }
320
321 return NULL;
322 }
323
324 struct mlx5e_flow_meters *
325 mlx5e_get_flow_meters(struct mlx5_core_dev *dev)
326 {
327 struct mlx5_eswitch *esw = dev->priv.eswitch;
328 struct mlx5_rep_uplink_priv *uplink_priv;
329 struct mlx5e_rep_priv *uplink_rpriv;
330 struct mlx5e_priv *priv;
331
332 if (is_mdev_switchdev_mode(dev)) {
333 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
334 uplink_priv = &uplink_rpriv->uplink_priv;
335 priv = netdev_priv(uplink_rpriv->netdev);
336 if (!uplink_priv->flow_meters)
337 uplink_priv->flow_meters =
338 mlx5e_flow_meters_init(priv,
339 MLX5_FLOW_NAMESPACE_FDB,
340 uplink_priv->post_act);
341 if (!IS_ERR(uplink_priv->flow_meters))
342 return uplink_priv->flow_meters;
343 }
344
345 return NULL;
346 }
347
348 static struct mlx5_tc_ct_priv *
349 get_ct_priv(struct mlx5e_priv *priv)
350 {
351 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
352 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
353 struct mlx5_rep_uplink_priv *uplink_priv;
354 struct mlx5e_rep_priv *uplink_rpriv;
355
356 if (is_mdev_switchdev_mode(priv->mdev)) {
357 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
358 uplink_priv = &uplink_rpriv->uplink_priv;
359
360 return uplink_priv->ct_priv;
361 }
362
363 return tc->ct;
364 }
365
366 static struct mlx5e_tc_psample *
367 get_sample_priv(struct mlx5e_priv *priv)
368 {
369 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
370 struct mlx5_rep_uplink_priv *uplink_priv;
371 struct mlx5e_rep_priv *uplink_rpriv;
372
373 if (is_mdev_switchdev_mode(priv->mdev)) {
374 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
375 uplink_priv = &uplink_rpriv->uplink_priv;
376
377 return uplink_priv->tc_psample;
378 }
379
380 return NULL;
381 }
382
383 static struct mlx5e_post_act *
384 get_post_action(struct mlx5e_priv *priv)
385 {
386 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
387 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
388 struct mlx5_rep_uplink_priv *uplink_priv;
389 struct mlx5e_rep_priv *uplink_rpriv;
390
391 if (is_mdev_switchdev_mode(priv->mdev)) {
392 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
393 uplink_priv = &uplink_rpriv->uplink_priv;
394
395 return uplink_priv->post_act;
396 }
397
398 return tc->post_act;
399 }
400
401 struct mlx5_flow_handle *
402 mlx5_tc_rule_insert(struct mlx5e_priv *priv,
403 struct mlx5_flow_spec *spec,
404 struct mlx5_flow_attr *attr)
405 {
406 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
407
408 if (is_mdev_switchdev_mode(priv->mdev))
409 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
410
411 return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
412 }
413
414 void
415 mlx5_tc_rule_delete(struct mlx5e_priv *priv,
416 struct mlx5_flow_handle *rule,
417 struct mlx5_flow_attr *attr)
418 {
419 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
420
421 if (is_mdev_switchdev_mode(priv->mdev)) {
422 mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
423 return;
424 }
425
426 mlx5e_del_offloaded_nic_rule(priv, rule, attr);
427 }
428
429 static bool
430 is_flow_meter_action(struct mlx5_flow_attr *attr)
431 {
432 return (((attr->action & MLX5_FLOW_CONTEXT_ACTION_EXECUTE_ASO) &&
433 (attr->exe_aso_type == MLX5_EXE_ASO_FLOW_METER)) ||
434 attr->flags & MLX5_ATTR_FLAG_MTU);
435 }
436
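/* Roughly: resolve (or replace) the flow meter for the requested parameters,
 * create the post-meter table that enforces the rate/MTU check using the
 * meter's counters and branch attributes, and point the rule's destination
 * (dest_ft) at that table.
 */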
437 static int
438 mlx5e_tc_add_flow_meter(struct mlx5e_priv *priv,
439 struct mlx5_flow_attr *attr)
440 {
441 struct mlx5e_post_act *post_act = get_post_action(priv);
442 struct mlx5e_post_meter_priv *post_meter;
443 enum mlx5_flow_namespace_type ns_type;
444 struct mlx5e_flow_meter_handle *meter;
445 enum mlx5e_post_meter_type type;
446
447 meter = mlx5e_tc_meter_replace(priv->mdev, &attr->meter_attr.params);
448 if (IS_ERR(meter)) {
449 mlx5_core_err(priv->mdev, "Failed to get flow meter\n");
450 return PTR_ERR(meter);
451 }
452
453 ns_type = mlx5e_tc_meter_get_namespace(meter->flow_meters);
454 type = meter->params.mtu ? MLX5E_POST_METER_MTU : MLX5E_POST_METER_RATE;
455 post_meter = mlx5e_post_meter_init(priv, ns_type, post_act,
456 type,
457 meter->act_counter, meter->drop_counter,
458 attr->branch_true, attr->branch_false);
459 if (IS_ERR(post_meter)) {
460 mlx5_core_err(priv->mdev, "Failed to init post meter\n");
461 goto err_meter_init;
462 }
463
464 attr->meter_attr.meter = meter;
465 attr->meter_attr.post_meter = post_meter;
466 attr->dest_ft = mlx5e_post_meter_get_ft(post_meter);
467 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
468
469 return 0;
470
471 err_meter_init:
472 mlx5e_tc_meter_put(meter);
473 return PTR_ERR(post_meter);
474 }
475
476 static void
477 mlx5e_tc_del_flow_meter(struct mlx5_eswitch *esw, struct mlx5_flow_attr *attr)
478 {
479 mlx5e_post_meter_cleanup(esw, attr->meter_attr.post_meter);
480 mlx5e_tc_meter_put(attr->meter_attr.meter);
481 }
482
483 struct mlx5_flow_handle *
484 mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
485 struct mlx5_flow_spec *spec,
486 struct mlx5_flow_attr *attr)
487 {
488 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
489 int err;
490
491 if (!is_mdev_switchdev_mode(priv->mdev))
492 return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
493
494 if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
495 return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);
496
497 if (is_flow_meter_action(attr)) {
498 err = mlx5e_tc_add_flow_meter(priv, attr);
499 if (err)
500 return ERR_PTR(err);
501 }
502
503 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
504 }
505
506 void
507 mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
508 struct mlx5_flow_handle *rule,
509 struct mlx5_flow_attr *attr)
510 {
511 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
512
513 if (!is_mdev_switchdev_mode(priv->mdev)) {
514 mlx5e_del_offloaded_nic_rule(priv, rule, attr);
515 return;
516 }
517
518 if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
519 mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
520 return;
521 }
522
523 mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
524
525 if (attr->meter_attr.meter)
526 mlx5e_tc_del_flow_meter(esw, attr);
527 }
528
529 int
530 mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
531 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
532 enum mlx5_flow_namespace_type ns,
533 enum mlx5e_tc_attr_to_reg type,
534 u32 data)
535 {
536 int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);
537
538 return ret < 0 ? ret : 0;
539 }
540
541 void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
542 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
543 enum mlx5e_tc_attr_to_reg type,
544 int act_id, u32 data)
545 {
546 int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
547 int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
548 int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
549 char *modact;
550
551 modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);
552
553 /* Firmware has a 5-bit length field, where 0 means 32 bits */
554 if (mlen == 32)
555 mlen = 0;
556
557 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
558 MLX5_SET(set_action_in, modact, field, mfield);
559 MLX5_SET(set_action_in, modact, offset, moffset);
560 MLX5_SET(set_action_in, modact, length, mlen);
561 MLX5_SET(set_action_in, modact, data, data);
562 }
563
564 struct mlx5e_hairpin {
565 struct mlx5_hairpin *pair;
566
567 struct mlx5_core_dev *func_mdev;
568 struct mlx5e_priv *func_priv;
569 u32 tdn;
570 struct mlx5e_tir direct_tir;
571
572 int num_channels;
573 u8 log_num_packets;
574 struct mlx5e_rqt indir_rqt;
575 struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
576 struct mlx5_ttc_table *ttc;
577 };
578
579 struct mlx5e_hairpin_entry {
580 /* a node of a hash table which keeps all the hairpin entries */
581 struct hlist_node hairpin_hlist;
582
583 /* protects flows list */
584 spinlock_t flows_lock;
585 /* flows sharing the same hairpin */
586 struct list_head flows;
587 /* hpe's that were not fully initialized when the dead peer update event
588 * function traversed them.
589 */
590 struct list_head dead_peer_wait_list;
591
592 u16 peer_vhca_id;
593 u8 prio;
594 struct mlx5e_hairpin *hp;
595 refcount_t refcnt;
596 struct completion res_ready;
597 };
598
599 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
600 struct mlx5e_tc_flow *flow);
601
602 struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
603 {
604 if (!flow || !refcount_inc_not_zero(&flow->refcnt))
605 return ERR_PTR(-EINVAL);
606 return flow;
607 }
608
609 void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
610 {
611 if (refcount_dec_and_test(&flow->refcnt)) {
612 mlx5e_tc_del_flow(priv, flow);
613 kfree_rcu(flow, rcu_head);
614 }
615 }
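/* mlx5e_flow_get() above fails with -EINVAL once the refcount has already
 * dropped to zero; mlx5e_flow_put() removes the hardware rules via
 * mlx5e_tc_del_flow() and frees the flow after an RCU grace period when the
 * last reference goes away.
 */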
616
617 bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
618 {
619 return flow_flag_test(flow, ESWITCH);
620 }
621
622 bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
623 {
624 return flow_flag_test(flow, FT);
625 }
626
627 bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
628 {
629 return flow_flag_test(flow, OFFLOADED);
630 }
631
632 int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
633 {
634 return mlx5e_is_eswitch_flow(flow) ?
635 MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
636 }
637
638 static struct mlx5_core_dev *
639 get_flow_counter_dev(struct mlx5e_tc_flow *flow)
640 {
641 return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
642 }
643
644 static struct mod_hdr_tbl *
645 get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
646 {
647 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
648 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
649
650 return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
651 &esw->offloads.mod_hdr :
652 &tc->mod_hdr;
653 }
654
655 int mlx5e_tc_attach_mod_hdr(struct mlx5e_priv *priv,
656 struct mlx5e_tc_flow *flow,
657 struct mlx5_flow_attr *attr)
658 {
659 struct mlx5e_mod_hdr_handle *mh;
660
661 mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
662 mlx5e_get_flow_namespace(flow),
663 &attr->parse_attr->mod_hdr_acts);
664 if (IS_ERR(mh))
665 return PTR_ERR(mh);
666
667 WARN_ON(attr->modify_hdr);
668 attr->modify_hdr = mlx5e_mod_hdr_get(mh);
669 attr->mh = mh;
670
671 return 0;
672 }
673
674 void mlx5e_tc_detach_mod_hdr(struct mlx5e_priv *priv,
675 struct mlx5e_tc_flow *flow,
676 struct mlx5_flow_attr *attr)
677 {
678 /* flow wasn't fully initialized */
679 if (!attr->mh)
680 return;
681
682 mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
683 attr->mh);
684 attr->mh = NULL;
685 }
686
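/* Look up the mlx5 core device behind the net_device with the given ifindex;
 * returns ERR_PTR(-ENODEV) if no such net_device exists. See the comment inside
 * on why the returned mdev may keep being used after the net_device reference
 * is dropped.
 */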
687 static
688 struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
689 {
690 struct mlx5_core_dev *mdev;
691 struct net_device *netdev;
692 struct mlx5e_priv *priv;
693
694 netdev = dev_get_by_index(net, ifindex);
695 if (!netdev)
696 return ERR_PTR(-ENODEV);
697
698 priv = netdev_priv(netdev);
699 mdev = priv->mdev;
700 dev_put(netdev);
701
702 /* Mirred tc action holds a refcount on the ifindex net_device (see
703 * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
704 * after dev_put(netdev), while we're in the context of adding a tc flow.
705 *
706 * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
707 * stored in a hairpin object, which exists until all flows, that refer to it, get
708 * removed.
709 *
710 * On the other hand, after a hairpin object has been created, the peer net_device may
711 * be removed/unbound while there are still some hairpin flows that are using it. This
712 * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
713 * NETDEV_UNREGISTER event of the peer net_device.
714 */
715 return mdev;
716 }
717
718 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
719 {
720 struct mlx5e_tir_builder *builder;
721 int err;
722
723 builder = mlx5e_tir_builder_alloc(false);
724 if (!builder)
725 return -ENOMEM;
726
727 err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
728 if (err)
729 goto out;
730
731 mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
732 err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
733 if (err)
734 goto create_tir_err;
735
736 out:
737 mlx5e_tir_builder_free(builder);
738 return err;
739
740 create_tir_err:
741 mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
742
743 goto out;
744 }
745
746 static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
747 {
748 mlx5e_tir_destroy(&hp->direct_tir);
749 mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
750 }
751
752 static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
753 {
754 struct mlx5e_priv *priv = hp->func_priv;
755 struct mlx5_core_dev *mdev = priv->mdev;
756 struct mlx5e_rss_params_indir *indir;
757 int err;
758
759 indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
760 if (!indir)
761 return -ENOMEM;
762
763 mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
764 err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
765 mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
766 indir);
767
768 kvfree(indir);
769 return err;
770 }
771
772 static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
773 {
774 struct mlx5e_priv *priv = hp->func_priv;
775 struct mlx5e_rss_params_hash rss_hash;
776 enum mlx5_traffic_types tt, max_tt;
777 struct mlx5e_tir_builder *builder;
778 int err = 0;
779
780 builder = mlx5e_tir_builder_alloc(false);
781 if (!builder)
782 return -ENOMEM;
783
784 rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);
785
786 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
787 struct mlx5e_rss_params_traffic_type rss_tt;
788
789 rss_tt = mlx5e_rss_get_default_tt_config(tt);
790
791 mlx5e_tir_builder_build_rqt(builder, hp->tdn,
792 mlx5e_rqt_get_rqtn(&hp->indir_rqt),
793 false);
794 mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);
795
796 err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
797 if (err) {
798 mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
799 goto err_destroy_tirs;
800 }
801
802 mlx5e_tir_builder_clear(builder);
803 }
804
805 out:
806 mlx5e_tir_builder_free(builder);
807 return err;
808
809 err_destroy_tirs:
810 max_tt = tt;
811 for (tt = 0; tt < max_tt; tt++)
812 mlx5e_tir_destroy(&hp->indir_tir[tt]);
813
814 goto out;
815 }
816
817 static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
818 {
819 int tt;
820
821 for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
822 mlx5e_tir_destroy(&hp->indir_tir[tt]);
823 }
824
825 static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
826 struct ttc_params *ttc_params)
827 {
828 struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
829 int tt;
830
831 memset(ttc_params, 0, sizeof(*ttc_params));
832
833 ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
834 MLX5_FLOW_NAMESPACE_KERNEL);
835 for (tt = 0; tt < MLX5_NUM_TT; tt++) {
836 ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
837 ttc_params->dests[tt].tir_num =
838 tt == MLX5_TT_ANY ?
839 mlx5e_tir_get_tirn(&hp->direct_tir) :
840 mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
841 }
842
843 ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
844 ft_attr->prio = MLX5E_TC_PRIO;
845 }
846
847 static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
848 {
849 struct mlx5e_priv *priv = hp->func_priv;
850 struct ttc_params ttc_params;
851 struct mlx5_ttc_table *ttc;
852 int err;
853
854 err = mlx5e_hairpin_create_indirect_rqt(hp);
855 if (err)
856 return err;
857
858 err = mlx5e_hairpin_create_indirect_tirs(hp);
859 if (err)
860 goto err_create_indirect_tirs;
861
862 mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
863 hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
864 if (IS_ERR(hp->ttc)) {
865 err = PTR_ERR(hp->ttc);
866 goto err_create_ttc_table;
867 }
868
869 ttc = mlx5e_fs_get_ttc(priv->fs, false);
870 netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
871 hp->num_channels,
872 mlx5_get_ttc_flow_table(ttc)->id);
873
874 return 0;
875
876 err_create_ttc_table:
877 mlx5e_hairpin_destroy_indirect_tirs(hp);
878 err_create_indirect_tirs:
879 mlx5e_rqt_destroy(&hp->indir_rqt);
880
881 return err;
882 }
883
884 static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
885 {
886 mlx5_destroy_ttc_table(hp->ttc);
887 mlx5e_hairpin_destroy_indirect_tirs(hp);
888 mlx5e_rqt_destroy(&hp->indir_rqt);
889 }
890
891 static struct mlx5e_hairpin *
892 mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
893 int peer_ifindex)
894 {
895 struct mlx5_core_dev *func_mdev, *peer_mdev;
896 struct mlx5e_hairpin *hp;
897 struct mlx5_hairpin *pair;
898 int err;
899
900 hp = kzalloc(sizeof(*hp), GFP_KERNEL);
901 if (!hp)
902 return ERR_PTR(-ENOMEM);
903
904 func_mdev = priv->mdev;
905 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
906 if (IS_ERR(peer_mdev)) {
907 err = PTR_ERR(peer_mdev);
908 goto create_pair_err;
909 }
910
911 pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
912 if (IS_ERR(pair)) {
913 err = PTR_ERR(pair);
914 goto create_pair_err;
915 }
916 hp->pair = pair;
917 hp->func_mdev = func_mdev;
918 hp->func_priv = priv;
919 hp->num_channels = params->num_channels;
920 hp->log_num_packets = params->log_num_packets;
921
922 err = mlx5e_hairpin_create_transport(hp);
923 if (err)
924 goto create_transport_err;
925
926 if (hp->num_channels > 1) {
927 err = mlx5e_hairpin_rss_init(hp);
928 if (err)
929 goto rss_init_err;
930 }
931
932 return hp;
933
934 rss_init_err:
935 mlx5e_hairpin_destroy_transport(hp);
936 create_transport_err:
937 mlx5_core_hairpin_destroy(hp->pair);
938 create_pair_err:
939 kfree(hp);
940 return ERR_PTR(err);
941 }
942
943 static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
944 {
945 if (hp->num_channels > 1)
946 mlx5e_hairpin_rss_cleanup(hp);
947 mlx5e_hairpin_destroy_transport(hp);
948 mlx5_core_hairpin_destroy(hp->pair);
949 kvfree(hp);
950 }
951
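/* Pack the peer vhca id and prio into a single hash key,
 * e.g. hash_hairpin_info(7, 3) yields 0x70003.
 */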
952 static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
953 {
954 return (peer_vhca_id << 16 | prio);
955 }
956
957 static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
958 u16 peer_vhca_id, u8 prio)
959 {
960 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
961 struct mlx5e_hairpin_entry *hpe;
962 u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);
963
964 hash_for_each_possible(tc->hairpin_tbl, hpe,
965 hairpin_hlist, hash_key) {
966 if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
967 refcount_inc(&hpe->refcnt);
968 return hpe;
969 }
970 }
971
972 return NULL;
973 }
974
975 static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
976 struct mlx5e_hairpin_entry *hpe)
977 {
978 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
979 /* no more hairpin flows for us, release the hairpin pair */
980 if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &tc->hairpin_tbl_lock))
981 return;
982 hash_del(&hpe->hairpin_hlist);
983 mutex_unlock(&tc->hairpin_tbl_lock);
984
985 if (!IS_ERR_OR_NULL(hpe->hp)) {
986 netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
987 dev_name(hpe->hp->pair->peer_mdev->device));
988
989 mlx5e_hairpin_destroy(hpe->hp);
990 }
991
992 WARN_ON(!list_empty(&hpe->flows));
993 kfree(hpe);
994 }
995
996 #define UNKNOWN_MATCH_PRIO 8
997
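/* Hairpin pairs are keyed per (peer vhca id, PCP priority). A flow that does
 * not match on VLAN PCP at all falls into the catch-all UNKNOWN_MATCH_PRIO
 * bucket, while a partially masked PCP match is rejected.
 */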
998 static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
999 struct mlx5_flow_spec *spec, u8 *match_prio,
1000 struct netlink_ext_ack *extack)
1001 {
1002 void *headers_c, *headers_v;
1003 u8 prio_val, prio_mask = 0;
1004 bool vlan_present;
1005
1006 #ifdef CONFIG_MLX5_CORE_EN_DCB
1007 if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
1008 NL_SET_ERR_MSG_MOD(extack,
1009 "only PCP trust state supported for hairpin");
1010 return -EOPNOTSUPP;
1011 }
1012 #endif
1013 headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1014 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1015
1016 vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
1017 if (vlan_present) {
1018 prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
1019 prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
1020 }
1021
1022 if (!vlan_present || !prio_mask) {
1023 prio_val = UNKNOWN_MATCH_PRIO;
1024 } else if (prio_mask != 0x7) {
1025 NL_SET_ERR_MSG_MOD(extack,
1026 "masked priority match not supported for hairpin");
1027 return -EOPNOTSUPP;
1028 }
1029
1030 *match_prio = prio_val;
1031 return 0;
1032 }
1033
1034 static int debugfs_hairpin_num_active_get(void *data, u64 *val)
1035 {
1036 struct mlx5e_tc_table *tc = data;
1037 struct mlx5e_hairpin_entry *hpe;
1038 u32 cnt = 0;
1039 u32 bkt;
1040
1041 mutex_lock(&tc->hairpin_tbl_lock);
1042 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
1043 cnt++;
1044 mutex_unlock(&tc->hairpin_tbl_lock);
1045
1046 *val = cnt;
1047
1048 return 0;
1049 }
1050 DEFINE_DEBUGFS_ATTRIBUTE(fops_hairpin_num_active,
1051 debugfs_hairpin_num_active_get, NULL, "%llu\n");
1052
1053 static int debugfs_hairpin_table_dump_show(struct seq_file *file, void *priv)
1054
1055 {
1056 struct mlx5e_tc_table *tc = file->private;
1057 struct mlx5e_hairpin_entry *hpe;
1058 u32 bkt;
1059
1060 mutex_lock(&tc->hairpin_tbl_lock);
1061 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
1062 seq_printf(file,
1063 "Hairpin peer_vhca_id %u prio %u refcnt %u num_channels %u num_packets %lu\n",
1064 hpe->peer_vhca_id, hpe->prio,
1065 refcount_read(&hpe->refcnt), hpe->hp->num_channels,
1066 BIT(hpe->hp->log_num_packets));
1067 mutex_unlock(&tc->hairpin_tbl_lock);
1068
1069 return 0;
1070 }
1071 DEFINE_SHOW_ATTRIBUTE(debugfs_hairpin_table_dump);
1072
1073 static void mlx5e_tc_debugfs_init(struct mlx5e_tc_table *tc,
1074 struct dentry *dfs_root)
1075 {
1076 if (IS_ERR_OR_NULL(dfs_root))
1077 return;
1078
1079 tc->dfs_root = debugfs_create_dir("tc", dfs_root);
1080
1081 debugfs_create_file("hairpin_num_active", 0444, tc->dfs_root, tc,
1082 &fops_hairpin_num_active);
1083 debugfs_create_file("hairpin_table_dump", 0444, tc->dfs_root, tc,
1084 &debugfs_hairpin_table_dump_fops);
1085 }
1086
1087 static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
1088 struct mlx5e_tc_flow *flow,
1089 struct mlx5e_tc_flow_parse_attr *parse_attr,
1090 struct netlink_ext_ack *extack)
1091 {
1092 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1093 struct devlink *devlink = priv_to_devlink(priv->mdev);
1094 int peer_ifindex = parse_attr->mirred_ifindex[0];
1095 union devlink_param_value val = {};
1096 struct mlx5_hairpin_params params;
1097 struct mlx5_core_dev *peer_mdev;
1098 struct mlx5e_hairpin_entry *hpe;
1099 struct mlx5e_hairpin *hp;
1100 u8 match_prio;
1101 u16 peer_id;
1102 int err;
1103
1104 peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
1105 if (IS_ERR(peer_mdev)) {
1106 NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
1107 return PTR_ERR(peer_mdev);
1108 }
1109
1110 if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
1111 NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
1112 return -EOPNOTSUPP;
1113 }
1114
1115 peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
1116 err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
1117 extack);
1118 if (err)
1119 return err;
1120
1121 mutex_lock(&tc->hairpin_tbl_lock);
1122 hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
1123 if (hpe) {
1124 mutex_unlock(&tc->hairpin_tbl_lock);
1125 wait_for_completion(&hpe->res_ready);
1126
1127 if (IS_ERR(hpe->hp)) {
1128 err = -EREMOTEIO;
1129 goto out_err;
1130 }
1131 goto attach_flow;
1132 }
1133
1134 hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
1135 if (!hpe) {
1136 mutex_unlock(&tc->hairpin_tbl_lock);
1137 return -ENOMEM;
1138 }
1139
1140 spin_lock_init(&hpe->flows_lock);
1141 INIT_LIST_HEAD(&hpe->flows);
1142 INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
1143 hpe->peer_vhca_id = peer_id;
1144 hpe->prio = match_prio;
1145 refcount_set(&hpe->refcnt, 1);
1146 init_completion(&hpe->res_ready);
1147
1148 hash_add(tc->hairpin_tbl, &hpe->hairpin_hlist,
1149 hash_hairpin_info(peer_id, match_prio));
1150 mutex_unlock(&tc->hairpin_tbl_lock);
1151
1152 err = devl_param_driverinit_value_get(
1153 devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, &val);
1154 if (err) {
1155 err = -ENOMEM;
1156 goto out_err;
1157 }
1158
1159 params.log_num_packets = ilog2(val.vu32);
1160 params.log_data_size =
1161 clamp_t(u32,
1162 params.log_num_packets +
1163 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev),
1164 MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz),
1165 MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
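/* Illustrative sizing: a hairpin queue size of 1024 packets gives
 * log_num_packets = ilog2(1024) = 10, and log_data_size becomes 10 plus the
 * minimum MPWRQ stride log, clamped to the device's reported
 * log_min/log_max_hairpin_wq_data_sz limits.
 */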
1166
1167 params.q_counter = priv->q_counter;
1168 err = devl_param_driverinit_value_get(
1169 devlink, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, &val);
1170 if (err) {
1171 err = -ENOMEM;
1172 goto out_err;
1173 }
1174
1175 params.num_channels = val.vu32;
1176
1177 hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
1178 hpe->hp = hp;
1179 complete_all(&hpe->res_ready);
1180 if (IS_ERR(hp)) {
1181 err = PTR_ERR(hp);
1182 goto out_err;
1183 }
1184
1185 netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
1186 mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
1187 dev_name(hp->pair->peer_mdev->device),
1188 hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);
1189
1190 attach_flow:
1191 if (hpe->hp->num_channels > 1) {
1192 flow_flag_set(flow, HAIRPIN_RSS);
1193 flow->attr->nic_attr->hairpin_ft =
1194 mlx5_get_ttc_flow_table(hpe->hp->ttc);
1195 } else {
1196 flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
1197 }
1198
1199 flow->hpe = hpe;
1200 spin_lock(&hpe->flows_lock);
1201 list_add(&flow->hairpin, &hpe->flows);
1202 spin_unlock(&hpe->flows_lock);
1203
1204 return 0;
1205
1206 out_err:
1207 mlx5e_hairpin_put(priv, hpe);
1208 return err;
1209 }
1210
1211 static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
1212 struct mlx5e_tc_flow *flow)
1213 {
1214 /* flow wasn't fully initialized */
1215 if (!flow->hpe)
1216 return;
1217
1218 spin_lock(&flow->hpe->flows_lock);
1219 list_del(&flow->hairpin);
1220 spin_unlock(&flow->hpe->flows_lock);
1221
1222 mlx5e_hairpin_put(priv, flow->hpe);
1223 flow->hpe = NULL;
1224 }
1225
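/* Destination selection below follows a fixed precedence: an explicit dest_ft,
 * then a hairpin RSS flow table, then a single hairpin TIR, and otherwise (for
 * FWD_DEST actions) either the goto chain's table or the vlan flow table; a
 * flow counter destination may be appended on top of any of these.
 */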
1226 struct mlx5_flow_handle *
1227 mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
1228 struct mlx5_flow_spec *spec,
1229 struct mlx5_flow_attr *attr)
1230 {
1231 struct mlx5_flow_context *flow_context = &spec->flow_context;
1232 struct mlx5e_vlan_table *vlan = mlx5e_fs_get_vlan(priv->fs);
1233 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1234 struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
1235 struct mlx5_flow_destination dest[2] = {};
1236 struct mlx5_fs_chains *nic_chains;
1237 struct mlx5_flow_act flow_act = {
1238 .action = attr->action,
1239 .flags = FLOW_ACT_NO_APPEND,
1240 };
1241 struct mlx5_flow_handle *rule;
1242 struct mlx5_flow_table *ft;
1243 int dest_ix = 0;
1244
1245 nic_chains = mlx5e_nic_chains(tc);
1246 flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
1247 flow_context->flow_tag = nic_attr->flow_tag;
1248
1249 if (attr->dest_ft) {
1250 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1251 dest[dest_ix].ft = attr->dest_ft;
1252 dest_ix++;
1253 } else if (nic_attr->hairpin_ft) {
1254 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1255 dest[dest_ix].ft = nic_attr->hairpin_ft;
1256 dest_ix++;
1257 } else if (nic_attr->hairpin_tirn) {
1258 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1259 dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
1260 dest_ix++;
1261 } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1262 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
1263 if (attr->dest_chain) {
1264 dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
1265 attr->dest_chain, 1,
1266 MLX5E_TC_FT_LEVEL);
1267 if (IS_ERR(dest[dest_ix].ft))
1268 return ERR_CAST(dest[dest_ix].ft);
1269 } else {
1270 dest[dest_ix].ft = mlx5e_vlan_get_flowtable(vlan);
1271 }
1272 dest_ix++;
1273 }
1274
1275 if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
1276 MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
1277 flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
1278
1279 if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1280 dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1281 dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
1282 dest_ix++;
1283 }
1284
1285 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
1286 flow_act.modify_hdr = attr->modify_hdr;
1287
1288 mutex_lock(&tc->t_lock);
1289 if (IS_ERR_OR_NULL(tc->t)) {
1290 /* Create the root table here if it doesn't exist yet */
1291 tc->t =
1292 mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);
1293
1294 if (IS_ERR(tc->t)) {
1295 mutex_unlock(&tc->t_lock);
1296 netdev_err(priv->netdev,
1297 "Failed to create tc offload table\n");
1298 rule = ERR_CAST(tc->t);
1299 goto err_ft_get;
1300 }
1301 }
1302 mutex_unlock(&tc->t_lock);
1303
1304 if (attr->chain || attr->prio)
1305 ft = mlx5_chains_get_table(nic_chains,
1306 attr->chain, attr->prio,
1307 MLX5E_TC_FT_LEVEL);
1308 else
1309 ft = attr->ft;
1310
1311 if (IS_ERR(ft)) {
1312 rule = ERR_CAST(ft);
1313 goto err_ft_get;
1314 }
1315
1316 if (attr->outer_match_level != MLX5_MATCH_NONE)
1317 spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;
1318
1319 rule = mlx5_add_flow_rules(ft, spec,
1320 &flow_act, dest, dest_ix);
1321 if (IS_ERR(rule))
1322 goto err_rule;
1323
1324 return rule;
1325
1326 err_rule:
1327 if (attr->chain || attr->prio)
1328 mlx5_chains_put_table(nic_chains,
1329 attr->chain, attr->prio,
1330 MLX5E_TC_FT_LEVEL);
1331 err_ft_get:
1332 if (attr->dest_chain)
1333 mlx5_chains_put_table(nic_chains,
1334 attr->dest_chain, 1,
1335 MLX5E_TC_FT_LEVEL);
1336
1337 return ERR_CAST(rule);
1338 }
1339
1340 static int
1341 alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
1342 struct mlx5_flow_attr *attr)
1343
1344 {
1345 struct mlx5_fc *counter;
1346
1347 counter = mlx5_fc_create(counter_dev, true);
1348 if (IS_ERR(counter))
1349 return PTR_ERR(counter);
1350
1351 attr->counter = counter;
1352 return 0;
1353 }
1354
1355 static int
1356 mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
1357 struct mlx5e_tc_flow *flow,
1358 struct netlink_ext_ack *extack)
1359 {
1360 struct mlx5e_tc_flow_parse_attr *parse_attr;
1361 struct mlx5_flow_attr *attr = flow->attr;
1362 struct mlx5_core_dev *dev = priv->mdev;
1363 int err;
1364
1365 parse_attr = attr->parse_attr;
1366
1367 if (flow_flag_test(flow, HAIRPIN)) {
1368 err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
1369 if (err)
1370 return err;
1371 }
1372
1373 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1374 err = alloc_flow_attr_counter(dev, attr);
1375 if (err)
1376 return err;
1377 }
1378
1379 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1380 err = mlx5e_tc_attach_mod_hdr(priv, flow, attr);
1381 if (err)
1382 return err;
1383 }
1384
1385 flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec, attr);
1386 return PTR_ERR_OR_ZERO(flow->rule[0]);
1387 }
1388
1389 void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
1390 struct mlx5_flow_handle *rule,
1391 struct mlx5_flow_attr *attr)
1392 {
1393 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1394 struct mlx5_fs_chains *nic_chains;
1395
1396 nic_chains = mlx5e_nic_chains(tc);
1397 mlx5_del_flow_rules(rule);
1398
1399 if (attr->chain || attr->prio)
1400 mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
1401 MLX5E_TC_FT_LEVEL);
1402
1403 if (attr->dest_chain)
1404 mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
1405 MLX5E_TC_FT_LEVEL);
1406 }
1407
1408 static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
1409 struct mlx5e_tc_flow *flow)
1410 {
1411 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
1412 struct mlx5_flow_attr *attr = flow->attr;
1413
1414 flow_flag_clear(flow, OFFLOADED);
1415
1416 if (!IS_ERR_OR_NULL(flow->rule[0]))
1417 mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);
1418
1419 /* Remove root table if no rules are left to avoid
1420 * extra steering hops.
1421 */
1422 mutex_lock(&tc->t_lock);
1423 if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
1424 !IS_ERR_OR_NULL(tc->t)) {
1425 mlx5_chains_put_table(mlx5e_nic_chains(tc), 0, 1, MLX5E_TC_FT_LEVEL);
1426 tc->t = NULL;
1427 }
1428 mutex_unlock(&tc->t_lock);
1429
1430 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1431 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
1432 mlx5e_tc_detach_mod_hdr(priv, flow, attr);
1433 }
1434
1435 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
1436 mlx5_fc_destroy(priv->mdev, attr->counter);
1437
1438 if (flow_flag_test(flow, HAIRPIN))
1439 mlx5e_hairpin_flow_del(priv, flow);
1440
1441 free_flow_post_acts(flow);
1442
1443 kvfree(attr->parse_attr);
1444 kfree(flow->attr);
1445 }
1446
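/* For eswitch rules with a non-zero split_count a second forwarding rule is
 * installed (flow->rule[1]) in addition to the returned handle; both are torn
 * down again by mlx5e_tc_unoffload_fdb_rules().
 */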
1447 struct mlx5_flow_handle *
1448 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
1449 struct mlx5e_tc_flow *flow,
1450 struct mlx5_flow_spec *spec,
1451 struct mlx5_flow_attr *attr)
1452 {
1453 struct mlx5_flow_handle *rule;
1454
1455 if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1456 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
1457
1458 rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
1459
1460 if (IS_ERR(rule))
1461 return rule;
1462
1463 if (attr->esw_attr->split_count) {
1464 flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
1465 if (IS_ERR(flow->rule[1]))
1466 goto err_rule1;
1467 }
1468
1469 return rule;
1470
1471 err_rule1:
1472 mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
1473 return flow->rule[1];
1474 }
1475
1476 void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
1477 struct mlx5e_tc_flow *flow,
1478 struct mlx5_flow_attr *attr)
1479 {
1480 flow_flag_clear(flow, OFFLOADED);
1481
1482 if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
1483 return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);
1484
1485 if (attr->esw_attr->split_count)
1486 mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);
1487
1488 mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
1489 }
1490
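/* Offload the flow to the slow path table so packets are handed to software.
 * When the device can forward after a header rewrite, the chain id mapping is
 * also written into the mapped-object register so software can resume
 * processing from the right chain on a miss.
 */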
1491 struct mlx5_flow_handle *
1492 mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
1493 struct mlx5e_tc_flow *flow,
1494 struct mlx5_flow_spec *spec)
1495 {
1496 struct mlx5e_tc_mod_hdr_acts mod_acts = {};
1497 struct mlx5e_mod_hdr_handle *mh = NULL;
1498 struct mlx5_flow_attr *slow_attr;
1499 struct mlx5_flow_handle *rule;
1500 bool fwd_and_modify_cap;
1501 u32 chain_mapping = 0;
1502 int err;
1503
1504 slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1505 if (!slow_attr)
1506 return ERR_PTR(-ENOMEM);
1507
1508 memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1509 slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1510 slow_attr->esw_attr->split_count = 0;
1511 slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1512
1513 fwd_and_modify_cap = MLX5_CAP_ESW_FLOWTABLE((esw)->dev, fdb_modify_header_fwd_to_table);
1514 if (!fwd_and_modify_cap)
1515 goto skip_restore;
1516
1517 err = mlx5_chains_get_chain_mapping(esw_chains(esw), flow->attr->chain, &chain_mapping);
1518 if (err)
1519 goto err_get_chain;
1520
1521 err = mlx5e_tc_match_to_reg_set(esw->dev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
1522 MAPPED_OBJ_TO_REG, chain_mapping);
1523 if (err)
1524 goto err_reg_set;
1525
1526 mh = mlx5e_mod_hdr_attach(esw->dev, get_mod_hdr_table(flow->priv, flow),
1527 MLX5_FLOW_NAMESPACE_FDB, &mod_acts);
1528 if (IS_ERR(mh)) {
1529 err = PTR_ERR(mh);
1530 goto err_attach;
1531 }
1532
1533 slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1534 slow_attr->modify_hdr = mlx5e_mod_hdr_get(mh);
1535
1536 skip_restore:
1537 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
1538 if (IS_ERR(rule)) {
1539 err = PTR_ERR(rule);
1540 goto err_offload;
1541 }
1542
1543 flow->attr->slow_mh = mh;
1544 flow->chain_mapping = chain_mapping;
1545 flow_flag_set(flow, SLOW);
1546
1547 mlx5e_mod_hdr_dealloc(&mod_acts);
1548 kfree(slow_attr);
1549
1550 return rule;
1551
1552 err_offload:
1553 if (fwd_and_modify_cap)
1554 mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), mh);
1555 err_attach:
1556 err_reg_set:
1557 if (fwd_and_modify_cap)
1558 mlx5_chains_put_chain_mapping(esw_chains(esw), chain_mapping);
1559 err_get_chain:
1560 mlx5e_mod_hdr_dealloc(&mod_acts);
1561 kfree(slow_attr);
1562 return ERR_PTR(err);
1563 }
1564
1565 void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
1566 struct mlx5e_tc_flow *flow)
1567 {
1568 struct mlx5e_mod_hdr_handle *slow_mh = flow->attr->slow_mh;
1569 struct mlx5_flow_attr *slow_attr;
1570
1571 slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
1572 if (!slow_attr) {
1573 mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
1574 return;
1575 }
1576
1577 memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
1578 slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1579 slow_attr->esw_attr->split_count = 0;
1580 slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
1581 if (slow_mh) {
1582 slow_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1583 slow_attr->modify_hdr = mlx5e_mod_hdr_get(slow_mh);
1584 }
1585 mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
1586 if (slow_mh) {
1587 mlx5e_mod_hdr_detach(esw->dev, get_mod_hdr_table(flow->priv, flow), slow_mh);
1588 mlx5_chains_put_chain_mapping(esw_chains(esw), flow->chain_mapping);
1589 flow->chain_mapping = 0;
1590 flow->attr->slow_mh = NULL;
1591 }
1592 flow_flag_clear(flow, SLOW);
1593 kfree(slow_attr);
1594 }
1595
1596 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1597 * function.
1598 */
1599 static void unready_flow_add(struct mlx5e_tc_flow *flow,
1600 struct list_head *unready_flows)
1601 {
1602 flow_flag_set(flow, NOT_READY);
1603 list_add_tail(&flow->unready, unready_flows);
1604 }
1605
1606 /* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
1607 * function.
1608 */
1609 static void unready_flow_del(struct mlx5e_tc_flow *flow)
1610 {
1611 list_del(&flow->unready);
1612 flow_flag_clear(flow, NOT_READY);
1613 }
1614
1615 static void add_unready_flow(struct mlx5e_tc_flow *flow)
1616 {
1617 struct mlx5_rep_uplink_priv *uplink_priv;
1618 struct mlx5e_rep_priv *rpriv;
1619 struct mlx5_eswitch *esw;
1620
1621 esw = flow->priv->mdev->priv.eswitch;
1622 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1623 uplink_priv = &rpriv->uplink_priv;
1624
1625 mutex_lock(&uplink_priv->unready_flows_lock);
1626 unready_flow_add(flow, &uplink_priv->unready_flows);
1627 mutex_unlock(&uplink_priv->unready_flows_lock);
1628 }
1629
1630 static void remove_unready_flow(struct mlx5e_tc_flow *flow)
1631 {
1632 struct mlx5_rep_uplink_priv *uplink_priv;
1633 struct mlx5e_rep_priv *rpriv;
1634 struct mlx5_eswitch *esw;
1635
1636 esw = flow->priv->mdev->priv.eswitch;
1637 rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1638 uplink_priv = &rpriv->uplink_priv;
1639
1640 mutex_lock(&uplink_priv->unready_flows_lock);
1641 unready_flow_del(flow);
1642 mutex_unlock(&uplink_priv->unready_flows_lock);
1643 }
1644
1645 bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
1646 {
1647 struct mlx5_core_dev *out_mdev, *route_mdev;
1648 struct mlx5e_priv *out_priv, *route_priv;
1649
1650 out_priv = netdev_priv(out_dev);
1651 out_mdev = out_priv->mdev;
1652 route_priv = netdev_priv(route_dev);
1653 route_mdev = route_priv->mdev;
1654
1655 if (out_mdev->coredev_type != MLX5_COREDEV_PF)
1656 return false;
1657
1658 if (route_mdev->coredev_type != MLX5_COREDEV_VF &&
1659 route_mdev->coredev_type != MLX5_COREDEV_SF)
1660 return false;
1661
1662 return mlx5e_same_hw_devs(out_priv, route_priv);
1663 }
1664
1665 int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
1666 {
1667 struct mlx5e_priv *out_priv, *route_priv;
1668 struct mlx5_devcom *devcom = NULL;
1669 struct mlx5_core_dev *route_mdev;
1670 struct mlx5_eswitch *esw;
1671 u16 vhca_id;
1672 int err;
1673
1674 out_priv = netdev_priv(out_dev);
1675 esw = out_priv->mdev->priv.eswitch;
1676 route_priv = netdev_priv(route_dev);
1677 route_mdev = route_priv->mdev;
1678
1679 vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
1680 if (mlx5_lag_is_active(out_priv->mdev)) {
1681 /* In the lag case we may get devices from different eswitch instances.
1682  * If we fail to get the vport num, it most likely means we are on the
1683  * wrong eswitch.
1684 */
1685 err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1686 if (err != -ENOENT)
1687 return err;
1688
1689 devcom = out_priv->mdev->priv.devcom;
1690 esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1691 if (!esw)
1692 return -ENODEV;
1693 }
1694
1695 err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
1696 if (devcom)
1697 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
1698 return err;
1699 }
1700
1701 static int
1702 set_encap_dests(struct mlx5e_priv *priv,
1703 struct mlx5e_tc_flow *flow,
1704 struct mlx5_flow_attr *attr,
1705 struct netlink_ext_ack *extack,
1706 bool *vf_tun)
1707 {
1708 struct mlx5e_tc_flow_parse_attr *parse_attr;
1709 struct mlx5_esw_flow_attr *esw_attr;
1710 struct net_device *encap_dev = NULL;
1711 struct mlx5e_rep_priv *rpriv;
1712 struct mlx5e_priv *out_priv;
1713 int out_index;
1714 int err = 0;
1715
1716 if (!mlx5e_is_eswitch_flow(flow))
1717 return 0;
1718
1719 parse_attr = attr->parse_attr;
1720 esw_attr = attr->esw_attr;
1721 *vf_tun = false;
1722
1723 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1724 struct net_device *out_dev;
1725 int mirred_ifindex;
1726
1727 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1728 continue;
1729
1730 mirred_ifindex = parse_attr->mirred_ifindex[out_index];
1731 out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
1732 if (!out_dev) {
1733 NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
1734 err = -ENODEV;
1735 goto out;
1736 }
1737 err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
1738 extack, &encap_dev);
1739 dev_put(out_dev);
1740 if (err)
1741 goto out;
1742
1743 if (esw_attr->dests[out_index].flags &
1744 MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
1745 !esw_attr->dest_int_port)
1746 *vf_tun = true;
1747
1748 out_priv = netdev_priv(encap_dev);
1749 rpriv = out_priv->ppriv;
1750 esw_attr->dests[out_index].rep = rpriv->rep;
1751 esw_attr->dests[out_index].mdev = out_priv->mdev;
1752 }
1753
1754 if (*vf_tun && esw_attr->out_count > 1) {
1755 NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
1756 err = -EOPNOTSUPP;
1757 goto out;
1758 }
1759
1760 out:
1761 return err;
1762 }
1763
1764 static void
1765 clean_encap_dests(struct mlx5e_priv *priv,
1766 struct mlx5e_tc_flow *flow,
1767 struct mlx5_flow_attr *attr)
1768 {
1769 struct mlx5_esw_flow_attr *esw_attr;
1770 int out_index;
1771
1772 if (!mlx5e_is_eswitch_flow(flow))
1773 return;
1774
1775 esw_attr = attr->esw_attr;
1776
1777 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
1778 if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
1779 continue;
1780
1781 mlx5e_detach_encap(priv, flow, attr, out_index);
1782 kfree(attr->parse_attr->tun_info[out_index]);
1783 }
1784 }
1785
1786 static int
1787 verify_attr_actions(u32 actions, struct netlink_ext_ack *extack)
1788 {
1789 if (!(actions &
1790 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
1791 NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
1792 return -EOPNOTSUPP;
1793 }
1794
1795 if (!(~actions &
1796 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
1797 NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
1798 return -EOPNOTSUPP;
1799 }
1800
1801 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
1802 actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
1803 NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
1804 return -EOPNOTSUPP;
1805 }
1806
1807 return 0;
1808 }
1809
1810 static int
1811 post_process_attr(struct mlx5e_tc_flow *flow,
1812 struct mlx5_flow_attr *attr,
1813 struct netlink_ext_ack *extack)
1814 {
1815 bool vf_tun;
1816 int err = 0;
1817
1818 err = verify_attr_actions(attr->action, extack);
1819 if (err)
1820 goto err_out;
1821
1822 err = set_encap_dests(flow->priv, flow, attr, extack, &vf_tun);
1823 if (err)
1824 goto err_out;
1825
1826 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1827 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr);
1828 if (err)
1829 goto err_out;
1830 }
1831
1832 if (attr->branch_true &&
1833 attr->branch_true->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1834 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_true);
1835 if (err)
1836 goto err_out;
1837 }
1838
1839 if (attr->branch_false &&
1840 attr->branch_false->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
1841 err = mlx5e_tc_attach_mod_hdr(flow->priv, flow, attr->branch_false);
1842 if (err)
1843 goto err_out;
1844 }
1845
1846 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
1847 err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
1848 if (err)
1849 goto err_out;
1850 }
1851
1852 err_out:
1853 return err;
1854 }
1855
1856 static int
1857 mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
1858 struct mlx5e_tc_flow *flow,
1859 struct netlink_ext_ack *extack)
1860 {
1861 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1862 struct mlx5e_tc_flow_parse_attr *parse_attr;
1863 struct mlx5_flow_attr *attr = flow->attr;
1864 struct mlx5_esw_flow_attr *esw_attr;
1865 u32 max_prio, max_chain;
1866 int err = 0;
1867
1868 parse_attr = attr->parse_attr;
1869 esw_attr = attr->esw_attr;
1870
1871 /* We check chain range only for tc flows.
1872 * For ft flows, we checked attr->chain was originally 0 and set it to
1873 * FDB_FT_CHAIN which is outside tc range.
1874 * See mlx5e_rep_setup_ft_cb().
1875 */
1876 max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
1877 if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
1878 NL_SET_ERR_MSG_MOD(extack,
1879 "Requested chain is out of supported range");
1880 err = -EOPNOTSUPP;
1881 goto err_out;
1882 }
1883
1884 max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
1885 if (attr->prio > max_prio) {
1886 NL_SET_ERR_MSG_MOD(extack,
1887 "Requested priority is out of supported range");
1888 err = -EOPNOTSUPP;
1889 goto err_out;
1890 }
1891
1892 if (flow_flag_test(flow, TUN_RX)) {
1893 err = mlx5e_attach_decap_route(priv, flow);
1894 if (err)
1895 goto err_out;
1896
1897 if (!attr->chain && esw_attr->int_port &&
1898 attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
1899 /* If decap route device is internal port, change the
1900 * source vport value in reg_c0 back to uplink just in
1901 * case the rule performs goto chain > 0. If we have a miss
1902 * on chain > 0 we want the metadata regs to hold the
1903 * chain id so SW will resume handling of this packet
1904 * from the proper chain.
1905 */
1906 u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
1907 esw_attr->in_rep->vport);
1908
1909 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
1910 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
1911 metadata);
1912 if (err)
1913 goto err_out;
1914
1915 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1916 }
1917 }
1918
1919 if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
1920 err = mlx5e_attach_decap(priv, flow, extack);
1921 if (err)
1922 goto err_out;
1923 }
1924
1925 if (netif_is_ovs_master(parse_attr->filter_dev)) {
1926 struct mlx5e_tc_int_port *int_port;
1927
1928 if (attr->chain) {
1929 NL_SET_ERR_MSG_MOD(extack,
1930 "Internal port rule is only supported on chain 0");
1931 err = -EOPNOTSUPP;
1932 goto err_out;
1933 }
1934
1935 if (attr->dest_chain) {
1936 NL_SET_ERR_MSG_MOD(extack,
1937 "Internal port rule offload doesn't support goto action");
1938 err = -EOPNOTSUPP;
1939 goto err_out;
1940 }
1941
1942 int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
1943 parse_attr->filter_dev->ifindex,
1944 flow_flag_test(flow, EGRESS) ?
1945 MLX5E_TC_INT_PORT_EGRESS :
1946 MLX5E_TC_INT_PORT_INGRESS);
1947 if (IS_ERR(int_port)) {
1948 err = PTR_ERR(int_port);
1949 goto err_out;
1950 }
1951
1952 esw_attr->int_port = int_port;
1953 }
1954
1955 err = post_process_attr(flow, attr, extack);
1956 if (err)
1957 goto err_out;
1958
1959 err = mlx5e_tc_act_stats_add_flow(get_act_stats_handle(priv), flow);
1960 if (err)
1961 goto err_out;
1962
1963 	/* we get here if one of the following takes place:
1964 	 * (1) there's no error
1965 	 * (2) there's an encap action and we don't have a valid neigh yet
1966 	 */
1967 if (flow_flag_test(flow, SLOW))
1968 flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
1969 else
1970 flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);
1971
1972 if (IS_ERR(flow->rule[0])) {
1973 err = PTR_ERR(flow->rule[0]);
1974 goto err_out;
1975 }
1976 flow_flag_set(flow, OFFLOADED);
1977
1978 return 0;
1979
1980 err_out:
1981 flow_flag_set(flow, FAILED);
1982 return err;
1983 }
1984
1985 static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
1986 {
1987 struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
1988 void *headers_v = MLX5_ADDR_OF(fte_match_param,
1989 spec->match_value,
1990 misc_parameters_3);
1991 u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
1992 headers_v,
1993 geneve_tlv_option_0_data);
1994
1995 return !!geneve_tlv_opt_0_data;
1996 }
1997
1998 static void free_branch_attr(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
1999 {
2000 if (!attr)
2001 return;
2002
2003 mlx5_free_flow_attr_actions(flow, attr);
2004 kvfree(attr->parse_attr);
2005 kfree(attr);
2006 }
2007
2008 static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
2009 struct mlx5e_tc_flow *flow)
2010 {
2011 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2012 struct mlx5_flow_attr *attr = flow->attr;
2013 struct mlx5_esw_flow_attr *esw_attr;
2014
2015 esw_attr = attr->esw_attr;
2016 mlx5e_put_flow_tunnel_id(flow);
2017
2018 if (flow_flag_test(flow, NOT_READY))
2019 remove_unready_flow(flow);
2020
2021 if (mlx5e_is_offloaded_flow(flow)) {
2022 if (flow_flag_test(flow, SLOW))
2023 mlx5e_tc_unoffload_from_slow_path(esw, flow);
2024 else
2025 mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
2026 }
2027 complete_all(&flow->del_hw_done);
2028
2029 if (mlx5_flow_has_geneve_opt(flow))
2030 mlx5_geneve_tlv_option_del(priv->mdev->geneve);
2031
2032 if (flow->decap_route)
2033 mlx5e_detach_decap_route(priv, flow);
2034
2035 mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);
2036
2037 if (esw_attr->int_port)
2038 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);
2039
2040 if (esw_attr->dest_int_port)
2041 mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);
2042
2043 if (flow_flag_test(flow, L3_TO_L2_DECAP))
2044 mlx5e_detach_decap(priv, flow);
2045
2046 mlx5e_tc_act_stats_del_flow(get_act_stats_handle(priv), flow);
2047
2048 free_flow_post_acts(flow);
2049 mlx5_free_flow_attr_actions(flow, attr);
2050
2051 kvfree(attr->esw_attr->rx_tun_attr);
2052 kvfree(attr->parse_attr);
2053 kfree(flow->attr);
2054 }
2055
2056 struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
2057 {
2058 struct mlx5_flow_attr *attr;
2059
2060 attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
2061 return attr->counter;
2062 }
2063
2064 /* Iterate over tmp_list of flows attached to flow_list head. */
2065 void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
2066 {
2067 struct mlx5e_tc_flow *flow, *tmp;
2068
2069 list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
2070 mlx5e_flow_put(priv, flow);
2071 }
2072
2073 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
2074 {
2075 struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
2076
2077 if (!flow_flag_test(flow, ESWITCH) ||
2078 !flow_flag_test(flow, DUP))
2079 return;
2080
2081 mutex_lock(&esw->offloads.peer_mutex);
2082 list_del(&flow->peer);
2083 mutex_unlock(&esw->offloads.peer_mutex);
2084
2085 flow_flag_clear(flow, DUP);
2086
2087 if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
2088 mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
2089 kfree(flow->peer_flow);
2090 }
2091
2092 flow->peer_flow = NULL;
2093 }
2094
2095 static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
2096 {
2097 struct mlx5_core_dev *dev = flow->priv->mdev;
2098 struct mlx5_devcom *devcom = dev->priv.devcom;
2099 struct mlx5_eswitch *peer_esw;
2100
2101 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
2102 if (!peer_esw)
2103 return;
2104
2105 __mlx5e_tc_del_fdb_peer_flow(flow);
2106 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
2107 }
2108
2109 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
2110 struct mlx5e_tc_flow *flow)
2111 {
2112 if (mlx5e_is_eswitch_flow(flow)) {
2113 mlx5e_tc_del_fdb_peer_flow(flow);
2114 mlx5e_tc_del_fdb_flow(priv, flow);
2115 } else {
2116 mlx5e_tc_del_nic_flow(priv, flow);
2117 }
2118 }
2119
2120 static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
2121 {
2122 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2123 struct flow_action *flow_action = &rule->action;
2124 const struct flow_action_entry *act;
2125 int i;
2126
2127 if (chain)
2128 return false;
2129
2130 flow_action_for_each(i, act, flow_action) {
2131 switch (act->id) {
2132 case FLOW_ACTION_GOTO:
2133 return true;
2134 case FLOW_ACTION_SAMPLE:
2135 return true;
2136 default:
2137 continue;
2138 }
2139 }
2140
2141 return false;
2142 }
2143
2144 static int
2145 enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
2146 struct flow_dissector_key_enc_opts *opts,
2147 struct netlink_ext_ack *extack,
2148 bool *dont_care)
2149 {
2150 struct geneve_opt *opt;
2151 int off = 0;
2152
2153 *dont_care = true;
2154
2155 while (opts->len > off) {
2156 opt = (struct geneve_opt *)&opts->data[off];
2157
2158 if (!(*dont_care) || opt->opt_class || opt->type ||
2159 memchr_inv(opt->opt_data, 0, opt->length * 4)) {
2160 *dont_care = false;
2161
2162 if (opt->opt_class != htons(U16_MAX) ||
2163 opt->type != U8_MAX) {
2164 NL_SET_ERR_MSG_MOD(extack,
2165 "Partial match of tunnel options in chain > 0 isn't supported");
2166 netdev_warn(priv->netdev,
2167 "Partial match of tunnel options in chain > 0 isn't supported");
2168 return -EOPNOTSUPP;
2169 }
2170 }
2171
2172 off += sizeof(struct geneve_opt) + opt->length * 4;
2173 }
2174
2175 return 0;
2176 }
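
/* Illustrative classification of the enc_opts masks handled above (example
 * values only, not used by the driver):
 *
 *   every option masked with zero class/type/data     -> *dont_care = true
 *   opt_class mask 0xffff and type mask 0xff          -> full match, allowed
 *   any other non-zero mask (e.g. opt_class 0x00ff)   -> partial match,
 *                                                        -EOPNOTSUPP
 */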
2177
2178 #define COPY_DISSECTOR(rule, diss_key, dst)\
2179 ({ \
2180 struct flow_rule *__rule = (rule);\
2181 typeof(dst) __dst = dst;\
2182 \
2183 memcpy(__dst,\
2184 skb_flow_dissector_target(__rule->match.dissector,\
2185 diss_key,\
2186 __rule->match.key),\
2187 sizeof(*__dst));\
2188 })
2189
2190 static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
2191 struct mlx5e_tc_flow *flow,
2192 struct flow_cls_offload *f,
2193 struct net_device *filter_dev)
2194 {
2195 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2196 struct netlink_ext_ack *extack = f->common.extack;
2197 struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
2198 struct flow_match_enc_opts enc_opts_match;
2199 struct tunnel_match_enc_opts tun_enc_opts;
2200 struct mlx5_rep_uplink_priv *uplink_priv;
2201 struct mlx5_flow_attr *attr = flow->attr;
2202 struct mlx5e_rep_priv *uplink_rpriv;
2203 struct tunnel_match_key tunnel_key;
2204 bool enc_opts_is_dont_care = true;
2205 u32 tun_id, enc_opts_id = 0;
2206 struct mlx5_eswitch *esw;
2207 u32 value, mask;
2208 int err;
2209
2210 esw = priv->mdev->priv.eswitch;
2211 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2212 uplink_priv = &uplink_rpriv->uplink_priv;
2213
2214 memset(&tunnel_key, 0, sizeof(tunnel_key));
2215 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
2216 &tunnel_key.enc_control);
2217 if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
2218 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
2219 &tunnel_key.enc_ipv4);
2220 else
2221 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
2222 &tunnel_key.enc_ipv6);
2223 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
2224 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
2225 &tunnel_key.enc_tp);
2226 COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
2227 &tunnel_key.enc_key_id);
2228 tunnel_key.filter_ifindex = filter_dev->ifindex;
2229
2230 err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
2231 if (err)
2232 return err;
2233
2234 flow_rule_match_enc_opts(rule, &enc_opts_match);
2235 err = enc_opts_is_dont_care_or_full_match(priv,
2236 enc_opts_match.mask,
2237 extack,
2238 &enc_opts_is_dont_care);
2239 if (err)
2240 goto err_enc_opts;
2241
2242 if (!enc_opts_is_dont_care) {
2243 memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
2244 memcpy(&tun_enc_opts.key, enc_opts_match.key,
2245 sizeof(*enc_opts_match.key));
2246 memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
2247 sizeof(*enc_opts_match.mask));
2248
2249 err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
2250 &tun_enc_opts, &enc_opts_id);
2251 if (err)
2252 goto err_enc_opts;
2253 }
2254
2255 value = tun_id << ENC_OPTS_BITS | enc_opts_id;
2256 mask = enc_opts_id ? TUNNEL_ID_MASK :
2257 (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
2258
2259 if (attr->chain) {
2260 mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
2261 TUNNEL_TO_REG, value, mask);
2262 } else {
2263 mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
2264 err = mlx5e_tc_match_to_reg_set(priv->mdev,
2265 mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
2266 TUNNEL_TO_REG, value);
2267 if (err)
2268 goto err_set;
2269
2270 attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2271 }
2272
2273 flow->attr->tunnel_id = value;
2274 return 0;
2275
2276 err_set:
2277 if (enc_opts_id)
2278 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2279 enc_opts_id);
2280 err_enc_opts:
2281 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2282 return err;
2283 }
2284
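/* Sketch of the tunnel_id encoding produced above and undone by
 * mlx5e_put_flow_tunnel_id() below (illustrative only):
 *
 *   value       = tun_id << ENC_OPTS_BITS | enc_opts_id;
 *   enc_opts_id = value & ENC_OPTS_BITS_MASK;
 *   tun_id      = value >> ENC_OPTS_BITS;
 *
 * The low ENC_OPTS_BITS bits carry the enc-options mapping id and the
 * remaining bits carry the tunnel mapping id. The full TUNNEL_ID_MASK is
 * used only when options are matched; otherwise the enc-options bits are
 * masked out.
 */
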
2285 static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
2286 {
2287 u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
2288 u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
2289 struct mlx5_rep_uplink_priv *uplink_priv;
2290 struct mlx5e_rep_priv *uplink_rpriv;
2291 struct mlx5_eswitch *esw;
2292
2293 esw = flow->priv->mdev->priv.eswitch;
2294 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
2295 uplink_priv = &uplink_rpriv->uplink_priv;
2296
2297 if (tun_id)
2298 mapping_remove(uplink_priv->tunnel_mapping, tun_id);
2299 if (enc_opts_id)
2300 mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
2301 enc_opts_id);
2302 }
2303
2304 void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
2305 struct flow_match_basic *match, bool outer,
2306 void *headers_c, void *headers_v)
2307 {
2308 bool ip_version_cap;
2309
2310 ip_version_cap = outer ?
2311 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2312 ft_field_support.outer_ip_version) :
2313 MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2314 ft_field_support.inner_ip_version);
2315
2316 if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
2317 (match->key->n_proto == htons(ETH_P_IP) ||
2318 match->key->n_proto == htons(ETH_P_IPV6))) {
2319 MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
2320 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
2321 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
2322 } else {
2323 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
2324 ntohs(match->mask->n_proto));
2325 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
2326 ntohs(match->key->n_proto));
2327 }
2328 }
2329
2330 u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
2331 {
2332 void *headers_v;
2333 u16 ethertype;
2334 u8 ip_version;
2335
2336 if (outer)
2337 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
2338 else
2339 headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);
2340
2341 ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
2342 	/* If ip_version isn't present in the match value, derive it from the ethertype */
2343 if (!ip_version) {
2344 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
2345 if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
2346 ip_version = 4;
2347 else if (ethertype == ETH_P_IPV6)
2348 ip_version = 6;
2349 }
2350 return ip_version;
2351 }
2352
2353 /* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
2354 * And changes inner ip_ecn depending on inner and outer ip_ecn as follows:
2355 * +---------+----------------------------------------+
2356 * |Arriving | Arriving Outer Header |
2357 * | Inner +---------+---------+---------+----------+
2358 * | Header | Not-ECT | ECT(0) | ECT(1) | CE |
2359 * +---------+---------+---------+---------+----------+
2360 * | Not-ECT | Not-ECT | Not-ECT | Not-ECT | <drop> |
2361 * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE* |
2362 * | ECT(1) | ECT(1) | ECT(1) | ECT(1)* | CE* |
2363 * | CE | CE | CE | CE | CE |
2364 * +---------+---------+---------+---------+----------+
2365 *
2366 * Tc matches on inner after decapsulation on tunnel device, but hw offload matches
2367 * the inner ip_ecn value before hardware decap action.
2368 *
2369  * Cells marked with '*' are changed from the original inner packet ip_ecn value during decap,
2370  * and so matching those values on inner ip_ecn before decap will fail.
2371  *
2372  * The following helper allows offload when inner ip_ecn won't be changed by outer ip_ecn,
2373  * except for outer ip_ecn = CE, where in all cases inner ip_ecn will be changed to CE,
2374  * and as such we can drop the inner ip_ecn=CE match.
2375 */
2376
2377 static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
2378 struct flow_cls_offload *f,
2379 bool *match_inner_ecn)
2380 {
2381 u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
2382 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2383 struct netlink_ext_ack *extack = f->common.extack;
2384 struct flow_match_ip match;
2385
2386 *match_inner_ecn = true;
2387
2388 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
2389 flow_rule_match_enc_ip(rule, &match);
2390 outer_ecn_key = match.key->tos & INET_ECN_MASK;
2391 outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
2392 }
2393
2394 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2395 flow_rule_match_ip(rule, &match);
2396 inner_ecn_key = match.key->tos & INET_ECN_MASK;
2397 inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
2398 }
2399
2400 if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
2401 NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
2402 netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
2403 return -EOPNOTSUPP;
2404 }
2405
2406 if (!outer_ecn_mask) {
2407 if (!inner_ecn_mask)
2408 return 0;
2409
2410 NL_SET_ERR_MSG_MOD(extack,
2411 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2412 netdev_warn(priv->netdev,
2413 "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
2414 return -EOPNOTSUPP;
2415 }
2416
2417 if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
2418 NL_SET_ERR_MSG_MOD(extack,
2419 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2420 netdev_warn(priv->netdev,
2421 "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
2422 return -EOPNOTSUPP;
2423 }
2424
2425 if (!inner_ecn_mask)
2426 return 0;
2427
2428 /* Both inner and outer have full mask on ecn */
2429
2430 if (outer_ecn_key == INET_ECN_ECT_1) {
2431 /* inner ecn might change by DECAP action */
2432
2433 NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
2434 netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
2435 return -EOPNOTSUPP;
2436 }
2437
2438 if (outer_ecn_key != INET_ECN_CE)
2439 return 0;
2440
2441 if (inner_ecn_key != INET_ECN_CE) {
2442 /* Can't happen in software, as packet ecn will be changed to CE after decap */
2443 NL_SET_ERR_MSG_MOD(extack,
2444 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2445 netdev_warn(priv->netdev,
2446 "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
2447 return -EOPNOTSUPP;
2448 }
2449
2450 	/* outer ecn = CE, inner ecn = CE: as decap will change the inner ecn to CE in any case,
2451 	 * we can drop the match on inner ecn
2452 	 */
2453 *match_inner_ecn = false;
2454
2455 return 0;
2456 }
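
/* Examples of how the helper above resolves a filter (illustrative only):
 *
 *   enc_tos ecn unmatched, tos ecn unmatched  -> offload, keep inner match
 *   enc_tos ecn = ECT(0),  tos ecn = ECT(0)   -> offload, keep inner match
 *   enc_tos ecn = CE,      tos ecn = CE       -> offload, inner ecn match is
 *                                                dropped (see table above)
 *   enc_tos ecn = ECT(1)                      -> -EOPNOTSUPP
 *   tos ecn matched, enc_tos ecn unmatched    -> -EOPNOTSUPP
 */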
2457
2458 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2459 struct mlx5e_tc_flow *flow,
2460 struct mlx5_flow_spec *spec,
2461 struct flow_cls_offload *f,
2462 struct net_device *filter_dev,
2463 u8 *match_level,
2464 bool *match_inner)
2465 {
2466 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2467 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2468 struct netlink_ext_ack *extack = f->common.extack;
2469 bool needs_mapping, sets_mapping;
2470 int err;
2471
2472 if (!mlx5e_is_eswitch_flow(flow)) {
2473 NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2474 return -EOPNOTSUPP;
2475 }
2476
2477 needs_mapping = !!flow->attr->chain;
2478 sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2479 *match_inner = !needs_mapping;
2480
2481 if ((needs_mapping || sets_mapping) &&
2482 !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2483 NL_SET_ERR_MSG_MOD(extack,
2484 "Chains on tunnel devices isn't supported without register loopback support");
2485 netdev_warn(priv->netdev,
2486 "Chains on tunnel devices isn't supported without register loopback support");
2487 return -EOPNOTSUPP;
2488 }
2489
2490 if (!flow->attr->chain) {
2491 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2492 match_level);
2493 if (err) {
2494 NL_SET_ERR_MSG_MOD(extack,
2495 "Failed to parse tunnel attributes");
2496 netdev_warn(priv->netdev,
2497 "Failed to parse tunnel attributes");
2498 return err;
2499 }
2500
2501 /* With mpls over udp we decapsulate using packet reformat
2502 * object
2503 */
2504 if (!netif_is_bareudp(filter_dev))
2505 flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2506 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2507 if (err)
2508 return err;
2509 } else if (tunnel) {
2510 struct mlx5_flow_spec *tmp_spec;
2511
2512 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2513 if (!tmp_spec) {
2514 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for tunnel tmp spec");
2515 netdev_warn(priv->netdev, "Failed to allocate memory for tunnel tmp spec");
2516 return -ENOMEM;
2517 }
2518 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2519
2520 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2521 if (err) {
2522 kvfree(tmp_spec);
2523 NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2524 netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2525 return err;
2526 }
2527 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2528 kvfree(tmp_spec);
2529 if (err)
2530 return err;
2531 }
2532
2533 if (!needs_mapping && !sets_mapping)
2534 return 0;
2535
2536 return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2537 }
2538
2539 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2540 {
2541 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2542 inner_headers);
2543 }
2544
2545 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2546 {
2547 return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2548 inner_headers);
2549 }
2550
2551 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2552 {
2553 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2554 outer_headers);
2555 }
2556
2557 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2558 {
2559 return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2560 outer_headers);
2561 }
2562
2563 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
2564 {
2565 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2566 get_match_inner_headers_value(spec) :
2567 get_match_outer_headers_value(spec);
2568 }
2569
2570 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
2571 {
2572 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2573 get_match_inner_headers_criteria(spec) :
2574 get_match_outer_headers_criteria(spec);
2575 }
2576
2577 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2578 struct flow_cls_offload *f)
2579 {
2580 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2581 struct netlink_ext_ack *extack = f->common.extack;
2582 struct net_device *ingress_dev;
2583 struct flow_match_meta match;
2584
2585 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2586 return 0;
2587
2588 flow_rule_match_meta(rule, &match);
2589 if (!match.mask->ingress_ifindex)
2590 return 0;
2591
2592 if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2593 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2594 return -EOPNOTSUPP;
2595 }
2596
2597 ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2598 match.key->ingress_ifindex);
2599 if (!ingress_dev) {
2600 NL_SET_ERR_MSG_MOD(extack,
2601 "Can't find the ingress port to match on");
2602 return -ENOENT;
2603 }
2604
2605 if (ingress_dev != filter_dev) {
2606 NL_SET_ERR_MSG_MOD(extack,
2607 "Can't match on the ingress filter port");
2608 return -EOPNOTSUPP;
2609 }
2610
2611 return 0;
2612 }
2613
2614 static bool skip_key_basic(struct net_device *filter_dev,
2615 struct flow_cls_offload *f)
2616 {
2617 /* When doing mpls over udp decap, the user needs to provide
2618 * MPLS_UC as the protocol in order to be able to match on mpls
2619 * label fields. However, the actual ethertype is IP so we want to
2620 * avoid matching on this, otherwise we'll fail the match.
2621 */
2622 if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
2623 return true;
2624
2625 return false;
2626 }
2627
2628 static int __parse_cls_flower(struct mlx5e_priv *priv,
2629 struct mlx5e_tc_flow *flow,
2630 struct mlx5_flow_spec *spec,
2631 struct flow_cls_offload *f,
2632 struct net_device *filter_dev,
2633 u8 *inner_match_level, u8 *outer_match_level)
2634 {
2635 struct netlink_ext_ack *extack = f->common.extack;
2636 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2637 outer_headers);
2638 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2639 outer_headers);
2640 void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2641 misc_parameters);
2642 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2643 misc_parameters);
2644 void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2645 misc_parameters_3);
2646 void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2647 misc_parameters_3);
2648 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2649 struct flow_dissector *dissector = rule->match.dissector;
2650 enum fs_flow_table_type fs_type;
2651 bool match_inner_ecn = true;
2652 u16 addr_type = 0;
2653 u8 ip_proto = 0;
2654 u8 *match_level;
2655 int err;
2656
2657 fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2658 match_level = outer_match_level;
2659
2660 if (dissector->used_keys &
2661 ~(BIT(FLOW_DISSECTOR_KEY_META) |
2662 BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2663 BIT(FLOW_DISSECTOR_KEY_BASIC) |
2664 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2665 BIT(FLOW_DISSECTOR_KEY_VLAN) |
2666 BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2667 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2668 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2669 BIT(FLOW_DISSECTOR_KEY_PORTS) |
2670 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2671 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2672 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2673 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2674 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2675 BIT(FLOW_DISSECTOR_KEY_TCP) |
2676 BIT(FLOW_DISSECTOR_KEY_IP) |
2677 BIT(FLOW_DISSECTOR_KEY_CT) |
2678 BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2679 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2680 BIT(FLOW_DISSECTOR_KEY_ICMP) |
2681 BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2682 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2683 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2684 dissector->used_keys);
2685 return -EOPNOTSUPP;
2686 }
2687
2688 if (mlx5e_get_tc_tun(filter_dev)) {
2689 bool match_inner = false;
2690
2691 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2692 outer_match_level, &match_inner);
2693 if (err)
2694 return err;
2695
2696 if (match_inner) {
2697 /* header pointers should point to the inner headers
2698 * if the packet was decapsulated already.
2699 * outer headers are set by parse_tunnel_attr.
2700 */
2701 match_level = inner_match_level;
2702 headers_c = get_match_inner_headers_criteria(spec);
2703 headers_v = get_match_inner_headers_value(spec);
2704 }
2705
2706 err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
2707 if (err)
2708 return err;
2709 }
2710
2711 err = mlx5e_flower_parse_meta(filter_dev, f);
2712 if (err)
2713 return err;
2714
2715 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2716 !skip_key_basic(filter_dev, f)) {
2717 struct flow_match_basic match;
2718
2719 flow_rule_match_basic(rule, &match);
2720 mlx5e_tc_set_ethertype(priv->mdev, &match,
2721 match_level == outer_match_level,
2722 headers_c, headers_v);
2723
2724 if (match.mask->n_proto)
2725 *match_level = MLX5_MATCH_L2;
2726 }
2727 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2728 is_vlan_dev(filter_dev)) {
2729 struct flow_dissector_key_vlan filter_dev_mask;
2730 struct flow_dissector_key_vlan filter_dev_key;
2731 struct flow_match_vlan match;
2732
2733 if (is_vlan_dev(filter_dev)) {
2734 match.key = &filter_dev_key;
2735 match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2736 match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2737 match.key->vlan_priority = 0;
2738 match.mask = &filter_dev_mask;
2739 memset(match.mask, 0xff, sizeof(*match.mask));
2740 match.mask->vlan_priority = 0;
2741 } else {
2742 flow_rule_match_vlan(rule, &match);
2743 }
2744 if (match.mask->vlan_id ||
2745 match.mask->vlan_priority ||
2746 match.mask->vlan_tpid) {
2747 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2748 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2749 svlan_tag, 1);
2750 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2751 svlan_tag, 1);
2752 } else {
2753 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2754 cvlan_tag, 1);
2755 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2756 cvlan_tag, 1);
2757 }
2758
2759 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2760 match.mask->vlan_id);
2761 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2762 match.key->vlan_id);
2763
2764 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2765 match.mask->vlan_priority);
2766 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2767 match.key->vlan_priority);
2768
2769 *match_level = MLX5_MATCH_L2;
2770
2771 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2772 match.mask->vlan_eth_type &&
2773 MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2774 ft_field_support.outer_second_vid,
2775 fs_type)) {
2776 MLX5_SET(fte_match_set_misc, misc_c,
2777 outer_second_cvlan_tag, 1);
2778 spec->match_criteria_enable |=
2779 MLX5_MATCH_MISC_PARAMETERS;
2780 }
2781 }
2782 } else if (*match_level != MLX5_MATCH_NONE) {
2783 		/* cvlan_tag enabled in match criteria and
2784 		 * disabled in match value means both S & C tags
2785 		 * don't exist (i.e. the packet is untagged for both)
2786 		 */
2787 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2788 *match_level = MLX5_MATCH_L2;
2789 }
2790
2791 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2792 struct flow_match_vlan match;
2793
2794 flow_rule_match_cvlan(rule, &match);
2795 if (match.mask->vlan_id ||
2796 match.mask->vlan_priority ||
2797 match.mask->vlan_tpid) {
2798 if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2799 fs_type)) {
2800 NL_SET_ERR_MSG_MOD(extack,
2801 "Matching on CVLAN is not supported");
2802 return -EOPNOTSUPP;
2803 }
2804
2805 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2806 MLX5_SET(fte_match_set_misc, misc_c,
2807 outer_second_svlan_tag, 1);
2808 MLX5_SET(fte_match_set_misc, misc_v,
2809 outer_second_svlan_tag, 1);
2810 } else {
2811 MLX5_SET(fte_match_set_misc, misc_c,
2812 outer_second_cvlan_tag, 1);
2813 MLX5_SET(fte_match_set_misc, misc_v,
2814 outer_second_cvlan_tag, 1);
2815 }
2816
2817 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2818 match.mask->vlan_id);
2819 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2820 match.key->vlan_id);
2821 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2822 match.mask->vlan_priority);
2823 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2824 match.key->vlan_priority);
2825
2826 *match_level = MLX5_MATCH_L2;
2827 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2828 }
2829 }
2830
2831 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2832 struct flow_match_eth_addrs match;
2833
2834 flow_rule_match_eth_addrs(rule, &match);
2835 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2836 dmac_47_16),
2837 match.mask->dst);
2838 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2839 dmac_47_16),
2840 match.key->dst);
2841
2842 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2843 smac_47_16),
2844 match.mask->src);
2845 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2846 smac_47_16),
2847 match.key->src);
2848
2849 if (!is_zero_ether_addr(match.mask->src) ||
2850 !is_zero_ether_addr(match.mask->dst))
2851 *match_level = MLX5_MATCH_L2;
2852 }
2853
2854 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2855 struct flow_match_control match;
2856
2857 flow_rule_match_control(rule, &match);
2858 addr_type = match.key->addr_type;
2859
2860 /* the HW doesn't support frag first/later */
2861 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
2862 NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
2863 return -EOPNOTSUPP;
2864 }
2865
2866 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2867 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2868 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2869 match.key->flags & FLOW_DIS_IS_FRAGMENT);
2870
2871 /* the HW doesn't need L3 inline to match on frag=no */
2872 if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2873 *match_level = MLX5_MATCH_L2;
2874 /* *** L2 attributes parsing up to here *** */
2875 else
2876 *match_level = MLX5_MATCH_L3;
2877 }
2878 }
2879
2880 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2881 struct flow_match_basic match;
2882
2883 flow_rule_match_basic(rule, &match);
2884 ip_proto = match.key->ip_proto;
2885
2886 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2887 match.mask->ip_proto);
2888 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2889 match.key->ip_proto);
2890
2891 if (match.mask->ip_proto)
2892 *match_level = MLX5_MATCH_L3;
2893 }
2894
2895 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2896 struct flow_match_ipv4_addrs match;
2897
2898 flow_rule_match_ipv4_addrs(rule, &match);
2899 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2900 src_ipv4_src_ipv6.ipv4_layout.ipv4),
2901 &match.mask->src, sizeof(match.mask->src));
2902 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2903 src_ipv4_src_ipv6.ipv4_layout.ipv4),
2904 &match.key->src, sizeof(match.key->src));
2905 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2906 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2907 &match.mask->dst, sizeof(match.mask->dst));
2908 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2909 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2910 &match.key->dst, sizeof(match.key->dst));
2911
2912 if (match.mask->src || match.mask->dst)
2913 *match_level = MLX5_MATCH_L3;
2914 }
2915
2916 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2917 struct flow_match_ipv6_addrs match;
2918
2919 flow_rule_match_ipv6_addrs(rule, &match);
2920 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2921 src_ipv4_src_ipv6.ipv6_layout.ipv6),
2922 &match.mask->src, sizeof(match.mask->src));
2923 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2924 src_ipv4_src_ipv6.ipv6_layout.ipv6),
2925 &match.key->src, sizeof(match.key->src));
2926
2927 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2928 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2929 &match.mask->dst, sizeof(match.mask->dst));
2930 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2931 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2932 &match.key->dst, sizeof(match.key->dst));
2933
2934 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2935 ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2936 *match_level = MLX5_MATCH_L3;
2937 }
2938
2939 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2940 struct flow_match_ip match;
2941
2942 flow_rule_match_ip(rule, &match);
2943 if (match_inner_ecn) {
2944 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2945 match.mask->tos & 0x3);
2946 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2947 match.key->tos & 0x3);
2948 }
2949
2950 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2951 match.mask->tos >> 2);
2952 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2953 match.key->tos >> 2);
2954
2955 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2956 match.mask->ttl);
2957 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2958 match.key->ttl);
2959
2960 if (match.mask->ttl &&
2961 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2962 ft_field_support.outer_ipv4_ttl)) {
2963 NL_SET_ERR_MSG_MOD(extack,
2964 "Matching on TTL is not supported");
2965 return -EOPNOTSUPP;
2966 }
2967
2968 if (match.mask->tos || match.mask->ttl)
2969 *match_level = MLX5_MATCH_L3;
2970 }
2971
2972 /* *** L3 attributes parsing up to here *** */
2973
2974 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2975 struct flow_match_ports match;
2976
2977 flow_rule_match_ports(rule, &match);
2978 switch (ip_proto) {
2979 case IPPROTO_TCP:
2980 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2981 tcp_sport, ntohs(match.mask->src));
2982 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2983 tcp_sport, ntohs(match.key->src));
2984
2985 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2986 tcp_dport, ntohs(match.mask->dst));
2987 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2988 tcp_dport, ntohs(match.key->dst));
2989 break;
2990
2991 case IPPROTO_UDP:
2992 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2993 udp_sport, ntohs(match.mask->src));
2994 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2995 udp_sport, ntohs(match.key->src));
2996
2997 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2998 udp_dport, ntohs(match.mask->dst));
2999 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
3000 udp_dport, ntohs(match.key->dst));
3001 break;
3002 default:
3003 NL_SET_ERR_MSG_MOD(extack,
3004 "Only UDP and TCP transports are supported for L4 matching");
3005 netdev_err(priv->netdev,
3006 "Only UDP and TCP transport are supported\n");
3007 return -EINVAL;
3008 }
3009
3010 if (match.mask->src || match.mask->dst)
3011 *match_level = MLX5_MATCH_L4;
3012 }
3013
3014 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
3015 struct flow_match_tcp match;
3016
3017 flow_rule_match_tcp(rule, &match);
3018 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
3019 ntohs(match.mask->flags));
3020 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
3021 ntohs(match.key->flags));
3022
3023 if (match.mask->flags)
3024 *match_level = MLX5_MATCH_L4;
3025 }
3026 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
3027 struct flow_match_icmp match;
3028
3029 flow_rule_match_icmp(rule, &match);
3030 switch (ip_proto) {
3031 case IPPROTO_ICMP:
3032 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
3033 MLX5_FLEX_PROTO_ICMP)) {
3034 NL_SET_ERR_MSG_MOD(extack,
3035 "Match on Flex protocols for ICMP is not supported");
3036 return -EOPNOTSUPP;
3037 }
3038 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
3039 match.mask->type);
3040 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
3041 match.key->type);
3042 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
3043 match.mask->code);
3044 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
3045 match.key->code);
3046 break;
3047 case IPPROTO_ICMPV6:
3048 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
3049 MLX5_FLEX_PROTO_ICMPV6)) {
3050 NL_SET_ERR_MSG_MOD(extack,
3051 "Match on Flex protocols for ICMPV6 is not supported");
3052 return -EOPNOTSUPP;
3053 }
3054 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
3055 match.mask->type);
3056 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
3057 match.key->type);
3058 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
3059 match.mask->code);
3060 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
3061 match.key->code);
3062 break;
3063 default:
3064 NL_SET_ERR_MSG_MOD(extack,
3065 "Code and type matching only with ICMP and ICMPv6");
3066 netdev_err(priv->netdev,
3067 "Code and type matching only with ICMP and ICMPv6\n");
3068 return -EINVAL;
3069 }
3070 if (match.mask->code || match.mask->type) {
3071 *match_level = MLX5_MATCH_L4;
3072 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
3073 }
3074 }
3075 /* Currently supported only for MPLS over UDP */
3076 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
3077 !netif_is_bareudp(filter_dev)) {
3078 NL_SET_ERR_MSG_MOD(extack,
3079 "Matching on MPLS is supported only for MPLS over UDP");
3080 netdev_err(priv->netdev,
3081 "Matching on MPLS is supported only for MPLS over UDP\n");
3082 return -EOPNOTSUPP;
3083 }
3084
3085 return 0;
3086 }
3087
3088 static int parse_cls_flower(struct mlx5e_priv *priv,
3089 struct mlx5e_tc_flow *flow,
3090 struct mlx5_flow_spec *spec,
3091 struct flow_cls_offload *f,
3092 struct net_device *filter_dev)
3093 {
3094 u8 inner_match_level, outer_match_level, non_tunnel_match_level;
3095 struct netlink_ext_ack *extack = f->common.extack;
3096 struct mlx5_core_dev *dev = priv->mdev;
3097 struct mlx5_eswitch *esw = dev->priv.eswitch;
3098 struct mlx5e_rep_priv *rpriv = priv->ppriv;
3099 struct mlx5_eswitch_rep *rep;
3100 bool is_eswitch_flow;
3101 int err;
3102
3103 inner_match_level = MLX5_MATCH_NONE;
3104 outer_match_level = MLX5_MATCH_NONE;
3105
3106 err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
3107 &inner_match_level, &outer_match_level);
3108 non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
3109 outer_match_level : inner_match_level;
3110
3111 is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
3112 if (!err && is_eswitch_flow) {
3113 rep = rpriv->rep;
3114 if (rep->vport != MLX5_VPORT_UPLINK &&
3115 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
3116 esw->offloads.inline_mode < non_tunnel_match_level)) {
3117 NL_SET_ERR_MSG_MOD(extack,
3118 "Flow is not offloaded due to min inline setting");
3119 netdev_warn(priv->netdev,
3120 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
3121 non_tunnel_match_level, esw->offloads.inline_mode);
3122 return -EOPNOTSUPP;
3123 }
3124 }
3125
3126 flow->attr->inner_match_level = inner_match_level;
3127 flow->attr->outer_match_level = outer_match_level;
3128
3129
3130 return err;
3131 }
3132
3133 struct mlx5_fields {
3134 u8 field;
3135 u8 field_bsize;
3136 u32 field_mask;
3137 u32 offset;
3138 u32 match_offset;
3139 };
3140
3141 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
3142 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
3143 offsetof(struct pedit_headers, field) + (off), \
3144 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
3145
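/* For illustration, one entry of the fields[] table below,
 *
 *   OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit)
 *
 * expands (roughly) to:
 *
 *   { MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 8, U8_MAX,
 *     offsetof(struct pedit_headers, ip4.ttl),
 *     MLX5_BYTE_OFF(fte_match_set_lyr_2_4, ttl_hoplimit) }
 */
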
3146 /* The masked values are the same and every bit being rewritten is also covered
3147  * by the match mask, i.e. there is no rewrite that does not have a match.
3148  */
3149 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
3150 type matchmaskx = *(type *)(matchmaskp); \
3151 type matchvalx = *(type *)(matchvalp); \
3152 type maskx = *(type *)(maskp); \
3153 type valx = *(type *)(valp); \
3154 \
3155 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
3156 matchmaskx)); \
3157 })
3158
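/* A small worked example of the check above on a u8 field (values are
 * illustrative):
 *
 *   pedit sets   val 0x0a, mask 0xff
 *   match on     val 0x0a, mask 0xff  -> same: the rewrite is redundant and
 *                                        offload_pedit_fields() can skip it
 *   match on     val 0x0b, mask 0xff  -> not same: values differ
 *   match on     val 0x0a, mask 0x0f  -> not same: bits are rewritten that
 *                                        the match does not cover
 */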
3159 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
3160 void *matchmaskp, u8 bsize)
3161 {
3162 bool same = false;
3163
3164 switch (bsize) {
3165 case 8:
3166 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
3167 break;
3168 case 16:
3169 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
3170 break;
3171 case 32:
3172 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
3173 break;
3174 }
3175
3176 return same;
3177 }
3178
3179 static struct mlx5_fields fields[] = {
3180 OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
3181 OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
3182 OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
3183 OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0),
3184 OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype),
3185 OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
3186
3187 OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp),
3188 OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit),
3189 OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
3190 OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
3191
3192 OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
3193 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
3194 OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
3195 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
3196 OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
3197 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
3198 OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
3199 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
3200 OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
3201 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
3202 OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
3203 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
3204 OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
3205 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
3206 OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
3207 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
3208 OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
3209 OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
3210
3211 OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
3212 OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
3213 	/* in the linux tcphdr, tcp_flags is 8 bits long */
3214 OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),
3215
3216 OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
3217 OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
3218 };
3219
3220 static unsigned long mask_to_le(unsigned long mask, int size)
3221 {
3222 __be32 mask_be32;
3223 __be16 mask_be16;
3224
3225 if (size == 32) {
3226 mask_be32 = (__force __be32)(mask);
3227 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
3228 } else if (size == 16) {
3229 mask_be32 = (__force __be32)(mask);
3230 mask_be16 = *(__be16 *)&mask_be32;
3231 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
3232 }
3233
3234 return mask;
3235 }
3236
3237 static int offload_pedit_fields(struct mlx5e_priv *priv,
3238 int namespace,
3239 struct mlx5e_tc_flow_parse_attr *parse_attr,
3240 u32 *action_flags,
3241 struct netlink_ext_ack *extack)
3242 {
3243 struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
3244 struct pedit_headers_action *hdrs = parse_attr->hdrs;
3245 void *headers_c, *headers_v, *action, *vals_p;
3246 u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
3247 struct mlx5e_tc_mod_hdr_acts *mod_acts;
3248 unsigned long mask, field_mask;
3249 int i, first, last, next_z;
3250 struct mlx5_fields *f;
3251 u8 cmd;
3252
3253 mod_acts = &parse_attr->mod_hdr_acts;
3254 headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
3255 headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
3256
3257 set_masks = &hdrs[0].masks;
3258 add_masks = &hdrs[1].masks;
3259 set_vals = &hdrs[0].vals;
3260 add_vals = &hdrs[1].vals;
3261
3262 for (i = 0; i < ARRAY_SIZE(fields); i++) {
3263 bool skip;
3264
3265 f = &fields[i];
3266 /* avoid seeing bits set from previous iterations */
3267 s_mask = 0;
3268 a_mask = 0;
3269
3270 s_masks_p = (void *)set_masks + f->offset;
3271 a_masks_p = (void *)add_masks + f->offset;
3272
3273 s_mask = *s_masks_p & f->field_mask;
3274 a_mask = *a_masks_p & f->field_mask;
3275
3276 if (!s_mask && !a_mask) /* nothing to offload here */
3277 continue;
3278
3279 if (s_mask && a_mask) {
3280 NL_SET_ERR_MSG_MOD(extack,
3281 "can't set and add to the same HW field");
3282 netdev_warn(priv->netdev,
3283 "mlx5: can't set and add to the same HW field (%x)\n",
3284 f->field);
3285 return -EOPNOTSUPP;
3286 }
3287
3288 skip = false;
3289 if (s_mask) {
3290 void *match_mask = headers_c + f->match_offset;
3291 void *match_val = headers_v + f->match_offset;
3292
3293 cmd = MLX5_ACTION_TYPE_SET;
3294 mask = s_mask;
3295 vals_p = (void *)set_vals + f->offset;
3296 /* don't rewrite if we have a match on the same value */
3297 if (cmp_val_mask(vals_p, s_masks_p, match_val,
3298 match_mask, f->field_bsize))
3299 skip = true;
3300 /* clear to denote we consumed this field */
3301 *s_masks_p &= ~f->field_mask;
3302 } else {
3303 cmd = MLX5_ACTION_TYPE_ADD;
3304 mask = a_mask;
3305 vals_p = (void *)add_vals + f->offset;
3306 /* add 0 is no change */
3307 if ((*(u32 *)vals_p & f->field_mask) == 0)
3308 skip = true;
3309 /* clear to denote we consumed this field */
3310 *a_masks_p &= ~f->field_mask;
3311 }
3312 if (skip)
3313 continue;
3314
3315 mask = mask_to_le(mask, f->field_bsize);
3316
3317 first = find_first_bit(&mask, f->field_bsize);
3318 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
3319 last = find_last_bit(&mask, f->field_bsize);
3320 if (first < next_z && next_z < last) {
3321 NL_SET_ERR_MSG_MOD(extack,
3322 "rewrite of few sub-fields isn't supported");
3323 netdev_warn(priv->netdev,
3324 "mlx5: rewrite of few sub-fields (mask %lx) isn't offloaded\n",
3325 mask);
3326 return -EOPNOTSUPP;
3327 }
3328
3329 action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
3330 if (IS_ERR(action)) {
3331 NL_SET_ERR_MSG_MOD(extack,
3332 "too many pedit actions, can't offload");
3333 mlx5_core_warn(priv->mdev,
3334 "mlx5: parsed %d pedit actions, can't do more\n",
3335 mod_acts->num_actions);
3336 return PTR_ERR(action);
3337 }
3338
3339 MLX5_SET(set_action_in, action, action_type, cmd);
3340 MLX5_SET(set_action_in, action, field, f->field);
3341
3342 if (cmd == MLX5_ACTION_TYPE_SET) {
3343 int start;
3344
3345 field_mask = mask_to_le(f->field_mask, f->field_bsize);
3346
3347 			/* if the field is bit-sized, it may not start at the first bit */
3348 start = find_first_bit(&field_mask, f->field_bsize);
3349
3350 MLX5_SET(set_action_in, action, offset, first - start);
3351 /* length is num of bits to be written, zero means length of 32 */
3352 MLX5_SET(set_action_in, action, length, (last - first + 1));
3353 }
3354
3355 if (f->field_bsize == 32)
3356 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
3357 else if (f->field_bsize == 16)
3358 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
3359 else if (f->field_bsize == 8)
3360 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
3361
3362 ++mod_acts->num_actions;
3363 }
3364
3365 return 0;
3366 }
3367
3368 static const struct pedit_headers zero_masks = {};
3369
3370 static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
3371 struct mlx5e_tc_flow_parse_attr *parse_attr,
3372 struct netlink_ext_ack *extack)
3373 {
3374 struct pedit_headers *cmd_masks;
3375 u8 cmd;
3376
3377 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3378 cmd_masks = &parse_attr->hdrs[cmd].masks;
3379 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3380 NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
3381 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3382 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3383 16, 1, cmd_masks, sizeof(zero_masks), true);
3384 return -EOPNOTSUPP;
3385 }
3386 }
3387
3388 return 0;
3389 }
3390
3391 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3392 struct mlx5e_tc_flow_parse_attr *parse_attr,
3393 u32 *action_flags,
3394 struct netlink_ext_ack *extack)
3395 {
3396 int err;
3397
3398 err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
3399 if (err)
3400 goto out_dealloc_parsed_actions;
3401
3402 err = verify_offload_pedit_fields(priv, parse_attr, extack);
3403 if (err)
3404 goto out_dealloc_parsed_actions;
3405
3406 return 0;
3407
3408 out_dealloc_parsed_actions:
3409 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3410 return err;
3411 }
3412
3413 struct ip_ttl_word {
3414 __u8 ttl;
3415 __u8 protocol;
3416 __sum16 check;
3417 };
3418
3419 struct ipv6_hoplimit_word {
3420 __be16 payload_len;
3421 __u8 nexthdr;
3422 __u8 hop_limit;
3423 };
3424
3425 static bool
3426 is_flow_action_modify_ip_header(struct flow_action *flow_action)
3427 {
3428 const struct flow_action_entry *act;
3429 u32 mask, offset;
3430 u8 htype;
3431 int i;
3432
3433 	/* For the IPv4 and IPv6 headers, check the 4-byte word that contains
3434 	 * ttl/hop_limit to determine whether any field other than ttl/hop_limit
3435 	 * is being modified.
3436 	 */
3437 flow_action_for_each(i, act, flow_action) {
3438 if (act->id != FLOW_ACTION_MANGLE &&
3439 act->id != FLOW_ACTION_ADD)
3440 continue;
3441
3442 htype = act->mangle.htype;
3443 offset = act->mangle.offset;
3444 mask = ~act->mangle.mask;
3445
3446 if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3447 struct ip_ttl_word *ttl_word =
3448 (struct ip_ttl_word *)&mask;
3449
3450 if (offset != offsetof(struct iphdr, ttl) ||
3451 ttl_word->protocol ||
3452 ttl_word->check)
3453 return true;
3454 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3455 struct ipv6_hoplimit_word *hoplimit_word =
3456 (struct ipv6_hoplimit_word *)&mask;
3457
3458 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3459 hoplimit_word->payload_len ||
3460 hoplimit_word->nexthdr)
3461 return true;
3462 }
3463 }
3464
3465 return false;
3466 }
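
/* Illustrative pedit examples for the check above (hypothetical offsets):
 *
 *   mangle of the ip4 word at offsetof(struct iphdr, ttl) touching only the
 *   ttl byte                        -> not an IP header modification
 *   mangle of the same word that also clears protocol/check bits, or a
 *   mangle of ip4.saddr             -> IP header modification;
 *                                      modify_header_match_supported() then
 *                                      only allows the rewrite for
 *                                      TCP/UDP/ICMP flows
 */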
3467
3468 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3469 struct mlx5_flow_spec *spec,
3470 struct flow_action *flow_action,
3471 u32 actions,
3472 struct netlink_ext_ack *extack)
3473 {
3474 bool modify_ip_header;
3475 void *headers_c;
3476 void *headers_v;
3477 u16 ethertype;
3478 u8 ip_proto;
3479
3480 headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3481 headers_v = mlx5e_get_match_headers_value(actions, spec);
3482 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3483
3484 /* for non-IP we only re-write MACs, so we're okay */
3485 if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3486 ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3487 goto out_ok;
3488
3489 modify_ip_header = is_flow_action_modify_ip_header(flow_action);
3490 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3491 if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3492 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3493 NL_SET_ERR_MSG_MOD(extack,
3494 "can't offload re-write of non TCP/UDP");
3495 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3496 ip_proto);
3497 return false;
3498 }
3499
3500 out_ok:
3501 return true;
3502 }
3503
3504 static bool
3505 actions_match_supported_fdb(struct mlx5e_priv *priv,
3506 struct mlx5e_tc_flow *flow,
3507 struct netlink_ext_ack *extack)
3508 {
3509 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3510
3511 if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3512 NL_SET_ERR_MSG_MOD(extack,
3513 "current firmware doesn't support split rule for port mirroring");
3514 netdev_warn_once(priv->netdev,
3515 "current firmware doesn't support split rule for port mirroring\n");
3516 return false;
3517 }
3518
3519 return true;
3520 }
3521
3522 static bool
3523 actions_match_supported(struct mlx5e_priv *priv,
3524 struct flow_action *flow_action,
3525 u32 actions,
3526 struct mlx5e_tc_flow_parse_attr *parse_attr,
3527 struct mlx5e_tc_flow *flow,
3528 struct netlink_ext_ack *extack)
3529 {
3530 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3531 !modify_header_match_supported(priv, &parse_attr->spec, flow_action, actions,
3532 extack))
3533 return false;
3534
3535 if (mlx5e_is_eswitch_flow(flow) &&
3536 !actions_match_supported_fdb(priv, flow, extack))
3537 return false;
3538
3539 return true;
3540 }
3541
3542 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3543 {
3544 return priv->mdev == peer_priv->mdev;
3545 }
3546
3547 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3548 {
3549 struct mlx5_core_dev *fmdev, *pmdev;
3550 u64 fsystem_guid, psystem_guid;
3551
3552 fmdev = priv->mdev;
3553 pmdev = peer_priv->mdev;
3554
3555 fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3556 psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3557
3558 return (fsystem_guid == psystem_guid);
3559 }
3560
3561 static int
3562 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
3563 struct mlx5e_tc_flow *flow,
3564 struct mlx5_flow_attr *attr,
3565 struct netlink_ext_ack *extack)
3566 {
3567 struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3568 struct pedit_headers_action *hdrs = parse_attr->hdrs;
3569 enum mlx5_flow_namespace_type ns_type;
3570 int err;
3571
3572 if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
3573 !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
3574 return 0;
3575
3576 ns_type = mlx5e_get_flow_namespace(flow);
3577
3578 err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
3579 if (err)
3580 return err;
3581
3582 if (parse_attr->mod_hdr_acts.num_actions > 0)
3583 return 0;
3584
3585 /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
3586 attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3587 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3588
3589 if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
3590 return 0;
3591
3592 if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3593 (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3594 attr->esw_attr->split_count = 0;
3595
3596 return 0;
3597 }
3598
3599 static struct mlx5_flow_attr*
3600 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
3601 enum mlx5_flow_namespace_type ns_type)
3602 {
3603 struct mlx5e_tc_flow_parse_attr *parse_attr;
3604 u32 attr_sz = ns_to_attr_sz(ns_type);
3605 struct mlx5_flow_attr *attr2;
3606
3607 attr2 = mlx5_alloc_flow_attr(ns_type);
3608 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3609 if (!attr2 || !parse_attr) {
3610 kvfree(parse_attr);
3611 kfree(attr2);
3612 return NULL;
3613 }
3614
3615 memcpy(attr2, attr, attr_sz);
3616 INIT_LIST_HEAD(&attr2->list);
3617 parse_attr->filter_dev = attr->parse_attr->filter_dev;
3618 attr2->action = 0;
3619 attr2->counter = NULL;
3620 attr2->tc_act_cookies_count = 0;
3621 attr2->flags = 0;
3622 attr2->parse_attr = parse_attr;
3623 attr2->dest_chain = 0;
3624 attr2->dest_ft = NULL;
3625 attr2->act_id_restore_rule = NULL;
3626 memset(&attr2->ct_attr, 0, sizeof(attr2->ct_attr));
3627
3628 if (ns_type == MLX5_FLOW_NAMESPACE_FDB) {
3629 attr2->esw_attr->out_count = 0;
3630 attr2->esw_attr->split_count = 0;
3631 }
3632
3633 attr2->branch_true = NULL;
3634 attr2->branch_false = NULL;
3635 attr2->jumping_attr = NULL;
3636 return attr2;
3637 }
3638
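/* Return the first attribute of the flow that has an encap destination, or
 * NULL if the flow has none.
 */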
3639 struct mlx5_flow_attr *
3640 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
3641 {
3642 struct mlx5_esw_flow_attr *esw_attr;
3643 struct mlx5_flow_attr *attr;
3644 int i;
3645
3646 list_for_each_entry(attr, &flow->attrs, list) {
3647 esw_attr = attr->esw_attr;
3648 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
3649 if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
3650 return attr;
3651 }
3652 }
3653
3654 return NULL;
3655 }
3656
3657 void
3658 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
3659 {
3660 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3661 struct mlx5_flow_attr *attr;
3662
3663 list_for_each_entry(attr, &flow->attrs, list) {
3664 if (list_is_last(&attr->list, &flow->attrs))
3665 break;
3666
3667 mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
3668 }
3669 }
3670
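/* Free all post-action attributes of the flow. The last entry is the flow's
 * original attribute and is released by the caller.
 */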
3671 static void
3672 free_flow_post_acts(struct mlx5e_tc_flow *flow)
3673 {
3674 struct mlx5_flow_attr *attr, *tmp;
3675
3676 list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
3677 if (list_is_last(&attr->list, &flow->attrs))
3678 break;
3679
3680 mlx5_free_flow_attr_actions(flow, attr);
3681
3682 list_del(&attr->list);
3683 kvfree(attr->parse_attr);
3684 kfree(attr);
3685 }
3686 }
3687
3688 int
3689 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
3690 {
3691 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3692 struct mlx5_flow_attr *attr;
3693 int err = 0;
3694
3695 list_for_each_entry(attr, &flow->attrs, list) {
3696 if (list_is_last(&attr->list, &flow->attrs))
3697 break;
3698
3699 err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
3700 if (err)
3701 break;
3702 }
3703
3704 return err;
3705 }
3706
3707 /* TC filter rule HW translation:
3708 *
3709 * +---------------------+
3710 * + ft prio (tc chain) +
3711 * + original match +
3712 * +---------------------+
3713 * |
3714 * | if multi table action
3715 * |
3716 * v
3717 * +---------------------+
3718 * + post act ft |<----.
3719 * + match fte id | | split on multi table action
3720 * + do actions |-----'
3721 * +---------------------+
3722 * |
3723 * |
3724 * v
3725 * Do rest of the actions after last multi table action.
3726 */
3727 static int
3728 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
3729 {
3730 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3731 struct mlx5_flow_attr *attr, *next_attr = NULL;
3732 struct mlx5e_post_act_handle *handle;
3733 int err;
3734
3735 /* The list is walked in reverse order, as required:
3736 * the first entry is the last attribute.
3737 */
3738 list_for_each_entry(attr, &flow->attrs, list) {
3739 if (!next_attr) {
3740 /* Set counter action on last post act rule. */
3741 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3742 }
3743
3744 if (next_attr && !(attr->flags & MLX5_ATTR_FLAG_TERMINATING)) {
3745 err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
3746 if (err)
3747 goto out_free;
3748 }
3749
3750 /* Don't add post_act rule for first attr (last in the list).
3751 * It's being handled by the caller.
3752 */
3753 if (list_is_last(&attr->list, &flow->attrs))
3754 break;
3755
3756 err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
3757 if (err)
3758 goto out_free;
3759
3760 err = post_process_attr(flow, attr, extack);
3761 if (err)
3762 goto out_free;
3763
3764 handle = mlx5e_tc_post_act_add(post_act, attr);
3765 if (IS_ERR(handle)) {
3766 err = PTR_ERR(handle);
3767 goto out_free;
3768 }
3769
3770 attr->post_act_handle = handle;
3771
3772 if (attr->jumping_attr) {
3773 err = mlx5e_tc_act_set_next_post_act(flow, attr->jumping_attr, attr);
3774 if (err)
3775 goto out_free;
3776 }
3777
3778 next_attr = attr;
3779 }
3780
3781 if (flow_flag_test(flow, SLOW))
3782 goto out;
3783
3784 err = mlx5e_tc_offload_flow_post_acts(flow);
3785 if (err)
3786 goto out_free;
3787
3788 out:
3789 return 0;
3790
3791 out_free:
3792 free_flow_post_acts(flow);
3793 return err;
3794 }
3795
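/* Allocate the flow attribute for one branch (true or false) of a branching
 * action and translate the branch verdict into drop, forward to the
 * post-action table, or a jump over the following actions. Nested jumps are
 * rejected.
 */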
3796 static int
3797 alloc_branch_attr(struct mlx5e_tc_flow *flow,
3798 struct mlx5e_tc_act_branch_ctrl *cond,
3799 struct mlx5_flow_attr **cond_attr,
3800 u32 *jump_count,
3801 struct netlink_ext_ack *extack)
3802 {
3803 struct mlx5_flow_attr *attr;
3804 int err = 0;
3805
3806 *cond_attr = mlx5e_clone_flow_attr_for_post_act(flow->attr,
3807 mlx5e_get_flow_namespace(flow));
3808 if (!(*cond_attr))
3809 return -ENOMEM;
3810
3811 attr = *cond_attr;
3812
3813 switch (cond->act_id) {
3814 case FLOW_ACTION_DROP:
3815 attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3816 break;
3817 case FLOW_ACTION_ACCEPT:
3818 case FLOW_ACTION_PIPE:
3819 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3820 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
3821 break;
3822 case FLOW_ACTION_JUMP:
3823 if (*jump_count) {
3824 NL_SET_ERR_MSG_MOD(extack, "Cannot offload flows with nested jumps");
3825 err = -EOPNOTSUPP;
3826 goto out_err;
3827 }
3828 *jump_count = cond->extval;
3829 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3830 attr->dest_ft = mlx5e_tc_post_act_get_ft(get_post_action(flow->priv));
3831 break;
3832 default:
3833 err = -EOPNOTSUPP;
3834 goto out_err;
3835 }
3836
3837 return err;
3838 out_err:
3839 kfree(*cond_attr);
3840 *cond_attr = NULL;
3841 return err;
3842 }
3843
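/* Track how many tc actions are still covered by an active jump. The counter
 * is decremented once per tc action (a single tc action may expand to several
 * offload actions). When the last jumped-over action is reached, mark a split
 * point and decide whether the branch terminates or continues in the
 * post-action table; when the counter hits zero, link the following attribute
 * back to the jumping one.
 */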
3844 static void
3845 dec_jump_count(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
3846 struct mlx5_flow_attr *attr, struct mlx5e_priv *priv,
3847 struct mlx5e_tc_jump_state *jump_state)
3848 {
3849 if (!jump_state->jump_count)
3850 return;
3851
3852 /* A single tc action can instantiate multiple offload actions (e.g. pedit).
3853 * Jump only over a whole tc action, not over each expanded offload action.
3854 */
3855 if (act->id == jump_state->last_id && act->hw_index == jump_state->last_index)
3856 return;
3857
3858 jump_state->last_id = act->id;
3859 jump_state->last_index = act->hw_index;
3860
3861 /* nothing to do for intermediate actions */
3862 if (--jump_state->jump_count > 1)
3863 return;
3864
3865 if (jump_state->jump_count == 1) { /* last action in the jump action list */
3866
3867 /* create a new attribute after this action */
3868 jump_state->jump_target = true;
3869
3870 if (tc_act->is_terminating_action) { /* the branch ends here */
3871 attr->flags |= MLX5_ATTR_FLAG_TERMINATING;
3872 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3873 } else { /* the branch continues executing the rest of the actions */
3874 struct mlx5e_post_act *post_act;
3875
3876 attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
3877 post_act = get_post_action(priv);
3878 attr->dest_ft = mlx5e_tc_post_act_get_ft(post_act);
3879 }
3880 } else if (jump_state->jump_count == 0) { /* first attr after the jump action list */
3881 /* This is the post action for the jumping attribute (either red or green).
3882 * Use the stored jumping_attr to set the post act id on the jumping attribute.
3883 */
3884 attr->jumping_attr = jump_state->jumping_attr;
3885 }
3886 }
3887
3888 static int
3889 parse_branch_ctrl(struct flow_action_entry *act, struct mlx5e_tc_act *tc_act,
3890 struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr,
3891 struct mlx5e_tc_jump_state *jump_state,
3892 struct netlink_ext_ack *extack)
3893 {
3894 struct mlx5e_tc_act_branch_ctrl cond_true, cond_false;
3895 u32 jump_count = jump_state->jump_count;
3896 int err;
3897
3898 if (!tc_act->get_branch_ctrl)
3899 return 0;
3900
3901 tc_act->get_branch_ctrl(act, &cond_true, &cond_false);
3902
3903 err = alloc_branch_attr(flow, &cond_true,
3904 &attr->branch_true, &jump_count, extack);
3905 if (err)
3906 goto out_err;
3907
3908 if (jump_count)
3909 jump_state->jumping_attr = attr->branch_true;
3910
3911 err = alloc_branch_attr(flow, &cond_false,
3912 &attr->branch_false, &jump_count, extack);
3913 if (err)
3914 goto err_branch_false;
3915
3916 if (jump_count && !jump_state->jumping_attr)
3917 jump_state->jumping_attr = attr->branch_false;
3918
3919 jump_state->jump_count = jump_count;
3920
3921 /* branching action requires its own counter */
3922 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3923 flow_flag_set(flow, USE_ACT_STATS);
3924
3925 return 0;
3926
3927 err_branch_false:
3928 free_branch_attr(flow, attr->branch_true);
3929 out_err:
3930 return err;
3931 }
3932
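/* Walk the flower actions, resolve each one to its mlx5 action handler and
 * parse it into the current attribute. Multi-table actions (and jump targets)
 * end the current attribute and start a new cloned one, producing the chain
 * of post-action rules set up by alloc_flow_post_acts().
 */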
3933 static int
3934 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
3935 struct flow_action *flow_action)
3936 {
3937 struct netlink_ext_ack *extack = parse_state->extack;
3938 struct mlx5e_tc_flow *flow = parse_state->flow;
3939 struct mlx5e_tc_jump_state jump_state = {};
3940 struct mlx5_flow_attr *attr = flow->attr;
3941 enum mlx5_flow_namespace_type ns_type;
3942 struct mlx5e_priv *priv = flow->priv;
3943 struct mlx5_flow_attr *prev_attr;
3944 struct flow_action_entry *act;
3945 struct mlx5e_tc_act *tc_act;
3946 bool is_missable;
3947 int err, i;
3948
3949 ns_type = mlx5e_get_flow_namespace(flow);
3950 list_add(&attr->list, &flow->attrs);
3951
3952 flow_action_for_each(i, act, flow_action) {
3953 jump_state.jump_target = false;
3954 is_missable = false;
3955 prev_attr = attr;
3956
3957 tc_act = mlx5e_tc_act_get(act->id, ns_type);
3958 if (!tc_act) {
3959 NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
3960 err = -EOPNOTSUPP;
3961 goto out_free_post_acts;
3962 }
3963
3964 if (tc_act->can_offload && !tc_act->can_offload(parse_state, act, i, attr)) {
3965 err = -EOPNOTSUPP;
3966 goto out_free_post_acts;
3967 }
3968
3969 err = tc_act->parse_action(parse_state, act, priv, attr);
3970 if (err)
3971 goto out_free_post_acts;
3972
3973 dec_jump_count(act, tc_act, attr, priv, &jump_state);
3974
3975 err = parse_branch_ctrl(act, tc_act, flow, attr, &jump_state, extack);
3976 if (err)
3977 goto out_free_post_acts;
3978
3979 parse_state->actions |= attr->action;
3980
3981 /* Split attr for multi table act if not the last act. */
3982 if (jump_state.jump_target ||
3983 (tc_act->is_multi_table_act &&
3984 tc_act->is_multi_table_act(priv, act, attr) &&
3985 i < flow_action->num_entries - 1)) {
3986 is_missable = tc_act->is_missable ? tc_act->is_missable(act) : false;
3987
3988 err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
3989 if (err)
3990 goto out_free_post_acts;
3991
3992 attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
3993 if (!attr) {
3994 err = -ENOMEM;
3995 goto out_free_post_acts;
3996 }
3997
3998 list_add(&attr->list, &flow->attrs);
3999 }
4000
4001 if (is_missable) {
4002 /* Add counter to prev, and assign act to new (next) attr */
4003 prev_attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
4004 flow_flag_set(flow, USE_ACT_STATS);
4005
4006 attr->tc_act_cookies[attr->tc_act_cookies_count++] = act->cookie;
4007 } else if (!tc_act->stats_action) {
4008 prev_attr->tc_act_cookies[prev_attr->tc_act_cookies_count++] = act->cookie;
4009 }
4010 }
4011
4012 err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
4013 if (err)
4014 goto out_free_post_acts;
4015
4016 err = alloc_flow_post_acts(flow, extack);
4017 if (err)
4018 goto out_free_post_acts;
4019
4020 return 0;
4021
4022 out_free_post_acts:
4023 free_flow_post_acts(flow);
4024
4025 return err;
4026 }
4027
4028 static int
4029 flow_action_supported(struct flow_action *flow_action,
4030 struct netlink_ext_ack *extack)
4031 {
4032 if (!flow_action_has_entries(flow_action)) {
4033 NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
4034 return -EINVAL;
4035 }
4036
4037 if (!flow_action_hw_stats_check(flow_action, extack,
4038 FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
4039 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
4040 return -EOPNOTSUPP;
4041 }
4042
4043 return 0;
4044 }
4045
4046 static int
4047 parse_tc_nic_actions(struct mlx5e_priv *priv,
4048 struct flow_action *flow_action,
4049 struct mlx5e_tc_flow *flow,
4050 struct netlink_ext_ack *extack)
4051 {
4052 struct mlx5e_tc_act_parse_state *parse_state;
4053 struct mlx5e_tc_flow_parse_attr *parse_attr;
4054 struct mlx5_flow_attr *attr = flow->attr;
4055 int err;
4056
4057 err = flow_action_supported(flow_action, extack);
4058 if (err)
4059 return err;
4060
4061 attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
4062 parse_attr = attr->parse_attr;
4063 parse_state = &parse_attr->parse_state;
4064 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4065 parse_state->ct_priv = get_ct_priv(priv);
4066
4067 err = parse_tc_actions(parse_state, flow_action);
4068 if (err)
4069 return err;
4070
4071 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4072 if (err)
4073 return err;
4074
4075 err = verify_attr_actions(attr->action, extack);
4076 if (err)
4077 return err;
4078
4079 if (!actions_match_supported(priv, flow_action, parse_state->actions,
4080 parse_attr, flow, extack))
4081 return -EOPNOTSUPP;
4082
4083 return 0;
4084 }
4085
4086 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
4087 struct net_device *peer_netdev)
4088 {
4089 struct mlx5e_priv *peer_priv;
4090
4091 peer_priv = netdev_priv(peer_netdev);
4092
4093 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
4094 mlx5e_eswitch_vf_rep(priv->netdev) &&
4095 mlx5e_eswitch_vf_rep(peer_netdev) &&
4096 mlx5e_same_hw_devs(priv, peer_priv));
4097 }
4098
4099 static bool same_hw_reps(struct mlx5e_priv *priv,
4100 struct net_device *peer_netdev)
4101 {
4102 struct mlx5e_priv *peer_priv;
4103
4104 peer_priv = netdev_priv(peer_netdev);
4105
4106 return mlx5e_eswitch_rep(priv->netdev) &&
4107 mlx5e_eswitch_rep(peer_netdev) &&
4108 mlx5e_same_hw_devs(priv, peer_priv);
4109 }
4110
4111 static bool is_lag_dev(struct mlx5e_priv *priv,
4112 struct net_device *peer_netdev)
4113 {
4114 return ((mlx5_lag_is_sriov(priv->mdev) ||
4115 mlx5_lag_is_multipath(priv->mdev)) &&
4116 same_hw_reps(priv, peer_netdev));
4117 }
4118
4119 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
4120 {
4121 return same_hw_reps(priv, out_dev) && mlx5_lag_is_mpesw(priv->mdev);
4122 }
4123
4124 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
4125 struct net_device *out_dev)
4126 {
4127 if (is_merged_eswitch_vfs(priv, out_dev))
4128 return true;
4129
4130 if (is_multiport_eligible(priv, out_dev))
4131 return true;
4132
4133 if (is_lag_dev(priv, out_dev))
4134 return true;
4135
4136 return mlx5e_eswitch_rep(out_dev) &&
4137 same_port_devs(priv, netdev_priv(out_dev));
4138 }
4139
4140 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
4141 struct mlx5_flow_attr *attr,
4142 int ifindex,
4143 enum mlx5e_tc_int_port_type type,
4144 u32 *action,
4145 int out_index)
4146 {
4147 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4148 struct mlx5e_tc_int_port_priv *int_port_priv;
4149 struct mlx5e_tc_flow_parse_attr *parse_attr;
4150 struct mlx5e_tc_int_port *dest_int_port;
4151 int err;
4152
4153 parse_attr = attr->parse_attr;
4154 int_port_priv = mlx5e_get_int_port_priv(priv);
4155
4156 dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
4157 if (IS_ERR(dest_int_port))
4158 return PTR_ERR(dest_int_port);
4159
4160 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
4161 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
4162 mlx5e_tc_int_port_get_metadata(dest_int_port));
4163 if (err) {
4164 mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
4165 return err;
4166 }
4167
4168 *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
4169
4170 esw_attr->dest_int_port = dest_int_port;
4171 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
4172 esw_attr->split_count = out_index;
4173
4174 /* Forward to root fdb for matching against the new source vport */
4175 attr->dest_chain = 0;
4176
4177 return 0;
4178 }
4179
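/* Parse the flower actions of an eswitch (FDB) flow and verify the
 * destination constraints that apply to internal ports: at most one
 * destination, and no forwarding from a tunnel or internal port to another
 * internal port.
 */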
4180 static int
4181 parse_tc_fdb_actions(struct mlx5e_priv *priv,
4182 struct flow_action *flow_action,
4183 struct mlx5e_tc_flow *flow,
4184 struct netlink_ext_ack *extack)
4185 {
4186 struct mlx5e_tc_act_parse_state *parse_state;
4187 struct mlx5e_tc_flow_parse_attr *parse_attr;
4188 struct mlx5_flow_attr *attr = flow->attr;
4189 struct mlx5_esw_flow_attr *esw_attr;
4190 struct net_device *filter_dev;
4191 int err;
4192
4193 err = flow_action_supported(flow_action, extack);
4194 if (err)
4195 return err;
4196
4197 esw_attr = attr->esw_attr;
4198 parse_attr = attr->parse_attr;
4199 filter_dev = parse_attr->filter_dev;
4200 parse_state = &parse_attr->parse_state;
4201 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
4202 parse_state->ct_priv = get_ct_priv(priv);
4203
4204 err = parse_tc_actions(parse_state, flow_action);
4205 if (err)
4206 return err;
4207
4208 /* Forward to/from internal port can only have 1 dest */
4209 if ((netif_is_ovs_master(filter_dev) || esw_attr->dest_int_port) &&
4210 esw_attr->out_count > 1) {
4211 NL_SET_ERR_MSG_MOD(extack,
4212 "Rules with internal port can have only one destination");
4213 return -EOPNOTSUPP;
4214 }
4215
4216 /* Forward from tunnel/internal port to internal port is not supported */
4217 if ((mlx5e_get_tc_tun(filter_dev) || netif_is_ovs_master(filter_dev)) &&
4218 esw_attr->dest_int_port) {
4219 NL_SET_ERR_MSG_MOD(extack,
4220 "Forwarding from tunnel/internal port to internal port is not supported");
4221 return -EOPNOTSUPP;
4222 }
4223
4224 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
4225 if (err)
4226 return err;
4227
4228 if (!actions_match_supported(priv, flow_action, parse_state->actions,
4229 parse_attr, flow, extack))
4230 return -EOPNOTSUPP;
4231
4232 return 0;
4233 }
4234
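/* Translate the MLX5_TC_FLAG() bits passed by the offload callbacks into the
 * per-flow MLX5E_TC_FLOW_FLAG_* bits.
 */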
4235 static void get_flags(int flags, unsigned long *flow_flags)
4236 {
4237 unsigned long __flow_flags = 0;
4238
4239 if (flags & MLX5_TC_FLAG(INGRESS))
4240 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
4241 if (flags & MLX5_TC_FLAG(EGRESS))
4242 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
4243
4244 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
4245 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4246 if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
4247 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4248 if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
4249 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
4250
4251 *flow_flags = __flow_flags;
4252 }
4253
4254 static const struct rhashtable_params tc_ht_params = {
4255 .head_offset = offsetof(struct mlx5e_tc_flow, node),
4256 .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
4257 .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
4258 .automatic_shrinking = true,
4259 };
4260
4261 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
4262 unsigned long flags)
4263 {
4264 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
4265 struct mlx5e_rep_priv *rpriv;
4266
4267 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
4268 rpriv = priv->ppriv;
4269 return &rpriv->tc_ht;
4270 } else /* NIC offload */
4271 return &tc->ht;
4272 }
4273
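/* A duplicate rule on the peer eswitch is needed when the eswitches are
 * devcom-paired and either the device is in SR-IOV/multipath LAG (for rep
 * ingress or encap rules) or in multiport eswitch mode.
 */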
4274 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
4275 {
4276 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
4277 struct mlx5_flow_attr *attr = flow->attr;
4278 bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
4279 flow_flag_test(flow, INGRESS);
4280 bool act_is_encap = !!(attr->action &
4281 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
4282 bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
4283 MLX5_DEVCOM_ESW_OFFLOADS);
4284
4285 if (!esw_paired)
4286 return false;
4287
4288 if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
4289 mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
4290 (is_rep_ingress || act_is_encap))
4291 return true;
4292
4293 if (mlx5_lag_is_mpesw(esw_attr->in_mdev))
4294 return true;
4295
4296 return false;
4297 }
4298
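/* Allocate a flow attribute with trailing space for the namespace-specific
 * part: the eswitch attribute for FDB flows, the NIC attribute otherwise.
 */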
4299 struct mlx5_flow_attr *
4300 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
4301 {
4302 u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
4303 sizeof(struct mlx5_esw_flow_attr) :
4304 sizeof(struct mlx5_nic_flow_attr);
4305 struct mlx5_flow_attr *attr;
4306
4307 attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
4308 if (!attr)
4309 return attr;
4310
4311 INIT_LIST_HEAD(&attr->list);
4312 return attr;
4313 }
4314
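/* Release everything an attribute's actions hold: post-action handle, encap
 * destinations, flow counter, modify-header context, CT resources and branch
 * attributes.
 */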
4315 static void
4316 mlx5_free_flow_attr_actions(struct mlx5e_tc_flow *flow, struct mlx5_flow_attr *attr)
4317 {
4318 struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
4319
4320 if (!attr)
4321 return;
4322
4323 if (attr->post_act_handle)
4324 mlx5e_tc_post_act_del(get_post_action(flow->priv), attr->post_act_handle);
4325
4326 clean_encap_dests(flow->priv, flow, attr);
4327
4328 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
4329 mlx5_fc_destroy(counter_dev, attr->counter);
4330
4331 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
4332 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
4333 mlx5e_tc_detach_mod_hdr(flow->priv, flow, attr);
4334 }
4335
4336 mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
4337
4338 free_branch_attr(flow, attr->branch_true);
4339 free_branch_attr(flow, attr->branch_false);
4340 }
4341
4342 static int
4343 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
4344 struct flow_cls_offload *f, unsigned long flow_flags,
4345 struct mlx5e_tc_flow_parse_attr **__parse_attr,
4346 struct mlx5e_tc_flow **__flow)
4347 {
4348 struct mlx5e_tc_flow_parse_attr *parse_attr;
4349 struct mlx5_flow_attr *attr;
4350 struct mlx5e_tc_flow *flow;
4351 int err = -ENOMEM;
4352 int out_index;
4353
4354 flow = kzalloc(sizeof(*flow), GFP_KERNEL);
4355 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
4356 if (!parse_attr || !flow)
4357 goto err_free;
4358
4359 flow->flags = flow_flags;
4360 flow->cookie = f->cookie;
4361 flow->priv = priv;
4362
4363 attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
4364 if (!attr)
4365 goto err_free;
4366
4367 flow->attr = attr;
4368
4369 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4370 INIT_LIST_HEAD(&flow->encaps[out_index].list);
4371 INIT_LIST_HEAD(&flow->hairpin);
4372 INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4373 INIT_LIST_HEAD(&flow->attrs);
4374 refcount_set(&flow->refcnt, 1);
4375 init_completion(&flow->init_done);
4376 init_completion(&flow->del_hw_done);
4377
4378 *__flow = flow;
4379 *__parse_attr = parse_attr;
4380
4381 return 0;
4382
4383 err_free:
4384 kfree(flow);
4385 kvfree(parse_attr);
4386 return err;
4387 }
4388
4389 static void
4390 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4391 struct mlx5e_tc_flow_parse_attr *parse_attr,
4392 struct flow_cls_offload *f)
4393 {
4394 attr->parse_attr = parse_attr;
4395 attr->chain = f->common.chain_index;
4396 attr->prio = f->common.prio;
4397 }
4398
4399 static void
4400 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4401 struct mlx5e_priv *priv,
4402 struct mlx5e_tc_flow_parse_attr *parse_attr,
4403 struct flow_cls_offload *f,
4404 struct mlx5_eswitch_rep *in_rep,
4405 struct mlx5_core_dev *in_mdev)
4406 {
4407 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4408 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4409
4410 mlx5e_flow_attr_init(attr, parse_attr, f);
4411
4412 esw_attr->in_rep = in_rep;
4413 esw_attr->in_mdev = in_mdev;
4414
4415 if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4416 MLX5_COUNTER_SOURCE_ESWITCH)
4417 esw_attr->counter_dev = in_mdev;
4418 else
4419 esw_attr->counter_dev = priv->mdev;
4420 }
4421
4422 static struct mlx5e_tc_flow *
4423 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4424 struct flow_cls_offload *f,
4425 unsigned long flow_flags,
4426 struct net_device *filter_dev,
4427 struct mlx5_eswitch_rep *in_rep,
4428 struct mlx5_core_dev *in_mdev)
4429 {
4430 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4431 struct netlink_ext_ack *extack = f->common.extack;
4432 struct mlx5e_tc_flow_parse_attr *parse_attr;
4433 struct mlx5e_tc_flow *flow;
4434 int attr_size, err;
4435
4436 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4437 attr_size = sizeof(struct mlx5_esw_flow_attr);
4438 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4439 &parse_attr, &flow);
4440 if (err)
4441 goto out;
4442
4443 parse_attr->filter_dev = filter_dev;
4444 mlx5e_flow_esw_attr_init(flow->attr,
4445 priv, parse_attr,
4446 f, in_rep, in_mdev);
4447
4448 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4449 f, filter_dev);
4450 if (err)
4451 goto err_free;
4452
4453 /* actions validation depends on parsing the ct matches first */
4454 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4455 &flow->attr->ct_attr, extack);
4456 if (err)
4457 goto err_free;
4458
4459 err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4460 if (err)
4461 goto err_free;
4462
4463 err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4464 complete_all(&flow->init_done);
4465 if (err) {
4466 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4467 goto err_free;
4468
4469 add_unready_flow(flow);
4470 }
4471
4472 return flow;
4473
4474 err_free:
4475 mlx5e_flow_put(priv, flow);
4476 out:
4477 return ERR_PTR(err);
4478 }
4479
4480 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4481 struct mlx5e_tc_flow *flow,
4482 unsigned long flow_flags)
4483 {
4484 struct mlx5e_priv *priv = flow->priv, *peer_priv;
4485 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4486 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4487 struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4488 struct mlx5e_tc_flow_parse_attr *parse_attr;
4489 struct mlx5e_rep_priv *peer_urpriv;
4490 struct mlx5e_tc_flow *peer_flow;
4491 struct mlx5_core_dev *in_mdev;
4492 int err = 0;
4493
4494 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4495 if (!peer_esw)
4496 return -ENODEV;
4497
4498 peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4499 peer_priv = netdev_priv(peer_urpriv->netdev);
4500
4501 /* in_mdev refers to the device the packet originated from.
4502 * Packets redirected to the uplink therefore use the same mdev as
4503 * the original flow, while packets redirected from the uplink use
4504 * the peer mdev.
4505 * Multiport eswitch is a special case where the original mdev must
4506 * be kept.
4507 */
4508 if (attr->in_rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(priv->mdev))
4509 in_mdev = peer_priv->mdev;
4510 else
4511 in_mdev = priv->mdev;
4512
4513 parse_attr = flow->attr->parse_attr;
4514 peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4515 parse_attr->filter_dev,
4516 attr->in_rep, in_mdev);
4517 if (IS_ERR(peer_flow)) {
4518 err = PTR_ERR(peer_flow);
4519 goto out;
4520 }
4521
4522 flow->peer_flow = peer_flow;
4523 flow_flag_set(flow, DUP);
4524 mutex_lock(&esw->offloads.peer_mutex);
4525 list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4526 mutex_unlock(&esw->offloads.peer_mutex);
4527
4528 out:
4529 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4530 return err;
4531 }
4532
4533 static int
4534 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4535 struct flow_cls_offload *f,
4536 unsigned long flow_flags,
4537 struct net_device *filter_dev,
4538 struct mlx5e_tc_flow **__flow)
4539 {
4540 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4541 struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4542 struct mlx5_core_dev *in_mdev = priv->mdev;
4543 struct mlx5e_tc_flow *flow;
4544 int err;
4545
4546 flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4547 in_mdev);
4548 if (IS_ERR(flow))
4549 return PTR_ERR(flow);
4550
4551 if (is_peer_flow_needed(flow)) {
4552 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4553 if (err) {
4554 mlx5e_tc_del_fdb_flow(priv, flow);
4555 goto out;
4556 }
4557 }
4558
4559 *__flow = flow;
4560
4561 return 0;
4562
4563 out:
4564 return err;
4565 }
4566
4567 static int
4568 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4569 struct flow_cls_offload *f,
4570 unsigned long flow_flags,
4571 struct net_device *filter_dev,
4572 struct mlx5e_tc_flow **__flow)
4573 {
4574 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4575 struct netlink_ext_ack *extack = f->common.extack;
4576 struct mlx5e_tc_flow_parse_attr *parse_attr;
4577 struct mlx5e_tc_flow *flow;
4578 int attr_size, err;
4579
4580 if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4581 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4582 return -EOPNOTSUPP;
4583 } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4584 return -EOPNOTSUPP;
4585 }
4586
4587 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4588 attr_size = sizeof(struct mlx5_nic_flow_attr);
4589 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4590 &parse_attr, &flow);
4591 if (err)
4592 goto out;
4593
4594 parse_attr->filter_dev = filter_dev;
4595 mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4596
4597 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4598 f, filter_dev);
4599 if (err)
4600 goto err_free;
4601
4602 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4603 &flow->attr->ct_attr, extack);
4604 if (err)
4605 goto err_free;
4606
4607 err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
4608 if (err)
4609 goto err_free;
4610
4611 err = mlx5e_tc_add_nic_flow(priv, flow, extack);
4612 if (err)
4613 goto err_free;
4614
4615 flow_flag_set(flow, OFFLOADED);
4616 *__flow = flow;
4617
4618 return 0;
4619
4620 err_free:
4621 flow_flag_set(flow, FAILED);
4622 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
4623 mlx5e_flow_put(priv, flow);
4624 out:
4625 return err;
4626 }
4627
4628 static int
4629 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4630 struct flow_cls_offload *f,
4631 unsigned long flags,
4632 struct net_device *filter_dev,
4633 struct mlx5e_tc_flow **flow)
4634 {
4635 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4636 unsigned long flow_flags;
4637 int err;
4638
4639 get_flags(flags, &flow_flags);
4640
4641 if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4642 return -EOPNOTSUPP;
4643
4644 if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4645 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4646 filter_dev, flow);
4647 else
4648 err = mlx5e_add_nic_flow(priv, f, flow_flags,
4649 filter_dev, flow);
4650
4651 return err;
4652 }
4653
4654 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4655 struct mlx5e_rep_priv *rpriv)
4656 {
4657 /* An offloaded flow rule is allowed to be duplicated on a non-uplink representor
4658 * sharing a tc block with other slaves of a lag device. rpriv can be NULL if this
4659 * function is called from NIC mode.
4660 */
4661 return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4662 }
4663
4664 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4665 struct flow_cls_offload *f, unsigned long flags)
4666 {
4667 struct netlink_ext_ack *extack = f->common.extack;
4668 struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4669 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4670 struct mlx5e_tc_flow *flow;
4671 int err = 0;
4672
4673 if (!mlx5_esw_hold(priv->mdev))
4674 return -EBUSY;
4675
4676 mlx5_esw_get(priv->mdev);
4677
4678 rcu_read_lock();
4679 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4680 if (flow) {
4681 /* Same flow rule offloaded to non-uplink representor sharing tc block,
4682 * just return 0.
4683 */
4684 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4685 goto rcu_unlock;
4686
4687 NL_SET_ERR_MSG_MOD(extack,
4688 "flow cookie already exists, ignoring");
4689 netdev_warn_once(priv->netdev,
4690 "flow cookie %lx already exists, ignoring\n",
4691 f->cookie);
4692 err = -EEXIST;
4693 goto rcu_unlock;
4694 }
4695 rcu_unlock:
4696 rcu_read_unlock();
4697 if (flow)
4698 goto out;
4699
4700 trace_mlx5e_configure_flower(f);
4701 err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4702 if (err)
4703 goto out;
4704
4705 /* Flow rule offloaded to non-uplink representor sharing tc block,
4706 * set the flow's owner dev.
4707 */
4708 if (is_flow_rule_duplicate_allowed(dev, rpriv))
4709 flow->orig_dev = dev;
4710
4711 err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4712 if (err)
4713 goto err_free;
4714
4715 mlx5_esw_release(priv->mdev);
4716 return 0;
4717
4718 err_free:
4719 mlx5e_flow_put(priv, flow);
4720 out:
4721 mlx5_esw_put(priv->mdev);
4722 mlx5_esw_release(priv->mdev);
4723 return err;
4724 }
4725
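/* Check that the direction (ingress/egress) requested by the caller matches
 * the direction the flow was added with.
 */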
4726 static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
4727 {
4728 bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
4729 bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));
4730
4731 return flow_flag_test(flow, INGRESS) == dir_ingress &&
4732 flow_flag_test(flow, EGRESS) == dir_egress;
4733 }
4734
4735 int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
4736 struct flow_cls_offload *f, unsigned long flags)
4737 {
4738 struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4739 struct mlx5e_tc_flow *flow;
4740 int err;
4741
4742 rcu_read_lock();
4743 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4744 if (!flow || !same_flow_direction(flow, flags)) {
4745 err = -EINVAL;
4746 goto errout;
4747 }
4748
4749 /* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
4750 * set.
4751 */
4752 if (flow_flag_test_and_set(flow, DELETED)) {
4753 err = -EINVAL;
4754 goto errout;
4755 }
4756 rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
4757 rcu_read_unlock();
4758
4759 trace_mlx5e_delete_flower(f);
4760 mlx5e_flow_put(priv, flow);
4761
4762 mlx5_esw_put(priv->mdev);
4763 return 0;
4764
4765 errout:
4766 rcu_read_unlock();
4767 return err;
4768 }
4769
4770 int mlx5e_tc_fill_action_stats(struct mlx5e_priv *priv,
4771 struct flow_offload_action *fl_act)
4772 {
4773 return mlx5e_tc_act_stats_fill_stats(get_act_stats_handle(priv), fl_act);
4774 }
4775
4776 int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
4777 struct flow_cls_offload *f, unsigned long flags)
4778 {
4779 struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4780 struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4781 struct mlx5_eswitch *peer_esw;
4782 struct mlx5e_tc_flow *flow;
4783 struct mlx5_fc *counter;
4784 u64 lastuse = 0;
4785 u64 packets = 0;
4786 u64 bytes = 0;
4787 int err = 0;
4788
4789 rcu_read_lock();
4790 flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
4791 tc_ht_params));
4792 rcu_read_unlock();
4793 if (IS_ERR(flow))
4794 return PTR_ERR(flow);
4795
4796 if (!same_flow_direction(flow, flags)) {
4797 err = -EINVAL;
4798 goto errout;
4799 }
4800
4801 if (mlx5e_is_offloaded_flow(flow)) {
4802 if (flow_flag_test(flow, USE_ACT_STATS)) {
4803 f->use_act_stats = true;
4804 } else {
4805 counter = mlx5e_tc_get_counter(flow);
4806 if (!counter)
4807 goto errout;
4808
4809 mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
4810 }
4811 }
4812
4813 /* Under multipath it's possible for one rule to be currently
4814 * un-offloaded while the other rule is offloaded.
4815 */
4816 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4817 if (!peer_esw)
4818 goto out;
4819
4820 if (flow_flag_test(flow, DUP) &&
4821 flow_flag_test(flow->peer_flow, OFFLOADED)) {
4822 u64 bytes2;
4823 u64 packets2;
4824 u64 lastuse2;
4825
4826 if (flow_flag_test(flow, USE_ACT_STATS)) {
4827 f->use_act_stats = true;
4828 } else {
4829 counter = mlx5e_tc_get_counter(flow->peer_flow);
4830 if (!counter)
4831 goto no_peer_counter;
4832 mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);
4833
4834 bytes += bytes2;
4835 packets += packets2;
4836 lastuse = max_t(u64, lastuse, lastuse2);
4837 }
4838 }
4839
4840 no_peer_counter:
4841 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4842 out:
4843 flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
4844 FLOW_ACTION_HW_STATS_DELAYED);
4845 trace_mlx5e_stats_flower(f);
4846 errout:
4847 mlx5e_flow_put(priv, flow);
4848 return err;
4849 }
4850
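/* Apply the matchall police rate as an ingress rate limit on this
 * representor's vport. Only eswitch ports connected to VFs are supported.
 */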
4851 static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
4852 struct netlink_ext_ack *extack)
4853 {
4854 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4855 struct mlx5_eswitch *esw;
4856 u32 rate_mbps = 0;
4857 u16 vport_num;
4858 int err;
4859
4860 vport_num = rpriv->rep->vport;
4861 if (vport_num >= MLX5_VPORT_ECPF) {
4862 NL_SET_ERR_MSG_MOD(extack,
4863 "Ingress rate limit is supported only for Eswitch ports connected to VFs");
4864 return -EOPNOTSUPP;
4865 }
4866
4867 esw = priv->mdev->priv.eswitch;
4868 /* rate is given in bytes/sec.
4869 * First convert to bits/sec and then round to the nearest Mbit/sec
4870 * (million bits per second).
4871 * Additionally, if rate is non-zero, we configure a minimum of
4872 * 1 Mbit/sec.
4873 */
4874 if (rate) {
4875 rate = (rate * BITS_PER_BYTE) + 500000;
4876 do_div(rate, 1000000);
4877 rate_mbps = max_t(u32, rate, 1);
4878 }
4879
4880 err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
4881 if (err)
4882 NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");
4883
4884 return err;
4885 }
4886
4887 static int
4888 tc_matchall_police_validate(const struct flow_action *action,
4889 const struct flow_action_entry *act,
4890 struct netlink_ext_ack *extack)
4891 {
4892 if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
4893 NL_SET_ERR_MSG_MOD(extack,
4894 "Offload not supported when conform action is not continue");
4895 return -EOPNOTSUPP;
4896 }
4897
4898 if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
4899 NL_SET_ERR_MSG_MOD(extack,
4900 "Offload not supported when exceed action is not drop");
4901 return -EOPNOTSUPP;
4902 }
4903
4904 if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
4905 !flow_action_is_last_entry(action, act)) {
4906 NL_SET_ERR_MSG_MOD(extack,
4907 "Offload not supported when conform action is ok, but action is not last");
4908 return -EOPNOTSUPP;
4909 }
4910
4911 if (act->police.peakrate_bytes_ps ||
4912 act->police.avrate || act->police.overhead) {
4913 NL_SET_ERR_MSG_MOD(extack,
4914 "Offload not supported when peakrate/avrate/overhead is configured");
4915 return -EOPNOTSUPP;
4916 }
4917
4918 return 0;
4919 }
4920
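/* A matchall classifier may carry exactly one police action with basic HW
 * stats. Validate it, program the rate as the vport rate limit and snapshot
 * the current vport stats as the baseline for later stats queries.
 */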
4921 static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
4922 struct flow_action *flow_action,
4923 struct netlink_ext_ack *extack)
4924 {
4925 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4926 const struct flow_action_entry *act;
4927 int err;
4928 int i;
4929
4930 if (!flow_action_has_entries(flow_action)) {
4931 NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
4932 return -EINVAL;
4933 }
4934
4935 if (!flow_offload_has_one_action(flow_action)) {
4936 NL_SET_ERR_MSG_MOD(extack, "matchall policing support only a single action");
4937 return -EOPNOTSUPP;
4938 }
4939
4940 if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
4941 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
4942 return -EOPNOTSUPP;
4943 }
4944
4945 flow_action_for_each(i, act, flow_action) {
4946 switch (act->id) {
4947 case FLOW_ACTION_POLICE:
4948 err = tc_matchall_police_validate(flow_action, act, extack);
4949 if (err)
4950 return err;
4951
4952 err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
4953 if (err)
4954 return err;
4955
4956 rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
4957 break;
4958 default:
4959 NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
4960 return -EOPNOTSUPP;
4961 }
4962 }
4963
4964 return 0;
4965 }
4966
4967 int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
4968 struct tc_cls_matchall_offload *ma)
4969 {
4970 struct netlink_ext_ack *extack = ma->common.extack;
4971
4972 if (ma->common.prio != 1) {
4973 NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
4974 return -EINVAL;
4975 }
4976
4977 return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
4978 }
4979
4980 int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
4981 struct tc_cls_matchall_offload *ma)
4982 {
4983 struct netlink_ext_ack *extack = ma->common.extack;
4984
4985 return apply_police_params(priv, 0, extack);
4986 }
4987
4988 void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
4989 struct tc_cls_matchall_offload *ma)
4990 {
4991 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4992 struct rtnl_link_stats64 cur_stats;
4993 u64 dbytes;
4994 u64 dpkts;
4995
4996 cur_stats = priv->stats.vf_vport;
4997 dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
4998 dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
4999 rpriv->prev_vf_vport_stats = cur_stats;
5000 flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
5001 FLOW_ACTION_HW_STATS_DELAYED);
5002 }
5003
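/* Called when a peer netdev on the same HW is unregistered: clear the dead
 * peer state of every hairpin pair whose peer vhca id matches the departing
 * device.
 */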
5004 static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
5005 struct mlx5e_priv *peer_priv)
5006 {
5007 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5008 struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
5009 struct mlx5e_hairpin_entry *hpe, *tmp;
5010 LIST_HEAD(init_wait_list);
5011 u16 peer_vhca_id;
5012 int bkt;
5013
5014 if (!mlx5e_same_hw_devs(priv, peer_priv))
5015 return;
5016
5017 peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
5018
5019 mutex_lock(&tc->hairpin_tbl_lock);
5020 hash_for_each(tc->hairpin_tbl, bkt, hpe, hairpin_hlist)
5021 if (refcount_inc_not_zero(&hpe->refcnt))
5022 list_add(&hpe->dead_peer_wait_list, &init_wait_list);
5023 mutex_unlock(&tc->hairpin_tbl_lock);
5024
5025 list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
5026 wait_for_completion(&hpe->res_ready);
5027 if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
5028 mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);
5029
5030 mlx5e_hairpin_put(priv, hpe);
5031 }
5032 }
5033
5034 static int mlx5e_tc_netdev_event(struct notifier_block *this,
5035 unsigned long event, void *ptr)
5036 {
5037 struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
5038 struct mlx5e_priv *peer_priv;
5039 struct mlx5e_tc_table *tc;
5040 struct mlx5e_priv *priv;
5041
5042 if (ndev->netdev_ops != &mlx5e_netdev_ops ||
5043 event != NETDEV_UNREGISTER ||
5044 ndev->reg_state == NETREG_REGISTERED)
5045 return NOTIFY_DONE;
5046
5047 tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
5048 priv = tc->priv;
5049 peer_priv = netdev_priv(ndev);
5050 if (priv == peer_priv ||
5051 !(priv->netdev->features & NETIF_F_HW_TC))
5052 return NOTIFY_DONE;
5053
5054 mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);
5055
5056 return NOTIFY_DONE;
5057 }
5058
5059 static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
5060 {
5061 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5062 struct mlx5_flow_table **ft = &tc->miss_t;
5063 struct mlx5_flow_table_attr ft_attr = {};
5064 struct mlx5_flow_namespace *ns;
5065 int err = 0;
5066
5067 ft_attr.max_fte = 1;
5068 ft_attr.autogroup.max_num_groups = 1;
5069 ft_attr.level = MLX5E_TC_MISS_LEVEL;
5070 ft_attr.prio = 0;
5071 ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);
5072
5073 *ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
5074 if (IS_ERR(*ft)) {
5075 err = PTR_ERR(*ft);
5076 netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
5077 }
5078
5079 return err;
5080 }
5081
5082 static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
5083 {
5084 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5085
5086 mlx5_destroy_flow_table(tc->miss_t);
5087 }
5088
5089 int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
5090 {
5091 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5092 struct mlx5_core_dev *dev = priv->mdev;
5093 struct mapping_ctx *chains_mapping;
5094 struct mlx5_chains_attr attr = {};
5095 u64 mapping_id;
5096 int err;
5097
5098 mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
5099 mutex_init(&tc->t_lock);
5100 mutex_init(&tc->hairpin_tbl_lock);
5101 hash_init(tc->hairpin_tbl);
5102 tc->priv = priv;
5103
5104 err = rhashtable_init(&tc->ht, &tc_ht_params);
5105 if (err)
5106 return err;
5107
5108 lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);
5109 lockdep_init_map(&tc->ht.run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
5110
5111 mapping_id = mlx5_query_nic_system_image_guid(dev);
5112
5113 chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
5114 sizeof(struct mlx5_mapped_obj),
5115 MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);
5116
5117 if (IS_ERR(chains_mapping)) {
5118 err = PTR_ERR(chains_mapping);
5119 goto err_mapping;
5120 }
5121 tc->mapping = chains_mapping;
5122
5123 err = mlx5e_tc_nic_create_miss_table(priv);
5124 if (err)
5125 goto err_chains;
5126
5127 if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
5128 attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
5129 MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
5130 attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
5131 attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
5132 attr.default_ft = tc->miss_t;
5133 attr.mapping = chains_mapping;
5134 attr.fs_base_prio = MLX5E_TC_PRIO;
5135
5136 tc->chains = mlx5_chains_create(dev, &attr);
5137 if (IS_ERR(tc->chains)) {
5138 err = PTR_ERR(tc->chains);
5139 goto err_miss;
5140 }
5141
5142 mlx5_chains_print_info(tc->chains);
5143
5144 tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
5145 tc->ct = mlx5_tc_ct_init(priv, tc->chains, &tc->mod_hdr,
5146 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);
5147
5148 tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
5149 err = register_netdevice_notifier_dev_net(priv->netdev,
5150 &tc->netdevice_nb,
5151 &tc->netdevice_nn);
5152 if (err) {
5153 tc->netdevice_nb.notifier_call = NULL;
5154 mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
5155 goto err_reg;
5156 }
5157
5158 mlx5e_tc_debugfs_init(tc, mlx5e_fs_get_debugfs_root(priv->fs));
5159
5160 tc->action_stats_handle = mlx5e_tc_act_stats_create();
5161 if (IS_ERR(tc->action_stats_handle)) {
5162 err = PTR_ERR(tc->action_stats_handle);
5163 goto err_act_stats;
5164 }
5165
5166 return 0;
5167
5168 err_act_stats:
5169 unregister_netdevice_notifier_dev_net(priv->netdev,
5170 &tc->netdevice_nb,
5171 &tc->netdevice_nn);
5172 err_reg:
5173 mlx5_tc_ct_clean(tc->ct);
5174 mlx5e_tc_post_act_destroy(tc->post_act);
5175 mlx5_chains_destroy(tc->chains);
5176 err_miss:
5177 mlx5e_tc_nic_destroy_miss_table(priv);
5178 err_chains:
5179 mapping_destroy(chains_mapping);
5180 err_mapping:
5181 rhashtable_destroy(&tc->ht);
5182 return err;
5183 }
5184
5185 static void _mlx5e_tc_del_flow(void *ptr, void *arg)
5186 {
5187 struct mlx5e_tc_flow *flow = ptr;
5188 struct mlx5e_priv *priv = flow->priv;
5189
5190 mlx5e_tc_del_flow(priv, flow);
5191 kfree(flow);
5192 }
5193
5194 void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
5195 {
5196 struct mlx5e_tc_table *tc = mlx5e_fs_get_tc(priv->fs);
5197
5198 debugfs_remove_recursive(tc->dfs_root);
5199
5200 if (tc->netdevice_nb.notifier_call)
5201 unregister_netdevice_notifier_dev_net(priv->netdev,
5202 &tc->netdevice_nb,
5203 &tc->netdevice_nn);
5204
5205 mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
5206 mutex_destroy(&tc->hairpin_tbl_lock);
5207
5208 rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);
5209
5210 if (!IS_ERR_OR_NULL(tc->t)) {
5211 mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
5212 tc->t = NULL;
5213 }
5214 mutex_destroy(&tc->t_lock);
5215
5216 mlx5_tc_ct_clean(tc->ct);
5217 mlx5e_tc_post_act_destroy(tc->post_act);
5218 mapping_destroy(tc->mapping);
5219 mlx5_chains_destroy(tc->chains);
5220 mlx5e_tc_nic_destroy_miss_table(priv);
5221 mlx5e_tc_act_stats_free(tc->action_stats_handle);
5222 }
5223
5224 int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
5225 {
5226 int err;
5227
5228 err = rhashtable_init(tc_ht, &tc_ht_params);
5229 if (err)
5230 return err;
5231
5232 lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);
5233 lockdep_init_map(&tc_ht->run_work.lockdep_map, "tc_ht_wq_key", &tc_ht_wq_key, 0);
5234
5235 return 0;
5236 }
5237
5238 void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
5239 {
5240 rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
5241 }
5242
5243 int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
5244 {
5245 const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
5246 struct mlx5e_rep_priv *rpriv;
5247 struct mapping_ctx *mapping;
5248 struct mlx5_eswitch *esw;
5249 struct mlx5e_priv *priv;
5250 u64 mapping_id;
5251 int err = 0;
5252
5253 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5254 priv = netdev_priv(rpriv->netdev);
5255 esw = priv->mdev->priv.eswitch;
5256
5257 uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
5258 MLX5_FLOW_NAMESPACE_FDB);
5259 uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
5260 esw_chains(esw),
5261 &esw->offloads.mod_hdr,
5262 MLX5_FLOW_NAMESPACE_FDB,
5263 uplink_priv->post_act);
5264
5265 uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));
5266
5267 uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);
5268
5269 mapping_id = mlx5_query_nic_system_image_guid(esw->dev);
5270
5271 mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
5272 sizeof(struct tunnel_match_key),
5273 TUNNEL_INFO_BITS_MASK, true);
5274
5275 if (IS_ERR(mapping)) {
5276 err = PTR_ERR(mapping);
5277 goto err_tun_mapping;
5278 }
5279 uplink_priv->tunnel_mapping = mapping;
5280
5281 /* The last two values are reserved for the stack devices' slow path
5282 * table mark and the bridge ingress push mark.
5283 */
5284 mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
5285 sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
5286 if (IS_ERR(mapping)) {
5287 err = PTR_ERR(mapping);
5288 goto err_enc_opts_mapping;
5289 }
5290 uplink_priv->tunnel_enc_opts_mapping = mapping;
5291
5292 uplink_priv->encap = mlx5e_tc_tun_init(priv);
5293 if (IS_ERR(uplink_priv->encap)) {
5294 err = PTR_ERR(uplink_priv->encap);
5295 goto err_register_fib_notifier;
5296 }
5297
5298 uplink_priv->action_stats_handle = mlx5e_tc_act_stats_create();
5299 if (IS_ERR(uplink_priv->action_stats_handle)) {
5300 err = PTR_ERR(uplink_priv->action_stats_handle);
5301 goto err_action_counter;
5302 }
5303
5304 return 0;
5305
5306 err_action_counter:
5307 mlx5e_tc_tun_cleanup(uplink_priv->encap);
5308 err_register_fib_notifier:
5309 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5310 err_enc_opts_mapping:
5311 mapping_destroy(uplink_priv->tunnel_mapping);
5312 err_tun_mapping:
5313 mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5314 mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5315 mlx5_tc_ct_clean(uplink_priv->ct_priv);
5316 netdev_warn(priv->netdev,
5317 "Failed to initialize tc (eswitch), err: %d", err);
5318 mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5319 return err;
5320 }
5321
5322 void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
5323 {
5324 struct mlx5e_rep_priv *rpriv;
5325 struct mlx5_eswitch *esw;
5326 struct mlx5e_priv *priv;
5327
5328 rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
5329 priv = netdev_priv(rpriv->netdev);
5330 esw = priv->mdev->priv.eswitch;
5331
5332 mlx5e_tc_clean_fdb_peer_flows(esw);
5333
5334 mlx5e_tc_tun_cleanup(uplink_priv->encap);
5335
5336 mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
5337 mapping_destroy(uplink_priv->tunnel_mapping);
5338
5339 mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
5340 mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
5341 mlx5_tc_ct_clean(uplink_priv->ct_priv);
5342 mlx5e_flow_meters_cleanup(uplink_priv->flow_meters);
5343 mlx5e_tc_post_act_destroy(uplink_priv->post_act);
5344 mlx5e_tc_act_stats_free(uplink_priv->action_stats_handle);
5345 }
5346
5347 int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
5348 {
5349 struct rhashtable *tc_ht = get_tc_ht(priv, flags);
5350
5351 return atomic_read(&tc_ht->nelems);
5352 }
5353
5354 void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
5355 {
5356 struct mlx5e_tc_flow *flow, *tmp;
5357
5358 list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
5359 __mlx5e_tc_del_fdb_peer_flow(flow);
5360 }
5361
5362 void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
5363 {
5364 struct mlx5_rep_uplink_priv *rpriv =
5365 container_of(work, struct mlx5_rep_uplink_priv,
5366 reoffload_flows_work);
5367 struct mlx5e_tc_flow *flow, *tmp;
5368
5369 mutex_lock(&rpriv->unready_flows_lock);
5370 list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
5371 if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
5372 unready_flow_del(flow);
5373 }
5374 mutex_unlock(&rpriv->unready_flows_lock);
5375 }
5376
5377 static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
5378 struct flow_cls_offload *cls_flower,
5379 unsigned long flags)
5380 {
5381 switch (cls_flower->command) {
5382 case FLOW_CLS_REPLACE:
5383 return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
5384 flags);
5385 case FLOW_CLS_DESTROY:
5386 return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
5387 flags);
5388 case FLOW_CLS_STATS:
5389 return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
5390 flags);
5391 default:
5392 return -EOPNOTSUPP;
5393 }
5394 }
5395
5396 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
5397 void *cb_priv)
5398 {
5399 unsigned long flags = MLX5_TC_FLAG(INGRESS);
5400 struct mlx5e_priv *priv = cb_priv;
5401
5402 if (!priv->netdev || !netif_device_present(priv->netdev))
5403 return -EOPNOTSUPP;
5404
5405 if (mlx5e_is_uplink_rep(priv))
5406 flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
5407 else
5408 flags |= MLX5_TC_FLAG(NIC_OFFLOAD);
5409
5410 switch (type) {
5411 case TC_SETUP_CLSFLOWER:
5412 return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
5413 default:
5414 return -EOPNOTSUPP;
5415 }
5416 }
5417
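/* Rebuild the tunnel metadata dst on the skb from the tunnel id restored by
 * HW (tunnel key plus encap options looked up in the uplink mappings) and
 * redirect the skb to the tunnel device. Returns false if any lookup fails.
 */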
5418 static bool mlx5e_tc_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
5419 struct mlx5e_tc_update_priv *tc_priv,
5420 u32 tunnel_id)
5421 {
5422 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5423 struct tunnel_match_enc_opts enc_opts = {};
5424 struct mlx5_rep_uplink_priv *uplink_priv;
5425 struct mlx5e_rep_priv *uplink_rpriv;
5426 struct metadata_dst *tun_dst;
5427 struct tunnel_match_key key;
5428 u32 tun_id, enc_opts_id;
5429 struct net_device *dev;
5430 int err;
5431
5432 enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
5433 tun_id = tunnel_id >> ENC_OPTS_BITS;
5434
5435 if (!tun_id)
5436 return true;
5437
5438 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
5439 uplink_priv = &uplink_rpriv->uplink_priv;
5440
5441 err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
5442 if (err) {
5443 netdev_dbg(priv->netdev,
5444 "Couldn't find tunnel for tun_id: %d, err: %d\n",
5445 tun_id, err);
5446 return false;
5447 }
5448
5449 if (enc_opts_id) {
5450 err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
5451 enc_opts_id, &enc_opts);
5452 if (err) {
5453 netdev_dbg(priv->netdev,
5454 "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
5455 enc_opts_id, err);
5456 return false;
5457 }
5458 }
5459
5460 switch (key.enc_control.addr_type) {
5461 case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
5462 tun_dst = __ip_tun_set_dst(key.enc_ipv4.src, key.enc_ipv4.dst,
5463 key.enc_ip.tos, key.enc_ip.ttl,
5464 key.enc_tp.dst, TUNNEL_KEY,
5465 key32_to_tunnel_id(key.enc_key_id.keyid),
5466 enc_opts.key.len);
5467 break;
5468 case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
5469 tun_dst = __ipv6_tun_set_dst(&key.enc_ipv6.src, &key.enc_ipv6.dst,
5470 key.enc_ip.tos, key.enc_ip.ttl,
5471 key.enc_tp.dst, 0, TUNNEL_KEY,
5472 key32_to_tunnel_id(key.enc_key_id.keyid),
5473 enc_opts.key.len);
5474 break;
5475 default:
5476 netdev_dbg(priv->netdev,
5477 "Couldn't restore tunnel, unsupported addr_type: %d\n",
5478 key.enc_control.addr_type);
5479 return false;
5480 }
5481
5482 if (!tun_dst) {
5483 netdev_dbg(priv->netdev, "Couldn't restore tunnel, no tun_dst\n");
5484 return false;
5485 }
5486
5487 tun_dst->u.tun_info.key.tp_src = key.enc_tp.src;
5488
5489 if (enc_opts.key.len)
5490 ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
5491 enc_opts.key.data,
5492 enc_opts.key.len,
5493 enc_opts.key.dst_opt_type);
5494
5495 skb_dst_set(skb, (struct dst_entry *)tun_dst);
5496 dev = dev_get_by_index(&init_net, key.filter_ifindex);
5497 if (!dev) {
5498 netdev_dbg(priv->netdev,
5499 "Couldn't find tunnel device with ifindex: %d\n",
5500 key.filter_ifindex);
5501 return false;
5502 }
5503
5504 /* Set fwd_dev so we do dev_put() after datapath */
5505 tc_priv->fwd_dev = dev;
5506
5507 skb->dev = dev;
5508
5509 return true;
5510 }
5511
5512 static bool mlx5e_tc_restore_skb_tc_meta(struct sk_buff *skb, struct mlx5_tc_ct_priv *ct_priv,
5513 struct mlx5_mapped_obj *mapped_obj, u32 zone_restore_id,
5514 u32 tunnel_id, struct mlx5e_tc_update_priv *tc_priv)
5515 {
5516 struct mlx5e_priv *priv = netdev_priv(skb->dev);
5517 struct tc_skb_ext *tc_skb_ext;
5518 u64 act_miss_cookie;
5519 u32 chain;
5520
5521 chain = mapped_obj->type == MLX5_MAPPED_OBJ_CHAIN ? mapped_obj->chain : 0;
5522 act_miss_cookie = mapped_obj->type == MLX5_MAPPED_OBJ_ACT_MISS ?
5523 mapped_obj->act_miss_cookie : 0;
5524 if (chain || act_miss_cookie) {
5525 if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id))
5526 return false;
5527
5528 tc_skb_ext = tc_skb_ext_alloc(skb);
5529 if (!tc_skb_ext) {
5530 WARN_ON(1);
5531 return false;
5532 }
5533
5534 if (act_miss_cookie) {
5535 tc_skb_ext->act_miss_cookie = act_miss_cookie;
5536 tc_skb_ext->act_miss = 1;
5537 } else {
5538 tc_skb_ext->chain = chain;
5539 }
5540 }
5541
5542 if (tc_priv)
5543 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);
5544
5545 return true;
5546 }
5547
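/* Restore tunnel info for a sampled packet, then complete sample handling
 * via mlx5e_tc_sample_skb() using the mapped sample object.
 */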
5548 static void mlx5e_tc_restore_skb_sample(struct mlx5e_priv *priv, struct sk_buff *skb,
5549 struct mlx5_mapped_obj *mapped_obj,
5550 struct mlx5e_tc_update_priv *tc_priv)
5551 {
5552 if (!mlx5e_tc_restore_tunnel(priv, skb, tc_priv, mapped_obj->sample.tunnel_id)) {
5553 netdev_dbg(priv->netdev,
5554 "Failed to restore tunnel info for sampled packet\n");
5555 return;
5556 }
5557 mlx5e_tc_sample_skb(skb, mapped_obj);
5558 }
5559
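/* Restore forwarding state for packets that belong to an internal port:
 * retarget the skb to the device encoded in the mapped int port metadata
 * and record the device and tx/rx direction in tc_priv. Tunnel restore,
 * when requested, takes precedence.
 */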
5560 static bool mlx5e_tc_restore_skb_int_port(struct mlx5e_priv *priv, struct sk_buff *skb,
5561 struct mlx5_mapped_obj *mapped_obj,
5562 struct mlx5e_tc_update_priv *tc_priv,
5563 u32 tunnel_id)
5564 {
5565 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5566 struct mlx5_rep_uplink_priv *uplink_priv;
5567 struct mlx5e_rep_priv *uplink_rpriv;
5568 bool forward_tx = false;
5569
5570 /* Tunnel restore takes precedence over int port restore */
5571 if (tunnel_id)
5572 return mlx5e_tc_restore_tunnel(priv, skb, tc_priv, tunnel_id);
5573
5574 uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
5575 uplink_priv = &uplink_rpriv->uplink_priv;
5576
5577 if (mlx5e_tc_int_port_dev_fwd(uplink_priv->int_port_priv, skb,
5578 mapped_obj->int_port_metadata, &forward_tx)) {
5579 /* Set fwd_dev for future dev_put */
5580 tc_priv->fwd_dev = skb->dev;
5581 tc_priv->forward_tx = forward_tx;
5582
5583 return true;
5584 }
5585
5586 return false;
5587 }
5588
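/* Look up the object behind mapped_obj_id in mapping_ctx and restore the
 * corresponding state on the skb: TC chain / action miss metadata, sample
 * info or internal port forwarding. Returns false when the lookup or the
 * restore fails.
 */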
5589 bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
5590 struct mapping_ctx *mapping_ctx, u32 mapped_obj_id,
5591 struct mlx5_tc_ct_priv *ct_priv,
5592 u32 zone_restore_id, u32 tunnel_id,
5593 struct mlx5e_tc_update_priv *tc_priv)
5594 {
5595 struct mlx5e_priv *priv = netdev_priv(skb->dev);
5596 struct mlx5_mapped_obj mapped_obj;
5597 int err;
5598
5599 err = mapping_find(mapping_ctx, mapped_obj_id, &mapped_obj);
5600 if (err) {
5601 netdev_dbg(skb->dev,
5602 "Couldn't find mapped object for mapped_obj_id: %d, err: %d\n",
5603 mapped_obj_id, err);
5604 return false;
5605 }
5606
5607 switch (mapped_obj.type) {
5608 case MLX5_MAPPED_OBJ_CHAIN:
5609 case MLX5_MAPPED_OBJ_ACT_MISS:
5610 return mlx5e_tc_restore_skb_tc_meta(skb, ct_priv, &mapped_obj, zone_restore_id,
5611 tunnel_id, tc_priv);
5612 case MLX5_MAPPED_OBJ_SAMPLE:
5613 mlx5e_tc_restore_skb_sample(priv, skb, &mapped_obj, tc_priv);
5614 tc_priv->skb_done = true;
5615 return true;
5616 case MLX5_MAPPED_OBJ_INT_PORT_METADATA:
5617 return mlx5e_tc_restore_skb_int_port(priv, skb, &mapped_obj, tc_priv, tunnel_id);
5618 default:
5619 netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
5620 return false;
5621 }
5624 }
5625
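/* NIC RX variant: extract the chain tag and zone restore id from the CQE
 * flow-table metadata (reg_b) and restore TC metadata on the skb. No
 * tunnel or forwarding state is involved on the NIC path.
 */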
5626 bool mlx5e_tc_update_skb_nic(struct mlx5_cqe64 *cqe, struct sk_buff *skb)
5627 {
5628 struct mlx5e_priv *priv = netdev_priv(skb->dev);
5629 u32 mapped_obj_id, reg_b, zone_restore_id;
5630 struct mlx5_tc_ct_priv *ct_priv;
5631 struct mapping_ctx *mapping_ctx;
5632 struct mlx5e_tc_table *tc;
5633
5634 reg_b = be32_to_cpu(cqe->ft_metadata);
5635 tc = mlx5e_fs_get_tc(priv->fs);
5636 mapped_obj_id = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;
5637 zone_restore_id = (reg_b >> MLX5_REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
5638 ESW_ZONE_ID_MASK;
5639 ct_priv = tc->ct;
5640 mapping_ctx = tc->mapping;
5641
5642 return mlx5e_tc_update_skb(cqe, skb, mapping_ctx, mapped_obj_id, ct_priv, zone_restore_id,
5643 0, NULL);
5644 }
5645
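/* Map an action miss cookie to a reg_c0 object id and add the eswitch
 * restore rule for it; the rule handle is kept in attr->act_id_restore_rule
 * for later teardown.
 */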
5646 int mlx5e_tc_action_miss_mapping_get(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
5647 u64 act_miss_cookie, u32 *act_miss_mapping)
5648 {
5649 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5650 struct mlx5_mapped_obj mapped_obj = {};
5651 struct mapping_ctx *ctx;
5652 int err;
5653
5654 ctx = esw->offloads.reg_c0_obj_pool;
5655
5656 mapped_obj.type = MLX5_MAPPED_OBJ_ACT_MISS;
5657 mapped_obj.act_miss_cookie = act_miss_cookie;
5658 err = mapping_add(ctx, &mapped_obj, act_miss_mapping);
5659 if (err)
5660 return err;
5661
5662 attr->act_id_restore_rule = esw_add_restore_rule(esw, *act_miss_mapping);
	if (IS_ERR(attr->act_id_restore_rule)) {
		err = PTR_ERR(attr->act_id_restore_rule);
		goto err_rule;
	}
5665
5666 return 0;
5667
5668 err_rule:
5669 mapping_remove(ctx, *act_miss_mapping);
5670 return err;
5671 }
5672
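/* Undo mlx5e_tc_action_miss_mapping_get(): delete the restore rule and
 * release the reg_c0 mapping.
 */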
5673 void mlx5e_tc_action_miss_mapping_put(struct mlx5e_priv *priv, struct mlx5_flow_attr *attr,
5674 u32 act_miss_mapping)
5675 {
5676 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
5677 struct mapping_ctx *ctx;
5678
5679 ctx = esw->offloads.reg_c0_obj_pool;
5680 mlx5_del_flow_rules(attr->act_id_restore_rule);
5681 mapping_remove(ctx, act_miss_mapping);
5682 }