git.ipfire.org Git - thirdparty/kernel/stable.git/blame - drivers/net/ethernet/mellanox/mlx4/en_rx.c
net/mlx4_core: Use cq quota in SRIOV when creating completion EQs
[thirdparty/kernel/stable.git] / drivers / net / ethernet / mellanox / mlx4 / en_rx.c
CommitLineData
c27a02cd
YP
1/*
2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
12 * conditions are met:
13 *
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer.
17 *
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 *
32 */
33
076bb0c8 34#include <net/busy_poll.h>
47a38e15 35#include <linux/bpf.h>
a67edbf4 36#include <linux/bpf_trace.h>
c27a02cd 37#include <linux/mlx4/cq.h>
5a0e3ad6 38#include <linux/slab.h>
c27a02cd
YP
39#include <linux/mlx4/qp.h>
40#include <linux/skbuff.h>
b67bfe0d 41#include <linux/rculist.h>
c27a02cd
YP
42#include <linux/if_ether.h>
43#include <linux/if_vlan.h>
44#include <linux/vmalloc.h>
35f6f453 45#include <linux/irq.h>
c27a02cd 46
f8c6455b
SM
47#if IS_ENABLED(CONFIG_IPV6)
48#include <net/ip6_checksum.h>
49#endif
50
c27a02cd
YP
51#include "mlx4_en.h"
52
51151a16
ED
53static int mlx4_alloc_pages(struct mlx4_en_priv *priv,
54 struct mlx4_en_rx_alloc *page_alloc,
55 const struct mlx4_en_frag_info *frag_info,
56 gfp_t _gfp)
57{
58 int order;
59 struct page *page;
60 dma_addr_t dma;
61
d576acf0 62 for (order = frag_info->order; ;) {
51151a16
ED
63 gfp_t gfp = _gfp;
64
65 if (order)
04aeb56a 66 gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NOMEMALLOC;
51151a16
ED
67 page = alloc_pages(gfp, order);
68 if (likely(page))
69 break;
70 if (--order < 0 ||
71 ((PAGE_SIZE << order) < frag_info->frag_size))
72 return -ENOMEM;
73 }
74 dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order,
d576acf0 75 frag_info->dma_dir);
de3d6fa8 76 if (unlikely(dma_mapping_error(priv->ddev, dma))) {
51151a16
ED
77 put_page(page);
78 return -ENOMEM;
79 }
70fbe079 80 page_alloc->page_size = PAGE_SIZE << order;
51151a16
ED
81 page_alloc->page = page;
82 page_alloc->dma = dma;
5f6e9800 83 page_alloc->page_offset = 0;
51151a16 84 /* Not doing get_page() for each frag is a big win
98226208 85 * on asymetric workloads. Note we can not use atomic_set().
51151a16 86 */
fe896d18 87 page_ref_add(page, page_alloc->page_size / frag_info->frag_stride - 1);
51151a16
ED
88 return 0;
89}
90
4cce66cd
TLSC
91static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
92 struct mlx4_en_rx_desc *rx_desc,
93 struct mlx4_en_rx_alloc *frags,
51151a16
ED
94 struct mlx4_en_rx_alloc *ring_alloc,
95 gfp_t gfp)
c27a02cd 96{
4cce66cd 97 struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
51151a16 98 const struct mlx4_en_frag_info *frag_info;
c27a02cd 99 struct page *page;
4cce66cd 100 int i;
c27a02cd 101
4cce66cd
TLSC
102 for (i = 0; i < priv->num_frags; i++) {
103 frag_info = &priv->frag_info[i];
51151a16 104 page_alloc[i] = ring_alloc[i];
70fbe079
AV
105 page_alloc[i].page_offset += frag_info->frag_stride;
106
107 if (page_alloc[i].page_offset + frag_info->frag_stride <=
108 ring_alloc[i].page_size)
51151a16 109 continue;
70fbe079 110
de3d6fa8
TT
111 if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i],
112 frag_info, gfp)))
51151a16 113 goto out;
4cce66cd 114 }
c27a02cd 115
4cce66cd
TLSC
116 for (i = 0; i < priv->num_frags; i++) {
117 frags[i] = ring_alloc[i];
ea3349a0
MKL
118 frags[i].page_offset += priv->frag_info[i].rx_headroom;
119 rx_desc->data[i].addr = cpu_to_be64(frags[i].dma +
120 frags[i].page_offset);
4cce66cd 121 ring_alloc[i] = page_alloc[i];
c27a02cd 122 }
4cce66cd 123
c27a02cd 124 return 0;
4cce66cd 125
4cce66cd
TLSC
126out:
127 while (i--) {
51151a16 128 if (page_alloc[i].page != ring_alloc[i].page) {
4cce66cd 129 dma_unmap_page(priv->ddev, page_alloc[i].dma,
d576acf0
BB
130 page_alloc[i].page_size,
131 priv->frag_info[i].dma_dir);
51151a16 132 page = page_alloc[i].page;
851b10d6
KK
133 /* Revert changes done by mlx4_alloc_pages */
134 page_ref_sub(page, page_alloc[i].page_size /
135 priv->frag_info[i].frag_stride - 1);
51151a16
ED
136 put_page(page);
137 }
4cce66cd
TLSC
138 }
139 return -ENOMEM;
140}
141
142static void mlx4_en_free_frag(struct mlx4_en_priv *priv,
143 struct mlx4_en_rx_alloc *frags,
144 int i)
145{
51151a16 146 const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
021f1107 147 u32 next_frag_end = frags[i].page_offset + 2 * frag_info->frag_stride;
4cce66cd 148
021f1107
AV
149
150 if (next_frag_end > frags[i].page_size)
70fbe079 151 dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size,
d576acf0 152 frag_info->dma_dir);
51151a16 153
4cce66cd
TLSC
154 if (frags[i].page)
155 put_page(frags[i].page);
c27a02cd
YP
156}
157
158static int mlx4_en_init_allocator(struct mlx4_en_priv *priv,
159 struct mlx4_en_rx_ring *ring)
160{
c27a02cd 161 int i;
51151a16 162 struct mlx4_en_rx_alloc *page_alloc;
c27a02cd
YP
163
164 for (i = 0; i < priv->num_frags; i++) {
51151a16 165 const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
c27a02cd 166
51151a16 167 if (mlx4_alloc_pages(priv, &ring->page_alloc[i],
1ab25f86 168 frag_info, GFP_KERNEL | __GFP_COLD))
4cce66cd 169 goto out;
b110d2ce
IS
170
171 en_dbg(DRV, priv, " frag %d allocator: - size:%d frags:%d\n",
172 i, ring->page_alloc[i].page_size,
fe896d18 173 page_ref_count(ring->page_alloc[i].page));
c27a02cd
YP
174 }
175 return 0;
176
177out:
178 while (i--) {
51151a16
ED
179 struct page *page;
180
c27a02cd 181 page_alloc = &ring->page_alloc[i];
4cce66cd 182 dma_unmap_page(priv->ddev, page_alloc->dma,
d576acf0
BB
183 page_alloc->page_size,
184 priv->frag_info[i].dma_dir);
51151a16 185 page = page_alloc->page;
851b10d6
KK
186 /* Revert changes done by mlx4_alloc_pages */
187 page_ref_sub(page, page_alloc->page_size /
188 priv->frag_info[i].frag_stride - 1);
51151a16 189 put_page(page);
c27a02cd
YP
190 page_alloc->page = NULL;
191 }
192 return -ENOMEM;
193}
194
195static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv,
196 struct mlx4_en_rx_ring *ring)
197{
198 struct mlx4_en_rx_alloc *page_alloc;
199 int i;
200
201 for (i = 0; i < priv->num_frags; i++) {
51151a16
ED
202 const struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
203
c27a02cd 204 page_alloc = &ring->page_alloc[i];
453a6082
YP
205 en_dbg(DRV, priv, "Freeing allocator:%d count:%d\n",
206 i, page_count(page_alloc->page));
c27a02cd 207
4cce66cd 208 dma_unmap_page(priv->ddev, page_alloc->dma,
d576acf0 209 page_alloc->page_size, frag_info->dma_dir);
70fbe079
AV
210 while (page_alloc->page_offset + frag_info->frag_stride <
211 page_alloc->page_size) {
51151a16 212 put_page(page_alloc->page);
70fbe079 213 page_alloc->page_offset += frag_info->frag_stride;
51151a16 214 }
c27a02cd
YP
215 page_alloc->page = NULL;
216 }
217}
218
c27a02cd
YP
219static void mlx4_en_init_rx_desc(struct mlx4_en_priv *priv,
220 struct mlx4_en_rx_ring *ring, int index)
221{
222 struct mlx4_en_rx_desc *rx_desc = ring->buf + ring->stride * index;
c27a02cd
YP
223 int possible_frags;
224 int i;
225
c27a02cd
YP
226 /* Set size and memtype fields */
227 for (i = 0; i < priv->num_frags; i++) {
c27a02cd
YP
228 rx_desc->data[i].byte_count =
229 cpu_to_be32(priv->frag_info[i].frag_size);
230 rx_desc->data[i].lkey = cpu_to_be32(priv->mdev->mr.key);
231 }
232
233 /* If the number of used fragments does not fill up the ring stride,
234 * remaining (unused) fragments must be padded with null address/size
235 * and a special memory key */
236 possible_frags = (ring->stride - sizeof(struct mlx4_en_rx_desc)) / DS_SIZE;
237 for (i = priv->num_frags; i < possible_frags; i++) {
238 rx_desc->data[i].byte_count = 0;
239 rx_desc->data[i].lkey = cpu_to_be32(MLX4_EN_MEMTYPE_PAD);
240 rx_desc->data[i].addr = 0;
241 }
242}
243
c27a02cd 244static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
51151a16
ED
245 struct mlx4_en_rx_ring *ring, int index,
246 gfp_t gfp)
c27a02cd
YP
247{
248 struct mlx4_en_rx_desc *rx_desc = ring->buf + (index * ring->stride);
4cce66cd
TLSC
249 struct mlx4_en_rx_alloc *frags = ring->rx_info +
250 (index << priv->log_rx_info);
c27a02cd 251
d576acf0
BB
252 if (ring->page_cache.index > 0) {
253 frags[0] = ring->page_cache.buf[--ring->page_cache.index];
ea3349a0
MKL
254 rx_desc->data[0].addr = cpu_to_be64(frags[0].dma +
255 frags[0].page_offset);
d576acf0
BB
256 return 0;
257 }
258
51151a16 259 return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp);
c27a02cd
YP
260}
261
07841f9d
IS
262static inline bool mlx4_en_is_ring_empty(struct mlx4_en_rx_ring *ring)
263{
07841f9d
IS
264 return ring->prod == ring->cons;
265}
266
c27a02cd
YP
267static inline void mlx4_en_update_rx_prod_db(struct mlx4_en_rx_ring *ring)
268{
269 *ring->wqres.db.db = cpu_to_be32(ring->prod & 0xffff);
270}
271
38aab07c
YP
272static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
273 struct mlx4_en_rx_ring *ring,
274 int index)
275{
4cce66cd 276 struct mlx4_en_rx_alloc *frags;
38aab07c
YP
277 int nr;
278
4cce66cd 279 frags = ring->rx_info + (index << priv->log_rx_info);
38aab07c 280 for (nr = 0; nr < priv->num_frags; nr++) {
453a6082 281 en_dbg(DRV, priv, "Freeing fragment:%d\n", nr);
4cce66cd 282 mlx4_en_free_frag(priv, frags, nr);
38aab07c
YP
283 }
284}
285
c27a02cd
YP
286static int mlx4_en_fill_rx_buffers(struct mlx4_en_priv *priv)
287{
c27a02cd
YP
288 struct mlx4_en_rx_ring *ring;
289 int ring_ind;
290 int buf_ind;
38aab07c 291 int new_size;
c27a02cd
YP
292
293 for (buf_ind = 0; buf_ind < priv->prof->rx_ring_size; buf_ind++) {
294 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
41d942d5 295 ring = priv->rx_ring[ring_ind];
c27a02cd
YP
296
297 if (mlx4_en_prepare_rx_desc(priv, ring,
51151a16 298 ring->actual_size,
1ab25f86 299 GFP_KERNEL | __GFP_COLD)) {
c27a02cd 300 if (ring->actual_size < MLX4_EN_MIN_RX_SIZE) {
1a91de28 301 en_err(priv, "Failed to allocate enough rx buffers\n");
c27a02cd
YP
302 return -ENOMEM;
303 } else {
38aab07c 304 new_size = rounddown_pow_of_two(ring->actual_size);
1a91de28 305 en_warn(priv, "Only %d buffers allocated reducing ring size to %d\n",
453a6082 306 ring->actual_size, new_size);
38aab07c 307 goto reduce_rings;
c27a02cd
YP
308 }
309 }
310 ring->actual_size++;
311 ring->prod++;
312 }
313 }
38aab07c
YP
314 return 0;
315
316reduce_rings:
317 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
41d942d5 318 ring = priv->rx_ring[ring_ind];
38aab07c
YP
319 while (ring->actual_size > new_size) {
320 ring->actual_size--;
321 ring->prod--;
322 mlx4_en_free_rx_desc(priv, ring, ring->actual_size);
323 }
38aab07c
YP
324 }
325
c27a02cd
YP
326 return 0;
327}
328
c27a02cd
YP
329static void mlx4_en_free_rx_buf(struct mlx4_en_priv *priv,
330 struct mlx4_en_rx_ring *ring)
331{
c27a02cd 332 int index;
c27a02cd 333
453a6082
YP
334 en_dbg(DRV, priv, "Freeing Rx buf - cons:%d prod:%d\n",
335 ring->cons, ring->prod);
c27a02cd
YP
336
337 /* Unmap and free Rx buffers */
07841f9d 338 while (!mlx4_en_is_ring_empty(ring)) {
c27a02cd 339 index = ring->cons & ring->size_mask;
453a6082 340 en_dbg(DRV, priv, "Processing descriptor:%d\n", index);
38aab07c 341 mlx4_en_free_rx_desc(priv, ring, index);
c27a02cd
YP
342 ++ring->cons;
343 }
344}
345
02512482
IS
346void mlx4_en_set_num_rx_rings(struct mlx4_en_dev *mdev)
347{
348 int i;
349 int num_of_eqs;
bb2146bc 350 int num_rx_rings;
02512482
IS
351 struct mlx4_dev *dev = mdev->dev;
352
353 mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_ETH) {
c66fa19c
MB
354 num_of_eqs = max_t(int, MIN_RX_RINGS,
355 min_t(int,
356 mlx4_get_eqs_per_port(mdev->dev, i),
357 DEF_RX_RINGS));
02512482 358
ea1c1af1
AV
359 num_rx_rings = mlx4_low_memory_profile() ? MIN_RX_RINGS :
360 min_t(int, num_of_eqs,
361 netif_get_num_default_rss_queues());
02512482 362 mdev->profile.prof[i].rx_ring_num =
bb2146bc 363 rounddown_pow_of_two(num_rx_rings);
02512482
IS
364 }
365}
366
c27a02cd 367int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
41d942d5 368 struct mlx4_en_rx_ring **pring,
163561a4 369 u32 size, u16 stride, int node)
c27a02cd
YP
370{
371 struct mlx4_en_dev *mdev = priv->mdev;
41d942d5 372 struct mlx4_en_rx_ring *ring;
4cce66cd 373 int err = -ENOMEM;
c27a02cd
YP
374 int tmp;
375
163561a4 376 ring = kzalloc_node(sizeof(*ring), GFP_KERNEL, node);
41d942d5 377 if (!ring) {
163561a4
EE
378 ring = kzalloc(sizeof(*ring), GFP_KERNEL);
379 if (!ring) {
380 en_err(priv, "Failed to allocate RX ring structure\n");
381 return -ENOMEM;
382 }
41d942d5
EE
383 }
384
c27a02cd
YP
385 ring->prod = 0;
386 ring->cons = 0;
387 ring->size = size;
388 ring->size_mask = size - 1;
389 ring->stride = stride;
390 ring->log_stride = ffs(ring->stride) - 1;
9f519f68 391 ring->buf_size = ring->size * ring->stride + TXBB_SIZE;
c27a02cd
YP
392
393 tmp = size * roundup_pow_of_two(MLX4_EN_MAX_RX_FRAGS *
4cce66cd 394 sizeof(struct mlx4_en_rx_alloc));
163561a4 395 ring->rx_info = vmalloc_node(tmp, node);
41d942d5 396 if (!ring->rx_info) {
163561a4
EE
397 ring->rx_info = vmalloc(tmp);
398 if (!ring->rx_info) {
399 err = -ENOMEM;
400 goto err_ring;
401 }
41d942d5 402 }
e404decb 403
453a6082 404 en_dbg(DRV, priv, "Allocated rx_info ring at addr:%p size:%d\n",
c27a02cd
YP
405 ring->rx_info, tmp);
406
163561a4 407 /* Allocate HW buffers on provided NUMA node */
872bf2fb 408 set_dev_node(&mdev->dev->persist->pdev->dev, node);
73898db0 409 err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
872bf2fb 410 set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
c27a02cd 411 if (err)
41d942d5 412 goto err_info;
c27a02cd 413
c27a02cd
YP
414 ring->buf = ring->wqres.buf.direct.buf;
415
ec693d47
AV
416 ring->hwtstamp_rx_filter = priv->hwtstamp_config.rx_filter;
417
41d942d5 418 *pring = ring;
c27a02cd
YP
419 return 0;
420
41d942d5 421err_info:
c27a02cd
YP
422 vfree(ring->rx_info);
423 ring->rx_info = NULL;
41d942d5
EE
424err_ring:
425 kfree(ring);
426 *pring = NULL;
427
c27a02cd
YP
428 return err;
429}
430
431int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv)
432{
c27a02cd
YP
433 struct mlx4_en_rx_ring *ring;
434 int i;
435 int ring_ind;
436 int err;
437 int stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
438 DS_SIZE * priv->num_frags);
c27a02cd
YP
439
440 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
41d942d5 441 ring = priv->rx_ring[ring_ind];
c27a02cd
YP
442
443 ring->prod = 0;
444 ring->cons = 0;
445 ring->actual_size = 0;
41d942d5 446 ring->cqn = priv->rx_cq[ring_ind]->mcq.cqn;
c27a02cd
YP
447
448 ring->stride = stride;
6496bbf0
EE
449 if (ring->stride <= TXBB_SIZE) {
450 /* Stamp first unused send wqe */
451 __be32 *ptr = (__be32 *)ring->buf;
452 __be32 stamp = cpu_to_be32(1 << STAMP_SHIFT);
453 *ptr = stamp;
454 /* Move pointer to start of rx section */
9f519f68 455 ring->buf += TXBB_SIZE;
6496bbf0 456 }
9f519f68 457
c27a02cd
YP
458 ring->log_stride = ffs(ring->stride) - 1;
459 ring->buf_size = ring->size * ring->stride;
460
461 memset(ring->buf, 0, ring->buf_size);
462 mlx4_en_update_rx_prod_db(ring);
463
4cce66cd 464 /* Initialize all descriptors */
c27a02cd
YP
465 for (i = 0; i < ring->size; i++)
466 mlx4_en_init_rx_desc(priv, ring, i);
467
468 /* Initialize page allocators */
469 err = mlx4_en_init_allocator(priv, ring);
470 if (err) {
453a6082 471 en_err(priv, "Failed initializing ring allocator\n");
60b1809f
YP
472 if (ring->stride <= TXBB_SIZE)
473 ring->buf -= TXBB_SIZE;
9a4f92a6
YP
474 ring_ind--;
475 goto err_allocator;
c27a02cd 476 }
c27a02cd 477 }
b58515be
IM
478 err = mlx4_en_fill_rx_buffers(priv);
479 if (err)
c27a02cd
YP
480 goto err_buffers;
481
482 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++) {
41d942d5 483 ring = priv->rx_ring[ring_ind];
c27a02cd 484
00d7d7bc 485 ring->size_mask = ring->actual_size - 1;
c27a02cd 486 mlx4_en_update_rx_prod_db(ring);
c27a02cd
YP
487 }
488
489 return 0;
490
c27a02cd
YP
491err_buffers:
492 for (ring_ind = 0; ring_ind < priv->rx_ring_num; ring_ind++)
41d942d5 493 mlx4_en_free_rx_buf(priv, priv->rx_ring[ring_ind]);
c27a02cd
YP
494
495 ring_ind = priv->rx_ring_num - 1;
496err_allocator:
497 while (ring_ind >= 0) {
41d942d5
EE
498 if (priv->rx_ring[ring_ind]->stride <= TXBB_SIZE)
499 priv->rx_ring[ring_ind]->buf -= TXBB_SIZE;
500 mlx4_en_destroy_allocator(priv, priv->rx_ring[ring_ind]);
c27a02cd
YP
501 ring_ind--;
502 }
503 return err;
504}
505
07841f9d
IS
506/* We recover from out of memory by scheduling our napi poll
507 * function (mlx4_en_process_cq), which tries to allocate
508 * all missing RX buffers (call to mlx4_en_refill_rx_buffers).
509 */
510void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv)
511{
512 int ring;
513
514 if (!priv->port_up)
515 return;
516
517 for (ring = 0; ring < priv->rx_ring_num; ring++) {
bd4ce941
BP
518 if (mlx4_en_is_ring_empty(priv->rx_ring[ring])) {
519 local_bh_disable();
07841f9d 520 napi_reschedule(&priv->rx_cq[ring]->napi);
bd4ce941
BP
521 local_bh_enable();
522 }
07841f9d
IS
523 }
524}
525
d576acf0
BB
526/* When the rx ring is running in page-per-packet mode, a released frame can go
527 * directly into a small cache, to avoid unmapping or touching the page
528 * allocator. In bpf prog performance scenarios, buffers are either forwarded
529 * or dropped, never converted to skbs, so every page can come directly from
530 * this cache when it is sized to be a multiple of the napi budget.
531 */
532bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring,
533 struct mlx4_en_rx_alloc *frame)
534{
535 struct mlx4_en_page_cache *cache = &ring->page_cache;
536
537 if (cache->index >= MLX4_EN_CACHE_SIZE)
538 return false;
539
540 cache->buf[cache->index++] = *frame;
541 return true;
542}
543
c27a02cd 544void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
41d942d5
EE
545 struct mlx4_en_rx_ring **pring,
546 u32 size, u16 stride)
c27a02cd
YP
547{
548 struct mlx4_en_dev *mdev = priv->mdev;
41d942d5 549 struct mlx4_en_rx_ring *ring = *pring;
cb7386d3 550 struct bpf_prog *old_prog;
c27a02cd 551
326fe02d
BB
552 old_prog = rcu_dereference_protected(
553 ring->xdp_prog,
554 lockdep_is_held(&mdev->state_lock));
cb7386d3
BB
555 if (old_prog)
556 bpf_prog_put(old_prog);
68355f71 557 mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE);
c27a02cd
YP
558 vfree(ring->rx_info);
559 ring->rx_info = NULL;
41d942d5
EE
560 kfree(ring);
561 *pring = NULL;
c27a02cd
YP
562}
563
564void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
565 struct mlx4_en_rx_ring *ring)
566{
d576acf0
BB
567 int i;
568
569 for (i = 0; i < ring->page_cache.index; i++) {
570 struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i];
571
572 dma_unmap_page(priv->ddev, frame->dma, frame->page_size,
573 priv->frag_info[0].dma_dir);
574 put_page(frame->page);
575 }
576 ring->page_cache.index = 0;
c27a02cd 577 mlx4_en_free_rx_buf(priv, ring);
9f519f68
YP
578 if (ring->stride <= TXBB_SIZE)
579 ring->buf -= TXBB_SIZE;
c27a02cd
YP
580 mlx4_en_destroy_allocator(priv, ring);
581}
582
583
c27a02cd
YP
584static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
585 struct mlx4_en_rx_desc *rx_desc,
4cce66cd 586 struct mlx4_en_rx_alloc *frags,
90278c9f 587 struct sk_buff *skb,
c27a02cd
YP
588 int length)
589{
90278c9f 590 struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
c27a02cd
YP
591 struct mlx4_en_frag_info *frag_info;
592 int nr;
593 dma_addr_t dma;
594
4cce66cd 595 /* Collect used fragments while replacing them in the HW descriptors */
c27a02cd
YP
596 for (nr = 0; nr < priv->num_frags; nr++) {
597 frag_info = &priv->frag_info[nr];
598 if (length <= frag_info->frag_prefix_size)
599 break;
de3d6fa8 600 if (unlikely(!frags[nr].page))
4cce66cd 601 goto fail;
c27a02cd 602
c27a02cd 603 dma = be64_to_cpu(rx_desc->data[nr].addr);
4cce66cd
TLSC
604 dma_sync_single_for_cpu(priv->ddev, dma, frag_info->frag_size,
605 DMA_FROM_DEVICE);
c27a02cd 606
4cce66cd 607 /* Save page reference in skb */
4cce66cd
TLSC
608 __skb_frag_set_page(&skb_frags_rx[nr], frags[nr].page);
609 skb_frag_size_set(&skb_frags_rx[nr], frag_info->frag_size);
70fbe079 610 skb_frags_rx[nr].page_offset = frags[nr].page_offset;
4cce66cd 611 skb->truesize += frag_info->frag_stride;
51151a16 612 frags[nr].page = NULL;
c27a02cd
YP
613 }
614 /* Adjust size of last fragment to match actual length */
973507cb 615 if (nr > 0)
9e903e08
ED
616 skb_frag_size_set(&skb_frags_rx[nr - 1],
617 length - priv->frag_info[nr - 1].frag_prefix_size);
c27a02cd
YP
618 return nr;
619
620fail:
c27a02cd
YP
621 while (nr > 0) {
622 nr--;
311761c8 623 __skb_frag_unref(&skb_frags_rx[nr]);
c27a02cd
YP
624 }
625 return 0;
626}
627
628
629static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
630 struct mlx4_en_rx_desc *rx_desc,
4cce66cd 631 struct mlx4_en_rx_alloc *frags,
c27a02cd
YP
632 unsigned int length)
633{
c27a02cd
YP
634 struct sk_buff *skb;
635 void *va;
636 int used_frags;
637 dma_addr_t dma;
638
c056b734 639 skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN);
de3d6fa8 640 if (unlikely(!skb)) {
453a6082 641 en_dbg(RX_ERR, priv, "Failed allocating skb\n");
c27a02cd
YP
642 return NULL;
643 }
c27a02cd
YP
644 skb_reserve(skb, NET_IP_ALIGN);
645 skb->len = length;
c27a02cd
YP
646
647 /* Get pointer to first fragment so we could copy the headers into the
648 * (linear part of the) skb */
70fbe079 649 va = page_address(frags[0].page) + frags[0].page_offset;
c27a02cd
YP
650
651 if (length <= SMALL_PACKET_SIZE) {
652 /* We are copying all relevant data to the skb - temporarily
4cce66cd 653 * sync buffers for the copy */
c27a02cd 654 dma = be64_to_cpu(rx_desc->data[0].addr);
ebf8c9aa 655 dma_sync_single_for_cpu(priv->ddev, dma, length,
e4fc8560 656 DMA_FROM_DEVICE);
c27a02cd 657 skb_copy_to_linear_data(skb, va, length);
c27a02cd
YP
658 skb->tail += length;
659 } else {
cfecec56
ED
660 unsigned int pull_len;
661
c27a02cd 662 /* Move relevant fragments to skb */
4cce66cd
TLSC
663 used_frags = mlx4_en_complete_rx_desc(priv, rx_desc, frags,
664 skb, length);
785a0982
YP
665 if (unlikely(!used_frags)) {
666 kfree_skb(skb);
667 return NULL;
668 }
c27a02cd
YP
669 skb_shinfo(skb)->nr_frags = used_frags;
670
cfecec56 671 pull_len = eth_get_headlen(va, SMALL_PACKET_SIZE);
c27a02cd 672 /* Copy headers into the skb linear buffer */
cfecec56
ED
673 memcpy(skb->data, va, pull_len);
674 skb->tail += pull_len;
c27a02cd
YP
675
676 /* Skip headers in first fragment */
cfecec56 677 skb_shinfo(skb)->frags[0].page_offset += pull_len;
c27a02cd
YP
678
679 /* Adjust size of first fragment */
cfecec56
ED
680 skb_frag_size_sub(&skb_shinfo(skb)->frags[0], pull_len);
681 skb->data_len = length - pull_len;
c27a02cd
YP
682 }
683 return skb;
684}
685
e7c1c2c4
YP
686static void validate_loopback(struct mlx4_en_priv *priv, struct sk_buff *skb)
687{
688 int i;
689 int offset = ETH_HLEN;
690
691 for (i = 0; i < MLX4_LOOPBACK_TEST_PAYLOAD; i++, offset++) {
692 if (*(skb->data + offset) != (unsigned char) (i & 0xff))
693 goto out_loopback;
694 }
695 /* Loopback found */
696 priv->loopback_ok = 1;
697
698out_loopback:
699 dev_kfree_skb_any(skb);
700}
c27a02cd 701
dad42c30
ED
702static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
703 struct mlx4_en_rx_ring *ring)
4cce66cd 704{
dad42c30 705 u32 missing = ring->actual_size - (ring->prod - ring->cons);
4cce66cd 706
dad42c30
ED
707 /* Try to batch allocations, but not too much. */
708 if (missing < 8)
709 return false;
710 do {
711 if (mlx4_en_prepare_rx_desc(priv, ring,
712 ring->prod & ring->size_mask,
dceeab0e
ED
713 GFP_ATOMIC | __GFP_COLD |
714 __GFP_MEMALLOC))
4cce66cd
TLSC
715 break;
716 ring->prod++;
dad42c30
ED
717 } while (--missing);
718
719 return true;
4cce66cd
TLSC
720}
721
f8c6455b
SM
722/* When hardware doesn't strip the vlan, we need to calculate the checksum
723 * over it and add it to the hardware's checksum calculation
724 */
725static inline __wsum get_fixed_vlan_csum(__wsum hw_checksum,
726 struct vlan_hdr *vlanh)
727{
728 return csum_add(hw_checksum, *(__wsum *)vlanh);
729}
730
731/* Although the stack expects checksum which doesn't include the pseudo
732 * header, the HW adds it. To address that, we are subtracting the pseudo
733 * header checksum from the checksum value provided by the HW.
734 */
735static void get_fixed_ipv4_csum(__wsum hw_checksum, struct sk_buff *skb,
736 struct iphdr *iph)
737{
738 __u16 length_for_csum = 0;
739 __wsum csum_pseudo_header = 0;
740
741 length_for_csum = (be16_to_cpu(iph->tot_len) - (iph->ihl << 2));
742 csum_pseudo_header = csum_tcpudp_nofold(iph->saddr, iph->daddr,
743 length_for_csum, iph->protocol, 0);
744 skb->csum = csum_sub(hw_checksum, csum_pseudo_header);
745}
746
747#if IS_ENABLED(CONFIG_IPV6)
748/* In IPv6 packets, besides subtracting the pseudo header checksum,
749 * we also compute/add the IP header checksum which
750 * is not added by the HW.
751 */
752static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb,
753 struct ipv6hdr *ipv6h)
754{
755 __wsum csum_pseudo_hdr = 0;
756
de3d6fa8
TT
757 if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT ||
758 ipv6h->nexthdr == IPPROTO_HOPOPTS))
f8c6455b 759 return -1;
82d69203 760 hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr));
f8c6455b
SM
761
762 csum_pseudo_hdr = csum_partial(&ipv6h->saddr,
763 sizeof(ipv6h->saddr) + sizeof(ipv6h->daddr), 0);
764 csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ipv6h->payload_len);
765 csum_pseudo_hdr = csum_add(csum_pseudo_hdr, (__force __wsum)ntohs(ipv6h->nexthdr));
766
767 skb->csum = csum_sub(hw_checksum, csum_pseudo_hdr);
768 skb->csum = csum_add(skb->csum, csum_partial(ipv6h, sizeof(struct ipv6hdr), 0));
769 return 0;
770}
771#endif
772static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va,
79a25852 773 netdev_features_t dev_features)
f8c6455b
SM
774{
775 __wsum hw_checksum = 0;
776
777 void *hdr = (u8 *)va + sizeof(struct ethhdr);
778
779 hw_checksum = csum_unfold((__force __sum16)cqe->checksum);
780
e802f8e4 781 if (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK) &&
79a25852 782 !(dev_features & NETIF_F_HW_VLAN_CTAG_RX)) {
f8c6455b
SM
783 hw_checksum = get_fixed_vlan_csum(hw_checksum, hdr);
784 hdr += sizeof(struct vlan_hdr);
785 }
786
787 if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4))
788 get_fixed_ipv4_csum(hw_checksum, skb, hdr);
789#if IS_ENABLED(CONFIG_IPV6)
790 else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6))
de3d6fa8 791 if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr)))
f8c6455b
SM
792 return -1;
793#endif
794 return 0;
795}
796
c27a02cd
YP
797int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int budget)
798{
799 struct mlx4_en_priv *priv = netdev_priv(dev);
ec693d47 800 struct mlx4_en_dev *mdev = priv->mdev;
c27a02cd 801 struct mlx4_cqe *cqe;
41d942d5 802 struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring];
4cce66cd 803 struct mlx4_en_rx_alloc *frags;
c27a02cd 804 struct mlx4_en_rx_desc *rx_desc;
47a38e15 805 struct bpf_prog *xdp_prog;
9ecc2d86 806 int doorbell_pending;
c27a02cd
YP
807 struct sk_buff *skb;
808 int index;
809 int nr;
810 unsigned int length;
811 int polled = 0;
812 int ip_summed;
08ff3235 813 int factor = priv->cqe_factor;
ec693d47 814 u64 timestamp;
837052d0 815 bool l2_tunnel;
c27a02cd 816
de3d6fa8 817 if (unlikely(!priv->port_up))
c27a02cd
YP
818 return 0;
819
de3d6fa8 820 if (unlikely(budget <= 0))
38be0a34
EB
821 return polled;
822
326fe02d
BB
823 /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */
824 rcu_read_lock();
825 xdp_prog = rcu_dereference(ring->xdp_prog);
9ecc2d86 826 doorbell_pending = 0;
47a38e15 827
c27a02cd
YP
828 /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
829 * descriptor offset can be deduced from the CQE index instead of
830 * reading 'cqe->index' */
831 index = cq->mcq.cons_index & ring->size_mask;
b1b6b4da 832 cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
c27a02cd
YP
833
834 /* Process all completed CQEs */
835 while (XNOR(cqe->owner_sr_opcode & MLX4_CQE_OWNER_MASK,
836 cq->mcq.cons_index & cq->size)) {
837
4cce66cd 838 frags = ring->rx_info + (index << priv->log_rx_info);
c27a02cd
YP
839 rx_desc = ring->buf + (index << ring->log_stride);
840
841 /*
842 * make sure we read the CQE after we read the ownership bit
843 */
12b3375f 844 dma_rmb();
c27a02cd
YP
845
846 /* Drop packet on bad receive or bad checksum */
847 if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) ==
848 MLX4_CQE_OPCODE_ERROR)) {
1a91de28
JP
849 en_err(priv, "CQE completed in error - vendor syndrom:%d syndrom:%d\n",
850 ((struct mlx4_err_cqe *)cqe)->vendor_err_syndrome,
851 ((struct mlx4_err_cqe *)cqe)->syndrome);
c27a02cd
YP
852 goto next;
853 }
854 if (unlikely(cqe->badfcs_enc & MLX4_CQE_BAD_FCS)) {
453a6082 855 en_dbg(RX_ERR, priv, "Accepted frame with bad FCS\n");
c27a02cd
YP
856 goto next;
857 }
858
79aeaccd
YB
859 /* Check if we need to drop the packet if SRIOV is not enabled
860 * and not performing the selftest or flb disabled
861 */
862 if (priv->flags & MLX4_EN_FLAG_RX_FILTER_NEEDED) {
863 struct ethhdr *ethh;
864 dma_addr_t dma;
79aeaccd
YB
865 /* Get pointer to first fragment since we haven't
866 * skb yet and cast it to ethhdr struct
867 */
868 dma = be64_to_cpu(rx_desc->data[0].addr);
869 dma_sync_single_for_cpu(priv->ddev, dma, sizeof(*ethh),
870 DMA_FROM_DEVICE);
871 ethh = (struct ethhdr *)(page_address(frags[0].page) +
70fbe079 872 frags[0].page_offset);
79aeaccd 873
c07cb4b0
YB
874 if (is_multicast_ether_addr(ethh->h_dest)) {
875 struct mlx4_mac_entry *entry;
c07cb4b0
YB
876 struct hlist_head *bucket;
877 unsigned int mac_hash;
878
879 /* Drop the packet, since HW loopback-ed it */
880 mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX];
881 bucket = &priv->mac_hash[mac_hash];
b67bfe0d 882 hlist_for_each_entry_rcu(entry, bucket, hlist) {
c07cb4b0 883 if (ether_addr_equal_64bits(entry->mac,
326fe02d 884 ethh->h_source))
c07cb4b0 885 goto next;
c07cb4b0 886 }
c07cb4b0 887 }
79aeaccd 888 }
5b4c4d36 889
c27a02cd
YP
890 /*
891 * Packet is OK - process it.
892 */
893 length = be32_to_cpu(cqe->byte_cnt);
4a5f4dd8 894 length -= ring->fcs_del;
837052d0
OG
895 l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
896 (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
c27a02cd 897
47a38e15
BB
898 /* A bpf program gets first chance to drop the packet. It may
899 * read bytes but not past the end of the frag.
900 */
901 if (xdp_prog) {
902 struct xdp_buff xdp;
903 dma_addr_t dma;
ea3349a0 904 void *orig_data;
47a38e15
BB
905 u32 act;
906
907 dma = be64_to_cpu(rx_desc->data[0].addr);
908 dma_sync_single_for_cpu(priv->ddev, dma,
909 priv->frag_info[0].frag_size,
910 DMA_FROM_DEVICE);
911
ea3349a0
MKL
912 xdp.data_hard_start = page_address(frags[0].page);
913 xdp.data = xdp.data_hard_start + frags[0].page_offset;
47a38e15 914 xdp.data_end = xdp.data + length;
ea3349a0 915 orig_data = xdp.data;
47a38e15
BB
916
917 act = bpf_prog_run_xdp(xdp_prog, &xdp);
ea3349a0
MKL
918
919 if (xdp.data != orig_data) {
920 length = xdp.data_end - xdp.data;
921 frags[0].page_offset = xdp.data -
922 xdp.data_hard_start;
923 }
924
47a38e15
BB
925 switch (act) {
926 case XDP_PASS:
927 break;
9ecc2d86 928 case XDP_TX:
15fca2c8 929 if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
67f8b1dc 930 length, cq->ring,
de3d6fa8 931 &doorbell_pending)))
9ecc2d86 932 goto consumed;
a67edbf4 933 trace_xdp_exception(dev, xdp_prog, act);
15fca2c8 934 goto xdp_drop_no_cnt; /* Drop on xmit failure */
47a38e15
BB
935 default:
936 bpf_warn_invalid_xdp_action(act);
937 case XDP_ABORTED:
a67edbf4 938 trace_xdp_exception(dev, xdp_prog, act);
47a38e15 939 case XDP_DROP:
15fca2c8
TT
940 ring->xdp_drop++;
941xdp_drop_no_cnt:
de3d6fa8 942 if (likely(mlx4_en_rx_recycle(ring, frags)))
d576acf0 943 goto consumed;
47a38e15
BB
944 goto next;
945 }
946 }
947
15fca2c8
TT
948 ring->bytes += length;
949 ring->packets++;
950
c8c64cff 951 if (likely(dev->features & NETIF_F_RXCSUM)) {
f8c6455b
SM
952 if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
953 MLX4_CQE_STATUS_UDP)) {
954 if ((cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPOK)) &&
955 cqe->checksum == cpu_to_be16(0xffff)) {
956 ip_summed = CHECKSUM_UNNECESSARY;
957 ring->csum_ok++;
958 } else {
959 ip_summed = CHECKSUM_NONE;
960 ring->csum_none++;
961 }
c27a02cd 962 } else {
f8c6455b
SM
963 if (priv->flags & MLX4_EN_FLAG_RX_CSUM_NON_TCP_UDP &&
964 (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
965 MLX4_CQE_STATUS_IPV6))) {
966 ip_summed = CHECKSUM_COMPLETE;
967 ring->csum_complete++;
968 } else {
969 ip_summed = CHECKSUM_NONE;
970 ring->csum_none++;
971 }
c27a02cd
YP
972 }
973 } else {
974 ip_summed = CHECKSUM_NONE;
ad04378c 975 ring->csum_none++;
c27a02cd
YP
976 }
977
dd65beac
SM
978 /* This packet is eligible for GRO if it is:
979 * - DIX Ethernet (type interpretation)
980 * - TCP/IP (v4)
981 * - without IP options
982 * - not an IP fragment
dd65beac 983 */
868fdb06 984 if (dev->features & NETIF_F_GRO) {
dd65beac
SM
985 struct sk_buff *gro_skb = napi_get_frags(&cq->napi);
986 if (!gro_skb)
987 goto next;
988
989 nr = mlx4_en_complete_rx_desc(priv,
990 rx_desc, frags, gro_skb,
991 length);
992 if (!nr)
993 goto next;
994
f8c6455b
SM
995 if (ip_summed == CHECKSUM_COMPLETE) {
996 void *va = skb_frag_address(skb_shinfo(gro_skb)->frags);
79a25852
IS
997 if (check_csum(cqe, gro_skb, va,
998 dev->features)) {
f8c6455b
SM
999 ip_summed = CHECKSUM_NONE;
1000 ring->csum_none++;
1001 ring->csum_complete--;
1002 }
1003 }
1004
dd65beac
SM
1005 skb_shinfo(gro_skb)->nr_frags = nr;
1006 gro_skb->len = length;
1007 gro_skb->data_len = length;
1008 gro_skb->ip_summed = ip_summed;
1009
1010 if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
c58942f2
OG
1011 gro_skb->csum_level = 1;
1012
dd65beac 1013 if ((cqe->vlan_my_qpn &
e802f8e4 1014 cpu_to_be32(MLX4_CQE_CVLAN_PRESENT_MASK)) &&
dd65beac
SM
1015 (dev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
1016 u16 vid = be16_to_cpu(cqe->sl_vid);
1017
1018 __vlan_hwaccel_put_tag(gro_skb, htons(ETH_P_8021Q), vid);
e38af4fa
HHZ
1019 } else if ((be32_to_cpu(cqe->vlan_my_qpn) &
1020 MLX4_CQE_SVLAN_PRESENT_MASK) &&
1021 (dev->features & NETIF_F_HW_VLAN_STAG_RX)) {
1022 __vlan_hwaccel_put_tag(gro_skb,
1023 htons(ETH_P_8021AD),
1024 be16_to_cpu(cqe->sl_vid));
dd65beac
SM
1025 }
1026
1027 if (dev->features & NETIF_F_RXHASH)
1028 skb_set_hash(gro_skb,
1029 be32_to_cpu(cqe->immed_rss_invalid),
0a6d4245
ED
1030 (ip_summed == CHECKSUM_UNNECESSARY) ?
1031 PKT_HASH_TYPE_L4 :
1032 PKT_HASH_TYPE_L3);
dd65beac
SM
1033
1034 skb_record_rx_queue(gro_skb, cq->ring);
dd65beac
SM
1035
1036 if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
1037 timestamp = mlx4_en_get_cqe_ts(cqe);
1038 mlx4_en_fill_hwtstamps(mdev,
1039 skb_hwtstamps(gro_skb),
1040 timestamp);
1041 }
1042
1043 napi_gro_frags(&cq->napi);
1044 goto next;
1045 }
1046
1047 /* GRO not possible, complete processing here */
4cce66cd 1048 skb = mlx4_en_rx_skb(priv, rx_desc, frags, length);
de3d6fa8 1049 if (unlikely(!skb)) {
d21ed3a3 1050 ring->dropped++;
c27a02cd
YP
1051 goto next;
1052 }
1053
57c970c2 1054 if (unlikely(priv->validate_loopback)) {
e7c1c2c4
YP
1055 validate_loopback(priv, skb);
1056 goto next;
1057 }
1058
f8c6455b 1059 if (ip_summed == CHECKSUM_COMPLETE) {
79a25852 1060 if (check_csum(cqe, skb, skb->data, dev->features)) {
f8c6455b
SM
1061 ip_summed = CHECKSUM_NONE;
1062 ring->csum_complete--;
1063 ring->csum_none++;
1064 }
1065 }
1066
c27a02cd
YP
1067 skb->ip_summed = ip_summed;
1068 skb->protocol = eth_type_trans(skb, dev);
0c8dfc83 1069 skb_record_rx_queue(skb, cq->ring);
c27a02cd 1070
9ca8600e
TH
1071 if (l2_tunnel && ip_summed == CHECKSUM_UNNECESSARY)
1072 skb->csum_level = 1;
837052d0 1073
ad86107f 1074 if (dev->features & NETIF_F_RXHASH)
69174416
TH
1075 skb_set_hash(skb,
1076 be32_to_cpu(cqe->immed_rss_invalid),
0a6d4245
ED
1077 (ip_summed == CHECKSUM_UNNECESSARY) ?
1078 PKT_HASH_TYPE_L4 :
1079 PKT_HASH_TYPE_L3);
ad86107f 1080
ec693d47 1081 if ((be32_to_cpu(cqe->vlan_my_qpn) &
e802f8e4 1082 MLX4_CQE_CVLAN_PRESENT_MASK) &&
ec693d47 1083 (dev->features & NETIF_F_HW_VLAN_CTAG_RX))
86a9bad3 1084 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), be16_to_cpu(cqe->sl_vid));
e38af4fa
HHZ
1085 else if ((be32_to_cpu(cqe->vlan_my_qpn) &
1086 MLX4_CQE_SVLAN_PRESENT_MASK) &&
1087 (dev->features & NETIF_F_HW_VLAN_STAG_RX))
1088 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD),
1089 be16_to_cpu(cqe->sl_vid));
f1b553fb 1090
ec693d47
AV
1091 if (ring->hwtstamp_rx_filter == HWTSTAMP_FILTER_ALL) {
1092 timestamp = mlx4_en_get_cqe_ts(cqe);
1093 mlx4_en_fill_hwtstamps(mdev, skb_hwtstamps(skb),
1094 timestamp);
1095 }
1096
868fdb06 1097 napi_gro_receive(&cq->napi, skb);
c27a02cd 1098next:
4cce66cd
TLSC
1099 for (nr = 0; nr < priv->num_frags; nr++)
1100 mlx4_en_free_frag(priv, frags, nr);
1101
d576acf0 1102consumed:
c27a02cd
YP
1103 ++cq->mcq.cons_index;
1104 index = (cq->mcq.cons_index) & ring->size_mask;
b1b6b4da 1105 cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor;
f1d29a3f 1106 if (++polled == budget)
c27a02cd 1107 goto out;
c27a02cd
YP
1108 }
1109
c27a02cd 1110out:
326fe02d 1111 rcu_read_unlock();
9ecc2d86 1112
dad42c30
ED
1113 if (polled) {
1114 if (doorbell_pending)
1115 mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
1116
1117 mlx4_cq_set_ci(&cq->mcq);
1118 wmb(); /* ensure HW sees CQ consumer before we post new buffers */
1119 ring->cons = cq->mcq.cons_index;
1120 }
c27a02cd 1121 AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
dad42c30
ED
1122
1123 if (mlx4_en_refill_rx_buffers(priv, ring))
1124 mlx4_en_update_rx_prod_db(ring);
1125
c27a02cd
YP
1126 return polled;
1127}
1128
1129
1130void mlx4_en_rx_irq(struct mlx4_cq *mcq)
1131{
1132 struct mlx4_en_cq *cq = container_of(mcq, struct mlx4_en_cq, mcq);
1133 struct mlx4_en_priv *priv = netdev_priv(cq->dev);
1134
477b35b4
ED
1135 if (likely(priv->port_up))
1136 napi_schedule_irqoff(&cq->napi);
c27a02cd
YP
1137 else
1138 mlx4_en_arm_cq(priv, cq);
1139}
1140
1141/* Rx CQ polling - called by NAPI */
1142int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
1143{
1144 struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
1145 struct net_device *dev = cq->dev;
1146 struct mlx4_en_priv *priv = netdev_priv(dev);
1147 int done;
1148
1149 done = mlx4_en_process_rx_cq(dev, cq, budget);
1150
1151 /* If we used up all the quota - we're probably not done yet... */
2eacc23c 1152 if (done == budget) {
35f6f453 1153 const struct cpumask *aff;
dc2ec62f
TG
1154 struct irq_data *idata;
1155 int cpu_curr;
35f6f453 1156
c27a02cd 1157 INC_PERF_COUNTER(priv->pstats.napi_quota);
35f6f453
AV
1158
1159 cpu_curr = smp_processor_id();
dc2ec62f
TG
1160 idata = irq_desc_get_irq_data(cq->irq_desc);
1161 aff = irq_data_get_affinity_mask(idata);
35f6f453 1162
2e1af7d7
ED
1163 if (likely(cpumask_test_cpu(cpu_curr, aff)))
1164 return budget;
1165
1166 /* Current cpu is not according to smp_irq_affinity -
dad42c30
ED
1167 * probably affinity changed. Need to stop this NAPI
1168 * poll, and restart it on the right CPU.
1169 * Try to avoid returning a too small value (like 0),
1170 * to not fool net_rx_action() and its netdev_budget
2e1af7d7 1171 */
dad42c30
ED
1172 if (done)
1173 done--;
c27a02cd 1174 }
1a288172 1175 /* Done for now */
2e713283
ED
1176 if (napi_complete_done(napi, done))
1177 mlx4_en_arm_cq(priv, cq);
c27a02cd
YP
1178 return done;
1179}
1180
51151a16 1181static const int frag_sizes[] = {
c27a02cd
YP
1182 FRAG_SZ0,
1183 FRAG_SZ1,
1184 FRAG_SZ2,
1185 FRAG_SZ3
1186};
1187
1188void mlx4_en_calc_rx_buf(struct net_device *dev)
1189{
1190 struct mlx4_en_priv *priv = netdev_priv(dev);
47a38e15 1191 int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
c27a02cd
YP
1192 int i = 0;
1193
d576acf0
BB
1194 /* bpf requires buffers to be set up as 1 packet per page.
1195 * This only works when num_frags == 1.
1196 */
67f8b1dc 1197 if (priv->tx_ring_num[TX_XDP]) {
b45f0674
MKL
1198 priv->frag_info[0].order = 0;
1199 priv->frag_info[0].frag_size = eff_mtu;
1200 priv->frag_info[0].frag_prefix_size = 0;
1201 /* This will gain efficient xdp frame recycling at the
1202 * expense of more costly truesize accounting
d576acf0 1203 */
b45f0674
MKL
1204 priv->frag_info[0].frag_stride = PAGE_SIZE;
1205 priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL;
ea3349a0 1206 priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM;
b45f0674
MKL
1207 i = 1;
1208 } else {
1209 int buf_size = 0;
1210
1211 while (buf_size < eff_mtu) {
1212 priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER;
1213 priv->frag_info[i].frag_size =
1214 (eff_mtu > buf_size + frag_sizes[i]) ?
1215 frag_sizes[i] : eff_mtu - buf_size;
1216 priv->frag_info[i].frag_prefix_size = buf_size;
1217 priv->frag_info[i].frag_stride =
1218 ALIGN(priv->frag_info[i].frag_size,
1219 SMP_CACHE_BYTES);
1220 priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE;
ea3349a0 1221 priv->frag_info[i].rx_headroom = 0;
b45f0674
MKL
1222 buf_size += priv->frag_info[i].frag_size;
1223 i++;
1224 }
c27a02cd
YP
1225 }
1226
1227 priv->num_frags = i;
1228 priv->rx_skb_size = eff_mtu;
4cce66cd 1229 priv->log_rx_info = ROUNDUP_LOG2(i * sizeof(struct mlx4_en_rx_alloc));
c27a02cd 1230
1a91de28
JP
1231 en_dbg(DRV, priv, "Rx buffer scatter-list (effective-mtu:%d num_frags:%d):\n",
1232 eff_mtu, priv->num_frags);
c27a02cd 1233 for (i = 0; i < priv->num_frags; i++) {
51151a16 1234 en_err(priv,
5f6e9800 1235 " frag:%d - size:%d prefix:%d stride:%d\n",
51151a16
ED
1236 i,
1237 priv->frag_info[i].frag_size,
1238 priv->frag_info[i].frag_prefix_size,
51151a16 1239 priv->frag_info[i].frag_stride);
c27a02cd
YP
1240 }
1241}
1242
1243/* RSS related functions */
1244
9f519f68
YP
1245static int mlx4_en_config_rss_qp(struct mlx4_en_priv *priv, int qpn,
1246 struct mlx4_en_rx_ring *ring,
c27a02cd
YP
1247 enum mlx4_qp_state *state,
1248 struct mlx4_qp *qp)
1249{
1250 struct mlx4_en_dev *mdev = priv->mdev;
1251 struct mlx4_qp_context *context;
1252 int err = 0;
1253
14f8dc49
JP
1254 context = kmalloc(sizeof(*context), GFP_KERNEL);
1255 if (!context)
c27a02cd 1256 return -ENOMEM;
c27a02cd 1257
40f2287b 1258 err = mlx4_qp_alloc(mdev->dev, qpn, qp, GFP_KERNEL);
c27a02cd 1259 if (err) {
453a6082 1260 en_err(priv, "Failed to allocate qp #%x\n", qpn);
c27a02cd 1261 goto out;
c27a02cd
YP
1262 }
1263 qp->event = mlx4_en_sqp_event;
1264
1265 memset(context, 0, sizeof *context);
00d7d7bc 1266 mlx4_en_fill_qp_context(priv, ring->actual_size, ring->stride, 0, 0,
0e98b523 1267 qpn, ring->cqn, -1, context);
9f519f68 1268 context->db_rec_addr = cpu_to_be64(ring->wqres.db.dma);
c27a02cd 1269
f3a9d1f2 1270 /* Cancel FCS removal if FW allows */
4a5f4dd8 1271 if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_FCS_KEEP) {
f3a9d1f2 1272 context->param3 |= cpu_to_be32(1 << 29);
f0df3503
MM
1273 if (priv->dev->features & NETIF_F_RXFCS)
1274 ring->fcs_del = 0;
1275 else
1276 ring->fcs_del = ETH_FCS_LEN;
4a5f4dd8
YP
1277 } else
1278 ring->fcs_del = 0;
f3a9d1f2 1279
9f519f68 1280 err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, context, qp, state);
c27a02cd
YP
1281 if (err) {
1282 mlx4_qp_remove(mdev->dev, qp);
1283 mlx4_qp_free(mdev->dev, qp);
1284 }
9f519f68 1285 mlx4_en_update_rx_prod_db(ring);
c27a02cd
YP
1286out:
1287 kfree(context);
1288 return err;
1289}
1290
cabdc8ee
HHZ
1291int mlx4_en_create_drop_qp(struct mlx4_en_priv *priv)
1292{
1293 int err;
1294 u32 qpn;
1295
d57febe1
MB
1296 err = mlx4_qp_reserve_range(priv->mdev->dev, 1, 1, &qpn,
1297 MLX4_RESERVE_A0_QP);
cabdc8ee
HHZ
1298 if (err) {
1299 en_err(priv, "Failed reserving drop qpn\n");
1300 return err;
1301 }
40f2287b 1302 err = mlx4_qp_alloc(priv->mdev->dev, qpn, &priv->drop_qp, GFP_KERNEL);
cabdc8ee
HHZ
1303 if (err) {
1304 en_err(priv, "Failed allocating drop qp\n");
1305 mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
1306 return err;
1307 }
1308
1309 return 0;
1310}
1311
1312void mlx4_en_destroy_drop_qp(struct mlx4_en_priv *priv)
1313{
1314 u32 qpn;
1315
1316 qpn = priv->drop_qp.qpn;
1317 mlx4_qp_remove(priv->mdev->dev, &priv->drop_qp);
1318 mlx4_qp_free(priv->mdev->dev, &priv->drop_qp);
1319 mlx4_qp_release_range(priv->mdev->dev, qpn, 1);
1320}
1321
c27a02cd
YP
1322/* Allocate rx qp's and configure them according to rss map */
1323int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
1324{
1325 struct mlx4_en_dev *mdev = priv->mdev;
1326 struct mlx4_en_rss_map *rss_map = &priv->rss_map;
1327 struct mlx4_qp_context context;
876f6e67 1328 struct mlx4_rss_context *rss_context;
93d3e367 1329 int rss_rings;
c27a02cd 1330 void *ptr;
876f6e67 1331 u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
1202d460 1332 MLX4_RSS_TCP_IPV6);
9f519f68 1333 int i, qpn;
c27a02cd
YP
1334 int err = 0;
1335 int good_qps = 0;
1336
453a6082 1337 en_dbg(DRV, priv, "Configuring rss steering\n");
b6b912e0
YP
1338 err = mlx4_qp_reserve_range(mdev->dev, priv->rx_ring_num,
1339 priv->rx_ring_num,
ddae0349 1340 &rss_map->base_qpn, 0);
c27a02cd 1341 if (err) {
b6b912e0 1342 en_err(priv, "Failed reserving %d qps\n", priv->rx_ring_num);
c27a02cd
YP
1343 return err;
1344 }
1345
b6b912e0 1346 for (i = 0; i < priv->rx_ring_num; i++) {
c27a02cd 1347 qpn = rss_map->base_qpn + i;
41d942d5 1348 err = mlx4_en_config_rss_qp(priv, qpn, priv->rx_ring[i],
c27a02cd
YP
1349 &rss_map->state[i],
1350 &rss_map->qps[i]);
1351 if (err)
1352 goto rss_err;
1353
1354 ++good_qps;
1355 }
1356
1357 /* Configure RSS indirection qp */
40f2287b 1358 err = mlx4_qp_alloc(mdev->dev, priv->base_qpn, &rss_map->indir_qp, GFP_KERNEL);
c27a02cd 1359 if (err) {
453a6082 1360 en_err(priv, "Failed to allocate RSS indirection QP\n");
1679200f 1361 goto rss_err;
c27a02cd
YP
1362 }
1363 rss_map->indir_qp.event = mlx4_en_sqp_event;
1364 mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
41d942d5 1365 priv->rx_ring[0]->cqn, -1, &context);
c27a02cd 1366
93d3e367
YP
1367 if (!priv->prof->rss_rings || priv->prof->rss_rings > priv->rx_ring_num)
1368 rss_rings = priv->rx_ring_num;
1369 else
1370 rss_rings = priv->prof->rss_rings;
1371
876f6e67
OG
1372 ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path)
1373 + MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
43d620c8 1374 rss_context = ptr;
93d3e367 1375 rss_context->base_qpn = cpu_to_be32(ilog2(rss_rings) << 24 |
c27a02cd 1376 (rss_map->base_qpn));
89efea25 1377 rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
1202d460
OG
1378 if (priv->mdev->profile.udp_rss) {
1379 rss_mask |= MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6;
1380 rss_context->base_qpn_udp = rss_context->default_qpn;
1381 }
837052d0
OG
1382
1383 if (mdev->dev->caps.tunnel_offload_mode == MLX4_TUNNEL_OFFLOAD_MODE_VXLAN) {
1384 en_info(priv, "Setting RSS context tunnel type to RSS on inner headers\n");
1385 rss_mask |= MLX4_RSS_BY_INNER_HEADERS;
1386 }
1387
0533943c 1388 rss_context->flags = rss_mask;
876f6e67 1389 rss_context->hash_fn = MLX4_RSS_HASH_TOP;
947cbb0a
EP
1390 if (priv->rss_hash_fn == ETH_RSS_HASH_XOR) {
1391 rss_context->hash_fn = MLX4_RSS_HASH_XOR;
1392 } else if (priv->rss_hash_fn == ETH_RSS_HASH_TOP) {
1393 rss_context->hash_fn = MLX4_RSS_HASH_TOP;
1394 memcpy(rss_context->rss_key, priv->rss_key,
1395 MLX4_EN_RSS_KEY_SIZE);
947cbb0a
EP
1396 } else {
1397 en_err(priv, "Unknown RSS hash function requested\n");
1398 err = -EINVAL;
1399 goto indir_err;
1400 }
c27a02cd
YP
1401 err = mlx4_qp_to_ready(mdev->dev, &priv->res.mtt, &context,
1402 &rss_map->indir_qp, &rss_map->indir_state);
1403 if (err)
1404 goto indir_err;
1405
1406 return 0;
1407
1408indir_err:
1409 mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
1410 MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
1411 mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
1412 mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
c27a02cd
YP
1413rss_err:
1414 for (i = 0; i < good_qps; i++) {
1415 mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
1416 MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
1417 mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
1418 mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
1419 }
b6b912e0 1420 mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
c27a02cd
YP
1421 return err;
1422}
1423
1424void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv)
1425{
1426 struct mlx4_en_dev *mdev = priv->mdev;
1427 struct mlx4_en_rss_map *rss_map = &priv->rss_map;
1428 int i;
1429
1430 mlx4_qp_modify(mdev->dev, NULL, rss_map->indir_state,
1431 MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->indir_qp);
1432 mlx4_qp_remove(mdev->dev, &rss_map->indir_qp);
1433 mlx4_qp_free(mdev->dev, &rss_map->indir_qp);
c27a02cd 1434
b6b912e0 1435 for (i = 0; i < priv->rx_ring_num; i++) {
c27a02cd
YP
1436 mlx4_qp_modify(mdev->dev, NULL, rss_map->state[i],
1437 MLX4_QP_STATE_RST, NULL, 0, 0, &rss_map->qps[i]);
1438 mlx4_qp_remove(mdev->dev, &rss_map->qps[i]);
1439 mlx4_qp_free(mdev->dev, &rss_map->qps[i]);
1440 }
b6b912e0 1441 mlx4_qp_release_range(mdev->dev, rss_map->base_qpn, priv->rx_ring_num);
c27a02cd 1442}