// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/* Copyright (c) 2021, Microsoft Corporation. */

#include <uapi/linux/bpf.h>

#include <linux/inetdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/filter.h>
#include <linux/pci.h>

#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <net/page_pool/helpers.h>

#include <net/mana/mana.h>
#include <net/mana/mana_auxiliary.h>

static DEFINE_IDA(mana_adev_ida);

static int mana_adev_idx_alloc(void)
{
	return ida_alloc(&mana_adev_ida, GFP_KERNEL);
}

static void mana_adev_idx_free(int idx)
{
	ida_free(&mana_adev_ida, idx);
}

/* Microsoft Azure Network Adapter (MANA) functions */

static int mana_open(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	err = mana_alloc_queues(ndev);
	if (err)
		return err;

	apc->port_is_up = true;

	/* Ensure port state updated before txq state */
	smp_wmb();

	netif_carrier_on(ndev);
	netif_tx_wake_all_queues(ndev);

	return 0;
}

static int mana_close(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);

	if (!apc->port_is_up)
		return 0;

	return mana_detach(ndev, true);
}

static bool mana_can_tx(struct gdma_queue *wq)
{
	return mana_gd_wq_avail_space(wq) >= MAX_TX_WQE_SIZE;
}

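/* Return the L4 protocol (IPPROTO_TCP or IPPROTO_UDP) for which the HW is
 * asked to complete the checksum of a CHECKSUM_PARTIAL skb, or 0 when this
 * type of checksum cannot be offloaded and the caller must fall back to
 * skb_checksum_help().
 */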
static unsigned int mana_checksum_info(struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *ip = ip_hdr(skb);

		if (ip->protocol == IPPROTO_TCP)
			return IPPROTO_TCP;

		if (ip->protocol == IPPROTO_UDP)
			return IPPROTO_UDP;
	} else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct ipv6hdr *ip6 = ipv6_hdr(skb);

		if (ip6->nexthdr == IPPROTO_TCP)
			return IPPROTO_TCP;

		if (ip6->nexthdr == IPPROTO_UDP)
			return IPPROTO_UDP;
	}

	/* No csum offloading */
	return 0;
}

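/* DMA-map the skb for transmission: the linear part of the skb goes into
 * SGE 0 and each page fragment into SGE i + 1.  The DMA addresses and sizes
 * are recorded in the mana_skb_head that lives at skb->head, so that
 * mana_unmap_skb() can undo the mapping once the TX completion arrives.
 */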
static int mana_map_skb(struct sk_buff *skb, struct mana_port_context *apc,
			struct mana_tx_package *tp)
{
	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct gdma_context *gc;
	struct device *dev;
	skb_frag_t *frag;
	dma_addr_t da;
	int i;

	gc = gd->gdma_context;
	dev = gc->dev;
	da = dma_map_single(dev, skb->data, skb_headlen(skb), DMA_TO_DEVICE);

	if (dma_mapping_error(dev, da))
		return -ENOMEM;

	ash->dma_handle[0] = da;
	ash->size[0] = skb_headlen(skb);

	tp->wqe_req.sgl[0].address = ash->dma_handle[0];
	tp->wqe_req.sgl[0].mem_key = gd->gpa_mkey;
	tp->wqe_req.sgl[0].size = ash->size[0];

	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
		frag = &skb_shinfo(skb)->frags[i];
		da = skb_frag_dma_map(dev, frag, 0, skb_frag_size(frag),
				      DMA_TO_DEVICE);

		if (dma_mapping_error(dev, da))
			goto frag_err;

		ash->dma_handle[i + 1] = da;
		ash->size[i + 1] = skb_frag_size(frag);

		tp->wqe_req.sgl[i + 1].address = ash->dma_handle[i + 1];
		tp->wqe_req.sgl[i + 1].mem_key = gd->gpa_mkey;
		tp->wqe_req.sgl[i + 1].size = ash->size[i + 1];
	}

	return 0;

frag_err:
	for (i = i - 1; i >= 0; i--)
		dma_unmap_page(dev, ash->dma_handle[i + 1], ash->size[i + 1],
			       DMA_TO_DEVICE);

	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);

	return -ENOMEM;
}

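/* Main transmit handler: builds the per-packet TX out-of-band area (short or
 * long format depending on the vPort offset and VLAN tagging), requests GSO
 * or checksum offload as appropriate, DMA-maps the skb, posts the work
 * request to the send queue and rings the doorbell.  The skb stays on
 * txq->pending_skbs until its completion is reaped by mana_poll_tx_cq().
 */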
netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev)
{
	enum mana_tx_pkt_format pkt_fmt = MANA_SHORT_PKT_FMT;
	struct mana_port_context *apc = netdev_priv(ndev);
	u16 txq_idx = skb_get_queue_mapping(skb);
	struct gdma_dev *gd = apc->ac->gdma_dev;
	bool ipv4 = false, ipv6 = false;
	struct mana_tx_package pkg = {};
	struct netdev_queue *net_txq;
	struct mana_stats_tx *tx_stats;
	struct gdma_queue *gdma_sq;
	unsigned int csum_type;
	struct mana_txq *txq;
	struct mana_cq *cq;
	int err, len, ihs;

	if (unlikely(!apc->port_is_up))
		goto tx_drop;

	if (skb_cow_head(skb, MANA_HEADROOM))
		goto tx_drop_count;

	txq = &apc->tx_qp[txq_idx].txq;
	gdma_sq = txq->gdma_sq;
	cq = &apc->tx_qp[txq_idx].tx_cq;
	tx_stats = &txq->stats;

	pkg.tx_oob.s_oob.vcq_num = cq->gdma_id;
	pkg.tx_oob.s_oob.vsq_frame = txq->vsq_frame;

	if (txq->vp_offset > MANA_SHORT_VPORT_OFFSET_MAX) {
		pkg.tx_oob.l_oob.long_vp_offset = txq->vp_offset;
		pkt_fmt = MANA_LONG_PKT_FMT;
	} else {
		pkg.tx_oob.s_oob.short_vp_offset = txq->vp_offset;
	}

	if (skb_vlan_tag_present(skb)) {
		pkt_fmt = MANA_LONG_PKT_FMT;
		pkg.tx_oob.l_oob.inject_vlan_pri_tag = 1;
		pkg.tx_oob.l_oob.pcp = skb_vlan_tag_get_prio(skb);
		pkg.tx_oob.l_oob.dei = skb_vlan_tag_get_cfi(skb);
		pkg.tx_oob.l_oob.vlan_id = skb_vlan_tag_get_id(skb);
	}

	pkg.tx_oob.s_oob.pkt_fmt = pkt_fmt;

	if (pkt_fmt == MANA_SHORT_PKT_FMT) {
		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_short_oob);
		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->short_pkt_fmt++;
		u64_stats_update_end(&tx_stats->syncp);
	} else {
		pkg.wqe_req.inline_oob_size = sizeof(struct mana_tx_oob);
		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->long_pkt_fmt++;
		u64_stats_update_end(&tx_stats->syncp);
	}

	pkg.wqe_req.inline_oob_data = &pkg.tx_oob;
	pkg.wqe_req.flags = 0;
	pkg.wqe_req.client_data_unit = 0;

	pkg.wqe_req.num_sge = 1 + skb_shinfo(skb)->nr_frags;
	WARN_ON_ONCE(pkg.wqe_req.num_sge > MAX_TX_WQE_SGL_ENTRIES);

	if (pkg.wqe_req.num_sge <= ARRAY_SIZE(pkg.sgl_array)) {
		pkg.wqe_req.sgl = pkg.sgl_array;
	} else {
		pkg.sgl_ptr = kmalloc_array(pkg.wqe_req.num_sge,
					    sizeof(struct gdma_sge),
					    GFP_ATOMIC);
		if (!pkg.sgl_ptr)
			goto tx_drop_count;

		pkg.wqe_req.sgl = pkg.sgl_ptr;
	}

	if (skb->protocol == htons(ETH_P_IP))
		ipv4 = true;
	else if (skb->protocol == htons(ETH_P_IPV6))
		ipv6 = true;

	if (skb_is_gso(skb)) {
		pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
		pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;

		pkg.tx_oob.s_oob.comp_iphdr_csum = 1;
		pkg.tx_oob.s_oob.comp_tcp_csum = 1;
		pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);

		pkg.wqe_req.client_data_unit = skb_shinfo(skb)->gso_size;
		pkg.wqe_req.flags = GDMA_WR_OOB_IN_SGL | GDMA_WR_PAD_BY_SGE0;
		if (ipv4) {
			ip_hdr(skb)->tot_len = 0;
			ip_hdr(skb)->check = 0;
			tcp_hdr(skb)->check =
				~csum_tcpudp_magic(ip_hdr(skb)->saddr,
						   ip_hdr(skb)->daddr, 0,
						   IPPROTO_TCP, 0);
		} else {
			ipv6_hdr(skb)->payload_len = 0;
			tcp_hdr(skb)->check =
				~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						 &ipv6_hdr(skb)->daddr, 0,
						 IPPROTO_TCP, 0);
		}

		if (skb->encapsulation) {
			ihs = skb_inner_tcp_all_headers(skb);
			u64_stats_update_begin(&tx_stats->syncp);
			tx_stats->tso_inner_packets++;
			tx_stats->tso_inner_bytes += skb->len - ihs;
			u64_stats_update_end(&tx_stats->syncp);
		} else {
			if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
				ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
			else
				ihs = skb_tcp_all_headers(skb);

			u64_stats_update_begin(&tx_stats->syncp);
			tx_stats->tso_packets++;
			tx_stats->tso_bytes += skb->len - ihs;
			u64_stats_update_end(&tx_stats->syncp);
		}
	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
		csum_type = mana_checksum_info(skb);

		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->csum_partial++;
		u64_stats_update_end(&tx_stats->syncp);

		if (csum_type == IPPROTO_TCP) {
			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;

			pkg.tx_oob.s_oob.comp_tcp_csum = 1;
			pkg.tx_oob.s_oob.trans_off = skb_transport_offset(skb);
		} else if (csum_type == IPPROTO_UDP) {
			pkg.tx_oob.s_oob.is_outer_ipv4 = ipv4;
			pkg.tx_oob.s_oob.is_outer_ipv6 = ipv6;

			pkg.tx_oob.s_oob.comp_udp_csum = 1;
		} else {
			/* Can't do offload of this type of checksum */
			if (skb_checksum_help(skb))
				goto free_sgl_ptr;
		}
	}

	if (mana_map_skb(skb, apc, &pkg)) {
		u64_stats_update_begin(&tx_stats->syncp);
		tx_stats->mana_map_err++;
		u64_stats_update_end(&tx_stats->syncp);
		goto free_sgl_ptr;
	}

	skb_queue_tail(&txq->pending_skbs, skb);

	len = skb->len;
	net_txq = netdev_get_tx_queue(ndev, txq_idx);

	err = mana_gd_post_work_request(gdma_sq, &pkg.wqe_req,
					(struct gdma_posted_wqe_info *)skb->cb);
	if (!mana_can_tx(gdma_sq)) {
		netif_tx_stop_queue(net_txq);
		apc->eth_stats.stop_queue++;
	}

	if (err) {
		(void)skb_dequeue_tail(&txq->pending_skbs);
		netdev_warn(ndev, "Failed to post TX OOB: %d\n", err);
		err = NETDEV_TX_BUSY;
		goto tx_busy;
	}

	err = NETDEV_TX_OK;
	atomic_inc(&txq->pending_sends);

	mana_gd_wq_ring_doorbell(gd->gdma_context, gdma_sq);

	/* skb may be freed after mana_gd_post_work_request. Do not use it. */
	skb = NULL;

	tx_stats = &txq->stats;
	u64_stats_update_begin(&tx_stats->syncp);
	tx_stats->packets++;
	tx_stats->bytes += len;
	u64_stats_update_end(&tx_stats->syncp);

tx_busy:
	if (netif_tx_queue_stopped(net_txq) && mana_can_tx(gdma_sq)) {
		netif_tx_wake_queue(net_txq);
		apc->eth_stats.wake_queue++;
	}

	kfree(pkg.sgl_ptr);
	return err;

free_sgl_ptr:
	kfree(pkg.sgl_ptr);
tx_drop_count:
	ndev->stats.tx_dropped++;
tx_drop:
	dev_kfree_skb_any(skb);
	return NETDEV_TX_OK;
}

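/* Aggregate the per-queue RX/TX counters into rtnl_link_stats64.  Each
 * queue's counters are read under the u64_stats_fetch_begin()/retry()
 * sequence so a consistent snapshot is obtained without blocking the
 * datapath.
 */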
static void mana_get_stats64(struct net_device *ndev,
			     struct rtnl_link_stats64 *st)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	unsigned int num_queues = apc->num_queues;
	struct mana_stats_rx *rx_stats;
	struct mana_stats_tx *tx_stats;
	unsigned int start;
	u64 packets, bytes;
	int q;

	if (!apc->port_is_up)
		return;

	netdev_stats_to_stats64(st, &ndev->stats);

	for (q = 0; q < num_queues; q++) {
		rx_stats = &apc->rxqs[q]->stats;

		do {
			start = u64_stats_fetch_begin(&rx_stats->syncp);
			packets = rx_stats->packets;
			bytes = rx_stats->bytes;
		} while (u64_stats_fetch_retry(&rx_stats->syncp, start));

		st->rx_packets += packets;
		st->rx_bytes += bytes;
	}

	for (q = 0; q < num_queues; q++) {
		tx_stats = &apc->tx_qp[q].txq.stats;

		do {
			start = u64_stats_fetch_begin(&tx_stats->syncp);
			packets = tx_stats->packets;
			bytes = tx_stats->bytes;
		} while (u64_stats_fetch_retry(&tx_stats->syncp, start));

		st->tx_packets += packets;
		st->tx_bytes += bytes;
	}
}

static int mana_get_tx_queue(struct net_device *ndev, struct sk_buff *skb,
			     int old_q)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	u32 hash = skb_get_hash(skb);
	struct sock *sk = skb->sk;
	int txq;

	txq = apc->indir_table[hash & MANA_INDIRECT_TABLE_MASK];

	if (txq != old_q && sk && sk_fullsock(sk) &&
	    rcu_access_pointer(sk->sk_dst_cache))
		sk_tx_queue_set(sk, txq);

	return txq;
}

static u16 mana_select_queue(struct net_device *ndev, struct sk_buff *skb,
			     struct net_device *sb_dev)
{
	int txq;

	if (ndev->real_num_tx_queues == 1)
		return 0;

	txq = sk_tx_queue_get(skb->sk);

	if (txq < 0 || skb->ooo_okay || txq >= ndev->real_num_tx_queues) {
		if (skb_rx_queue_recorded(skb))
			txq = skb_get_rx_queue(skb);
		else
			txq = mana_get_tx_queue(ndev, skb, txq);
	}

	return txq;
}

/* Release pre-allocated RX buffers */
static void mana_pre_dealloc_rxbufs(struct mana_port_context *mpc)
{
	struct device *dev;
	int i;

	dev = mpc->ac->gdma_dev->gdma_context->dev;

	if (!mpc->rxbufs_pre)
		goto out1;

	if (!mpc->das_pre)
		goto out2;

	while (mpc->rxbpre_total) {
		i = --mpc->rxbpre_total;
		dma_unmap_single(dev, mpc->das_pre[i], mpc->rxbpre_datasize,
				 DMA_FROM_DEVICE);
		put_page(virt_to_head_page(mpc->rxbufs_pre[i]));
	}

	kfree(mpc->das_pre);
	mpc->das_pre = NULL;

out2:
	kfree(mpc->rxbufs_pre);
	mpc->rxbufs_pre = NULL;

out1:
	mpc->rxbpre_datasize = 0;
	mpc->rxbpre_alloc_size = 0;
	mpc->rxbpre_headroom = 0;
}

/* Get a buffer from the pre-allocated RX buffers */
static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da)
{
	struct net_device *ndev = rxq->ndev;
	struct mana_port_context *mpc;
	void *va;

	mpc = netdev_priv(ndev);

	if (!mpc->rxbufs_pre || !mpc->das_pre || !mpc->rxbpre_total) {
		netdev_err(ndev, "No RX pre-allocated bufs\n");
		return NULL;
	}

	/* Check sizes to catch unexpected coding error */
	if (mpc->rxbpre_datasize != rxq->datasize) {
		netdev_err(ndev, "rxbpre_datasize mismatch: %u: %u\n",
			   mpc->rxbpre_datasize, rxq->datasize);
		return NULL;
	}

	if (mpc->rxbpre_alloc_size != rxq->alloc_size) {
		netdev_err(ndev, "rxbpre_alloc_size mismatch: %u: %u\n",
			   mpc->rxbpre_alloc_size, rxq->alloc_size);
		return NULL;
	}

	if (mpc->rxbpre_headroom != rxq->headroom) {
		netdev_err(ndev, "rxbpre_headroom mismatch: %u: %u\n",
			   mpc->rxbpre_headroom, rxq->headroom);
		return NULL;
	}

	mpc->rxbpre_total--;

	*da = mpc->das_pre[mpc->rxbpre_total];
	va = mpc->rxbufs_pre[mpc->rxbpre_total];
	mpc->rxbufs_pre[mpc->rxbpre_total] = NULL;

	/* Deallocate the array after all buffers are gone */
	if (!mpc->rxbpre_total)
		mana_pre_dealloc_rxbufs(mpc);

	return va;
}

/* Get RX buffer's data size, alloc size, XDP headroom based on MTU */
static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size,
			       u32 *headroom)
{
	if (mtu > MANA_XDP_MTU_MAX)
		*headroom = 0; /* no support for XDP */
	else
		*headroom = XDP_PACKET_HEADROOM;

	*alloc_size = mtu + MANA_RXBUF_PAD + *headroom;

	*datasize = ALIGN(mtu + ETH_HLEN, MANA_RX_DATA_ALIGN);
}

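/* Illustrative example (values assumed; the constants are defined in mana.h):
 * with a 1500-byte MTU and MANA_RX_DATA_ALIGN of 64, datasize becomes
 * ALIGN(1500 + ETH_HLEN, 64) = ALIGN(1514, 64) = 1536 bytes, while alloc_size
 * additionally reserves MANA_RXBUF_PAD plus the XDP headroom in front of the
 * received data.
 */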
static int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu)
{
	struct device *dev;
	struct page *page;
	dma_addr_t da;
	int num_rxb;
	void *va;
	int i;

	mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize,
			   &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom);

	dev = mpc->ac->gdma_dev->gdma_context->dev;

	num_rxb = mpc->num_queues * RX_BUFFERS_PER_QUEUE;

	WARN(mpc->rxbufs_pre, "mana rxbufs_pre exists\n");
	mpc->rxbufs_pre = kmalloc_array(num_rxb, sizeof(void *), GFP_KERNEL);
	if (!mpc->rxbufs_pre)
		goto error;

	mpc->das_pre = kmalloc_array(num_rxb, sizeof(dma_addr_t), GFP_KERNEL);
	if (!mpc->das_pre)
		goto error;

	mpc->rxbpre_total = 0;

	for (i = 0; i < num_rxb; i++) {
		if (mpc->rxbpre_alloc_size > PAGE_SIZE) {
			va = netdev_alloc_frag(mpc->rxbpre_alloc_size);
			if (!va)
				goto error;

			page = virt_to_head_page(va);
			/* Check if the frag falls back to single page */
			if (compound_order(page) <
			    get_order(mpc->rxbpre_alloc_size)) {
				put_page(page);
				goto error;
			}
		} else {
			page = dev_alloc_page();
			if (!page)
				goto error;

			va = page_to_virt(page);
		}

		da = dma_map_single(dev, va + mpc->rxbpre_headroom,
				    mpc->rxbpre_datasize, DMA_FROM_DEVICE);
		if (dma_mapping_error(dev, da)) {
			put_page(virt_to_head_page(va));
			goto error;
		}

		mpc->rxbufs_pre[i] = va;
		mpc->das_pre[i] = da;
		mpc->rxbpre_total = i + 1;
	}

	return 0;

error:
	mana_pre_dealloc_rxbufs(mpc);
	return -ENOMEM;
}

static int mana_change_mtu(struct net_device *ndev, int new_mtu)
{
	struct mana_port_context *mpc = netdev_priv(ndev);
	unsigned int old_mtu = ndev->mtu;
	int err;

	/* Pre-allocate buffers to prevent failure in mana_attach later */
	err = mana_pre_alloc_rxbufs(mpc, new_mtu);
	if (err) {
		netdev_err(ndev, "Insufficient memory for new MTU\n");
		return err;
	}

	err = mana_detach(ndev, false);
	if (err) {
		netdev_err(ndev, "mana_detach failed: %d\n", err);
		goto out;
	}

	ndev->mtu = new_mtu;

	err = mana_attach(ndev);
	if (err) {
		netdev_err(ndev, "mana_attach failed: %d\n", err);
		ndev->mtu = old_mtu;
	}

out:
	mana_pre_dealloc_rxbufs(mpc);
	return err;
}

static const struct net_device_ops mana_devops = {
	.ndo_open		= mana_open,
	.ndo_stop		= mana_close,
	.ndo_select_queue	= mana_select_queue,
	.ndo_start_xmit		= mana_start_xmit,
	.ndo_validate_addr	= eth_validate_addr,
	.ndo_get_stats64	= mana_get_stats64,
	.ndo_xdp_xmit		= mana_xdp_xmit,
	.ndo_change_mtu		= mana_change_mtu,
};

static void mana_cleanup_port_context(struct mana_port_context *apc)
{
	kfree(apc->rxqs);
	apc->rxqs = NULL;
}

static int mana_init_port_context(struct mana_port_context *apc)
{
	apc->rxqs = kcalloc(apc->num_queues, sizeof(struct mana_rxq *),
			    GFP_KERNEL);

	return !apc->rxqs ? -ENOMEM : 0;
}

static int mana_send_request(struct mana_context *ac, void *in_buf,
			     u32 in_len, void *out_buf, u32 out_len)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct gdma_resp_hdr *resp = out_buf;
	struct gdma_req_hdr *req = in_buf;
	struct device *dev = gc->dev;
	static atomic_t activity_id;
	int err;

	req->dev_id = gc->mana.dev_id;
	req->activity_id = atomic_inc_return(&activity_id);

	err = mana_gd_send_request(gc, in_len, in_buf, out_len,
				   out_buf);
	if (err || resp->status) {
		dev_err(dev, "Failed to send mana message: %d, 0x%x\n",
			err, resp->status);
		return err ? err : -EPROTO;
	}

	if (req->dev_id.as_uint32 != resp->dev_id.as_uint32 ||
	    req->activity_id != resp->activity_id) {
		dev_err(dev, "Unexpected mana message response: %x,%x,%x,%x\n",
			req->dev_id.as_uint32, resp->dev_id.as_uint32,
			req->activity_id, resp->activity_id);
		return -EPROTO;
	}

	return 0;
}

static int mana_verify_resp_hdr(const struct gdma_resp_hdr *resp_hdr,
				const enum mana_command_code expected_code,
				const u32 min_size)
{
	if (resp_hdr->response.msg_type != expected_code)
		return -EPROTO;

	if (resp_hdr->response.msg_version < GDMA_MESSAGE_V1)
		return -EPROTO;

	if (resp_hdr->response.msg_size < min_size)
		return -EPROTO;

	return 0;
}

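/* All of the configuration helpers below follow the same request pattern:
 * initialize the request header with mana_gd_init_req_hdr(), issue it with
 * mana_send_request(), validate the reply with mana_verify_resp_hdr() and
 * finally check resp.hdr.status before consuming any response fields.
 */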
static int mana_pf_register_hw_vport(struct mana_port_context *apc)
{
	struct mana_register_hw_vport_resp resp = {};
	struct mana_register_hw_vport_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_HW_PORT,
			     sizeof(req), sizeof(resp));
	req.attached_gfid = 1;
	req.is_pf_default_vport = 1;
	req.allow_all_ether_types = 1;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to register hw vPort: %d\n", err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_HW_PORT,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to register hw vPort: %d, 0x%x\n",
			   err, resp.hdr.status);
		return err ? err : -EPROTO;
	}

	apc->port_handle = resp.hw_vport_handle;
	return 0;
}

static void mana_pf_deregister_hw_vport(struct mana_port_context *apc)
{
	struct mana_deregister_hw_vport_resp resp = {};
	struct mana_deregister_hw_vport_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_HW_PORT,
			     sizeof(req), sizeof(resp));
	req.hw_vport_handle = apc->port_handle;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to unregister hw vPort: %d\n",
			   err);
		return;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_HW_PORT,
				   sizeof(resp));
	if (err || resp.hdr.status)
		netdev_err(apc->ndev,
			   "Failed to deregister hw vPort: %d, 0x%x\n",
			   err, resp.hdr.status);
}

static int mana_pf_register_filter(struct mana_port_context *apc)
{
	struct mana_register_filter_resp resp = {};
	struct mana_register_filter_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_REGISTER_FILTER,
			     sizeof(req), sizeof(resp));
	req.vport = apc->port_handle;
	memcpy(req.mac_addr, apc->mac_addr, ETH_ALEN);

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to register filter: %d\n", err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_REGISTER_FILTER,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to register filter: %d, 0x%x\n",
			   err, resp.hdr.status);
		return err ? err : -EPROTO;
	}

	apc->pf_filter_handle = resp.filter_handle;
	return 0;
}

static void mana_pf_deregister_filter(struct mana_port_context *apc)
{
	struct mana_deregister_filter_resp resp = {};
	struct mana_deregister_filter_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_DEREGISTER_FILTER,
			     sizeof(req), sizeof(resp));
	req.filter_handle = apc->pf_filter_handle;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to unregister filter: %d\n",
			   err);
		return;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_DEREGISTER_FILTER,
				   sizeof(resp));
	if (err || resp.hdr.status)
		netdev_err(apc->ndev,
			   "Failed to deregister filter: %d, 0x%x\n",
			   err, resp.hdr.status);
}

static int mana_query_device_cfg(struct mana_context *ac, u32 proto_major_ver,
				 u32 proto_minor_ver, u32 proto_micro_ver,
				 u16 *max_num_vports)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct mana_query_device_cfg_resp resp = {};
	struct mana_query_device_cfg_req req = {};
	struct device *dev = gc->dev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_DEV_CONFIG,
			     sizeof(req), sizeof(resp));

	req.hdr.resp.msg_version = GDMA_MESSAGE_V2;

	req.proto_major_ver = proto_major_ver;
	req.proto_minor_ver = proto_minor_ver;
	req.proto_micro_ver = proto_micro_ver;

	err = mana_send_request(ac, &req, sizeof(req), &resp, sizeof(resp));
	if (err) {
		dev_err(dev, "Failed to query config: %d", err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_DEV_CONFIG,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		dev_err(dev, "Invalid query result: %d, 0x%x\n", err,
			resp.hdr.status);
		if (!err)
			err = -EPROTO;
		return err;
	}

	*max_num_vports = resp.max_num_vports;

	if (resp.hdr.response.msg_version == GDMA_MESSAGE_V2)
		gc->adapter_mtu = resp.adapter_mtu;
	else
		gc->adapter_mtu = ETH_FRAME_LEN;

	return 0;
}

static int mana_query_vport_cfg(struct mana_port_context *apc, u32 vport_index,
				u32 *max_sq, u32 *max_rq, u32 *num_indir_entry)
{
	struct mana_query_vport_cfg_resp resp = {};
	struct mana_query_vport_cfg_req req = {};
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_VPORT_CONFIG,
			     sizeof(req), sizeof(resp));

	req.vport_index = vport_index;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err)
		return err;

	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_VPORT_CONFIG,
				   sizeof(resp));
	if (err)
		return err;

	if (resp.hdr.status)
		return -EPROTO;

	*max_sq = resp.max_num_sq;
	*max_rq = resp.max_num_rq;
	*num_indir_entry = resp.num_indirection_ent;

	apc->port_handle = resp.vport;
	ether_addr_copy(apc->mac_addr, resp.mac_addr);

	return 0;
}

void mana_uncfg_vport(struct mana_port_context *apc)
{
	mutex_lock(&apc->vport_mutex);
	apc->vport_use_count--;
	WARN_ON(apc->vport_use_count < 0);
	mutex_unlock(&apc->vport_mutex);
}
EXPORT_SYMBOL_NS(mana_uncfg_vport, NET_MANA);

int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
		   u32 doorbell_pg_id)
{
	struct mana_config_vport_resp resp = {};
	struct mana_config_vport_req req = {};
	int err;

	/* This function is used to program the Ethernet port in the hardware
	 * table. It can be called from the Ethernet driver or the RDMA driver.
	 *
	 * For Ethernet usage, the hardware supports only one active user on a
	 * physical port. The driver checks on the port usage before programming
	 * the hardware when creating the RAW QP (RDMA driver) or exposing the
	 * device to kernel NET layer (Ethernet driver).
	 *
	 * Because the RDMA driver doesn't know in advance which QP type the
	 * user will create, it exposes the device with all its ports. The user
	 * may not be able to create RAW QP on a port if this port is already
	 * in use by the Ethernet driver from the kernel.
	 *
	 * This physical port limitation only applies to the RAW QP. For RC QP,
	 * the hardware doesn't have this limitation. The user can create RC
	 * QPs on a physical port up to the hardware limits independent of the
	 * Ethernet usage on the same port.
	 */
	mutex_lock(&apc->vport_mutex);
	if (apc->vport_use_count > 0) {
		mutex_unlock(&apc->vport_mutex);
		return -EBUSY;
	}
	apc->vport_use_count++;
	mutex_unlock(&apc->vport_mutex);

	mana_gd_init_req_hdr(&req.hdr, MANA_CONFIG_VPORT_TX,
			     sizeof(req), sizeof(resp));
	req.vport = apc->port_handle;
	req.pdid = protection_dom_id;
	req.doorbell_pageid = doorbell_pg_id;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to configure vPort: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_TX,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to configure vPort: %d, 0x%x\n",
			   err, resp.hdr.status);
		if (!err)
			err = -EPROTO;
		goto out;
	}

	apc->tx_shortform_allowed = resp.short_form_allowed;
	apc->tx_vp_offset = resp.tx_vport_offset;

	netdev_info(apc->ndev, "Configured vPort %llu PD %u DB %u\n",
		    apc->port_handle, protection_dom_id, doorbell_pg_id);

	return 0;

out:
	mana_uncfg_vport(apc);
	return err;
}
EXPORT_SYMBOL_NS(mana_cfg_vport, NET_MANA);

static int mana_cfg_vport_steering(struct mana_port_context *apc,
				   enum TRI_STATE rx,
				   bool update_default_rxobj, bool update_key,
				   bool update_tab)
{
	u16 num_entries = MANA_INDIRECT_TABLE_SIZE;
	struct mana_cfg_rx_steer_req_v2 *req;
	struct mana_cfg_rx_steer_resp resp = {};
	struct net_device *ndev = apc->ndev;
	mana_handle_t *req_indir_tab;
	u32 req_buf_size;
	int err;

	req_buf_size = sizeof(*req) + sizeof(mana_handle_t) * num_entries;
	req = kzalloc(req_buf_size, GFP_KERNEL);
	if (!req)
		return -ENOMEM;

	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
			     sizeof(resp));

	req->hdr.req.msg_version = GDMA_MESSAGE_V2;

	req->vport = apc->port_handle;
	req->num_indir_entries = num_entries;
	req->indir_tab_offset = sizeof(*req);
	req->rx_enable = rx;
	req->rss_enable = apc->rss_state;
	req->update_default_rxobj = update_default_rxobj;
	req->update_hashkey = update_key;
	req->update_indir_tab = update_tab;
	req->default_rxobj = apc->default_rxobj;
	req->cqe_coalescing_enable = 0;

	if (update_key)
		memcpy(&req->hashkey, apc->hashkey, MANA_HASH_KEY_SIZE);

	if (update_tab) {
		req_indir_tab = (mana_handle_t *)(req + 1);
		memcpy(req_indir_tab, apc->rxobj_table,
		       req->num_indir_entries * sizeof(mana_handle_t));
	}

	err = mana_send_request(apc->ac, req, req_buf_size, &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CONFIG_VPORT_RX,
				   sizeof(resp));
	if (err) {
		netdev_err(ndev, "vPort RX configuration failed: %d\n", err);
		goto out;
	}

	if (resp.hdr.status) {
		netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
			   resp.hdr.status);
		err = -EPROTO;
		goto out;
	}

	netdev_info(ndev, "Configured steering vPort %llu entries %u\n",
		    apc->port_handle, num_entries);

out:
	kfree(req);
	return err;
}

int mana_create_wq_obj(struct mana_port_context *apc,
		       mana_handle_t vport,
		       u32 wq_type, struct mana_obj_spec *wq_spec,
		       struct mana_obj_spec *cq_spec,
		       mana_handle_t *wq_obj)
{
	struct mana_create_wqobj_resp resp = {};
	struct mana_create_wqobj_req req = {};
	struct net_device *ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_CREATE_WQ_OBJ,
			     sizeof(req), sizeof(resp));
	req.vport = vport;
	req.wq_type = wq_type;
	req.wq_gdma_region = wq_spec->gdma_region;
	req.cq_gdma_region = cq_spec->gdma_region;
	req.wq_size = wq_spec->queue_size;
	req.cq_size = cq_spec->queue_size;
	req.cq_moderation_ctx_id = cq_spec->modr_ctx_id;
	req.cq_parent_qid = cq_spec->attached_eq;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to create WQ object: %d\n", err);
		goto out;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_CREATE_WQ_OBJ,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(ndev, "Failed to create WQ object: %d, 0x%x\n", err,
			   resp.hdr.status);
		if (!err)
			err = -EPROTO;
		goto out;
	}

	if (resp.wq_obj == INVALID_MANA_HANDLE) {
		netdev_err(ndev, "Got an invalid WQ object handle\n");
		err = -EPROTO;
		goto out;
	}

	*wq_obj = resp.wq_obj;
	wq_spec->queue_index = resp.wq_id;
	cq_spec->queue_index = resp.cq_id;

	return 0;
out:
	return err;
}
EXPORT_SYMBOL_NS(mana_create_wq_obj, NET_MANA);

void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
			 mana_handle_t wq_obj)
{
	struct mana_destroy_wqobj_resp resp = {};
	struct mana_destroy_wqobj_req req = {};
	struct net_device *ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_DESTROY_WQ_OBJ,
			     sizeof(req), sizeof(resp));
	req.wq_type = wq_type;
	req.wq_obj_handle = wq_obj;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to destroy WQ object: %d\n", err);
		return;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_DESTROY_WQ_OBJ,
				   sizeof(resp));
	if (err || resp.hdr.status)
		netdev_err(ndev, "Failed to destroy WQ object: %d, 0x%x\n", err,
			   resp.hdr.status);
}
EXPORT_SYMBOL_NS(mana_destroy_wq_obj, NET_MANA);

static void mana_destroy_eq(struct mana_context *ac)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct gdma_queue *eq;
	int i;

	if (!ac->eqs)
		return;

	for (i = 0; i < gc->max_num_queues; i++) {
		eq = ac->eqs[i].eq;
		if (!eq)
			continue;

		mana_gd_destroy_queue(gc, eq);
	}

	kfree(ac->eqs);
	ac->eqs = NULL;
}

static int mana_create_eq(struct mana_context *ac)
{
	struct gdma_dev *gd = ac->gdma_dev;
	struct gdma_context *gc = gd->gdma_context;
	struct gdma_queue_spec spec = {};
	int err;
	int i;

	ac->eqs = kcalloc(gc->max_num_queues, sizeof(struct mana_eq),
			  GFP_KERNEL);
	if (!ac->eqs)
		return -ENOMEM;

	spec.type = GDMA_EQ;
	spec.monitor_avl_buf = false;
	spec.queue_size = EQ_SIZE;
	spec.eq.callback = NULL;
	spec.eq.context = ac->eqs;
	spec.eq.log2_throttle_limit = LOG2_EQ_THROTTLE;

	for (i = 0; i < gc->max_num_queues; i++) {
		err = mana_gd_create_mana_eq(gd, &spec, &ac->eqs[i].eq);
		if (err)
			goto out;
	}

	return 0;
out:
	mana_destroy_eq(ac);
	return err;
}

static int mana_fence_rq(struct mana_port_context *apc, struct mana_rxq *rxq)
{
	struct mana_fence_rq_resp resp = {};
	struct mana_fence_rq_req req = {};
	int err;

	init_completion(&rxq->fence_event);

	mana_gd_init_req_hdr(&req.hdr, MANA_FENCE_RQ,
			     sizeof(req), sizeof(resp));
	req.wq_obj_handle = rxq->rxobj;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(apc->ndev, "Failed to fence RQ %u: %d\n",
			   rxq->rxq_idx, err);
		return err;
	}

	err = mana_verify_resp_hdr(&resp.hdr, MANA_FENCE_RQ, sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(apc->ndev, "Failed to fence RQ %u: %d, 0x%x\n",
			   rxq->rxq_idx, err, resp.hdr.status);
		if (!err)
			err = -EPROTO;

		return err;
	}

	if (wait_for_completion_timeout(&rxq->fence_event, 10 * HZ) == 0) {
		netdev_err(apc->ndev, "Failed to fence RQ %u: timed out\n",
			   rxq->rxq_idx);
		return -ETIMEDOUT;
	}

	return 0;
}

static void mana_fence_rqs(struct mana_port_context *apc)
{
	unsigned int rxq_idx;
	struct mana_rxq *rxq;
	int err;

	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
		rxq = apc->rxqs[rxq_idx];
		err = mana_fence_rq(apc, rxq);

		/* In case of any error, use sleep instead. */
		if (err)
			msleep(100);
	}
}

static int mana_move_wq_tail(struct gdma_queue *wq, u32 num_units)
{
	u32 used_space_old;
	u32 used_space_new;

	used_space_old = wq->head - wq->tail;
	used_space_new = wq->head - (wq->tail + num_units);

	if (WARN_ON_ONCE(used_space_new > used_space_old))
		return -ERANGE;

	wq->tail += num_units;
	return 0;
}

static void mana_unmap_skb(struct sk_buff *skb, struct mana_port_context *apc)
{
	struct mana_skb_head *ash = (struct mana_skb_head *)skb->head;
	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
	struct device *dev = gc->dev;
	int i;

	dma_unmap_single(dev, ash->dma_handle[0], ash->size[0], DMA_TO_DEVICE);

	for (i = 1; i < skb_shinfo(skb)->nr_frags + 1; i++)
		dma_unmap_page(dev, ash->dma_handle[i], ash->size[i],
			       DMA_TO_DEVICE);
}

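/* TX completion handler: drains up to CQE_POLLING_BUFFER completions from the
 * send CQ, dequeues the corresponding skbs from txq->pending_skbs in order,
 * unmaps and frees them, advances the SQ tail by the consumed WQE units and,
 * if the netdev queue was stopped, wakes it once enough space is available.
 */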
static void mana_poll_tx_cq(struct mana_cq *cq)
{
	struct gdma_comp *completions = cq->gdma_comp_buf;
	struct gdma_posted_wqe_info *wqe_info;
	unsigned int pkt_transmitted = 0;
	unsigned int wqe_unit_cnt = 0;
	struct mana_txq *txq = cq->txq;
	struct mana_port_context *apc;
	struct netdev_queue *net_txq;
	struct gdma_queue *gdma_wq;
	unsigned int avail_space;
	struct net_device *ndev;
	struct sk_buff *skb;
	bool txq_stopped;
	int comp_read;
	int i;

	ndev = txq->ndev;
	apc = netdev_priv(ndev);

	comp_read = mana_gd_poll_cq(cq->gdma_cq, completions,
				    CQE_POLLING_BUFFER);

	if (comp_read < 1)
		return;

	for (i = 0; i < comp_read; i++) {
		struct mana_tx_comp_oob *cqe_oob;

		if (WARN_ON_ONCE(!completions[i].is_sq))
			return;

		cqe_oob = (struct mana_tx_comp_oob *)completions[i].cqe_data;
		if (WARN_ON_ONCE(cqe_oob->cqe_hdr.client_type !=
				 MANA_CQE_COMPLETION))
			return;

		switch (cqe_oob->cqe_hdr.cqe_type) {
		case CQE_TX_OKAY:
			break;

		case CQE_TX_SA_DROP:
		case CQE_TX_MTU_DROP:
		case CQE_TX_INVALID_OOB:
		case CQE_TX_INVALID_ETH_TYPE:
		case CQE_TX_HDR_PROCESSING_ERROR:
		case CQE_TX_VF_DISABLED:
		case CQE_TX_VPORT_IDX_OUT_OF_RANGE:
		case CQE_TX_VPORT_DISABLED:
		case CQE_TX_VLAN_TAGGING_VIOLATION:
			if (net_ratelimit())
				netdev_err(ndev, "TX: CQE error %d\n",
					   cqe_oob->cqe_hdr.cqe_type);

			apc->eth_stats.tx_cqe_err++;
			break;

		default:
			/* If the CQE type is unknown, log an error,
			 * and still free the SKB, update tail, etc.
			 */
			if (net_ratelimit())
				netdev_err(ndev, "TX: unknown CQE type %d\n",
					   cqe_oob->cqe_hdr.cqe_type);

			apc->eth_stats.tx_cqe_unknown_type++;
			break;
		}

		if (WARN_ON_ONCE(txq->gdma_txq_id != completions[i].wq_num))
			return;

		skb = skb_dequeue(&txq->pending_skbs);
		if (WARN_ON_ONCE(!skb))
			return;

		wqe_info = (struct gdma_posted_wqe_info *)skb->cb;
		wqe_unit_cnt += wqe_info->wqe_size_in_bu;

		mana_unmap_skb(skb, apc);

		napi_consume_skb(skb, cq->budget);

		pkt_transmitted++;
	}

	if (WARN_ON_ONCE(wqe_unit_cnt == 0))
		return;

	mana_move_wq_tail(txq->gdma_sq, wqe_unit_cnt);

	gdma_wq = txq->gdma_sq;
	avail_space = mana_gd_wq_avail_space(gdma_wq);

	/* Ensure tail updated before checking q stop */
	smp_mb();

	net_txq = txq->net_txq;
	txq_stopped = netif_tx_queue_stopped(net_txq);

	/* Ensure checking txq_stopped before apc->port_is_up. */
	smp_rmb();

	if (txq_stopped && apc->port_is_up && avail_space >= MAX_TX_WQE_SIZE) {
		netif_tx_wake_queue(net_txq);
		apc->eth_stats.wake_queue++;
	}

	if (atomic_sub_return(pkt_transmitted, &txq->pending_sends) < 0)
		WARN_ON_ONCE(1);

	cq->work_done = pkt_transmitted;
}

static void mana_post_pkt_rxq(struct mana_rxq *rxq)
{
	struct mana_recv_buf_oob *recv_buf_oob;
	u32 curr_index;
	int err;

	curr_index = rxq->buf_index++;
	if (rxq->buf_index == rxq->num_rx_buf)
		rxq->buf_index = 0;

	recv_buf_oob = &rxq->rx_oobs[curr_index];

	err = mana_gd_post_work_request(rxq->gdma_rq, &recv_buf_oob->wqe_req,
					&recv_buf_oob->wqe_inf);
	if (WARN_ON_ONCE(err))
		return;

	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
}

static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
				      uint pkt_len, struct xdp_buff *xdp)
{
	struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size);

	if (!skb)
		return NULL;

	if (xdp->data_hard_start) {
		skb_reserve(skb, xdp->data - xdp->data_hard_start);
		skb_put(skb, xdp->data_end - xdp->data);
		return skb;
	}

	skb_reserve(skb, rxq->headroom);
	skb_put(skb, pkt_len);

	return skb;
}

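/* Deliver one received buffer to the stack: run the XDP program first, then
 * build an skb around the buffer, populate checksum, RSS hash and VLAN
 * metadata from the RX completion OOB, update the per-queue counters and
 * hand the skb to GRO (or to mana_xdp_tx() when the program returned XDP_TX).
 */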
static void mana_rx_skb(void *buf_va, bool from_pool,
			struct mana_rxcomp_oob *cqe, struct mana_rxq *rxq)
{
	struct mana_stats_rx *rx_stats = &rxq->stats;
	struct net_device *ndev = rxq->ndev;
	uint pkt_len = cqe->ppi[0].pkt_len;
	u16 rxq_idx = rxq->rxq_idx;
	struct napi_struct *napi;
	struct xdp_buff xdp = {};
	struct sk_buff *skb;
	u32 hash_value;
	u32 act;

	rxq->rx_cq.work_done++;
	napi = &rxq->rx_cq.napi;

	if (!buf_va) {
		++ndev->stats.rx_dropped;
		return;
	}

	act = mana_run_xdp(ndev, rxq, &xdp, buf_va, pkt_len);

	if (act == XDP_REDIRECT && !rxq->xdp_rc)
		return;

	if (act != XDP_PASS && act != XDP_TX)
		goto drop_xdp;

	skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp);

	if (!skb)
		goto drop;

	if (from_pool)
		skb_mark_for_recycle(skb);

	skb->dev = napi->dev;

	skb->protocol = eth_type_trans(skb, ndev);
	skb_checksum_none_assert(skb);
	skb_record_rx_queue(skb, rxq_idx);

	if ((ndev->features & NETIF_F_RXCSUM) && cqe->rx_iphdr_csum_succeed) {
		if (cqe->rx_tcp_csum_succeed || cqe->rx_udp_csum_succeed)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	if (cqe->rx_hashtype != 0 && (ndev->features & NETIF_F_RXHASH)) {
		hash_value = cqe->ppi[0].pkt_hash;

		if (cqe->rx_hashtype & MANA_HASH_L4)
			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L4);
		else
			skb_set_hash(skb, hash_value, PKT_HASH_TYPE_L3);
	}

	if (cqe->rx_vlantag_present) {
		u16 vlan_tci = cqe->rx_vlan_id;

		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
	}

	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->packets++;
	rx_stats->bytes += pkt_len;
	u64_stats_update_end(&rx_stats->syncp);

	if (act == XDP_TX) {
		skb_set_queue_mapping(skb, rxq_idx);
		mana_xdp_tx(skb, ndev);
		return;
	}

	napi_gro_receive(napi, skb);

	return;

drop_xdp:
	u64_stats_update_begin(&rx_stats->syncp);
	rx_stats->xdp_drop++;
	u64_stats_update_end(&rx_stats->syncp);

drop:
	if (from_pool) {
		page_pool_recycle_direct(rxq->page_pool,
					 virt_to_head_page(buf_va));
	} else {
		WARN_ON_ONCE(rxq->xdp_save_va);
		/* Save for reuse */
		rxq->xdp_save_va = buf_va;
	}

	++ndev->stats.rx_dropped;
}

static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
			     dma_addr_t *da, bool *from_pool, bool is_napi)
{
	struct page *page;
	void *va;

	*from_pool = false;

	/* Reuse XDP dropped page if available */
	if (rxq->xdp_save_va) {
		va = rxq->xdp_save_va;
		rxq->xdp_save_va = NULL;
	} else if (rxq->alloc_size > PAGE_SIZE) {
		if (is_napi)
			va = napi_alloc_frag(rxq->alloc_size);
		else
			va = netdev_alloc_frag(rxq->alloc_size);

		if (!va)
			return NULL;

		page = virt_to_head_page(va);
		/* Check if the frag falls back to single page */
		if (compound_order(page) < get_order(rxq->alloc_size)) {
			put_page(page);
			return NULL;
		}
	} else {
		page = page_pool_dev_alloc_pages(rxq->page_pool);
		if (!page)
			return NULL;

		*from_pool = true;
		va = page_to_virt(page);
	}

	*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
			     DMA_FROM_DEVICE);
	if (dma_mapping_error(dev, *da)) {
		if (*from_pool)
			page_pool_put_full_page(rxq->page_pool, page, false);
		else
			put_page(virt_to_head_page(va));

		return NULL;
	}

	return va;
}

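/* Refill one RX OOB entry: a new receive buffer is allocated and DMA-mapped
 * first, and only on success is the old buffer unmapped and handed back to
 * the caller, so the receive queue never ends up with an empty slot.
 */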
/* Allocate frag for rx buffer, and save the old buf */
static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq,
			       struct mana_recv_buf_oob *rxoob, void **old_buf,
			       bool *old_fp)
{
	bool from_pool;
	dma_addr_t da;
	void *va;

	va = mana_get_rxfrag(rxq, dev, &da, &from_pool, true);
	if (!va)
		return;

	dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize,
			 DMA_FROM_DEVICE);
	*old_buf = rxoob->buf_va;
	*old_fp = rxoob->from_pool;

	rxoob->buf_va = va;
	rxoob->sgl[0].address = da;
	rxoob->from_pool = from_pool;
}

static void mana_process_rx_cqe(struct mana_rxq *rxq, struct mana_cq *cq,
				struct gdma_comp *cqe)
{
	struct mana_rxcomp_oob *oob = (struct mana_rxcomp_oob *)cqe->cqe_data;
	struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;
	struct net_device *ndev = rxq->ndev;
	struct mana_recv_buf_oob *rxbuf_oob;
	struct mana_port_context *apc;
	struct device *dev = gc->dev;
	void *old_buf = NULL;
	u32 curr, pktlen;
	bool old_fp;

	apc = netdev_priv(ndev);

	switch (oob->cqe_hdr.cqe_type) {
	case CQE_RX_OKAY:
		break;

	case CQE_RX_TRUNCATED:
		++ndev->stats.rx_dropped;
		rxbuf_oob = &rxq->rx_oobs[rxq->buf_index];
		netdev_warn_once(ndev, "Dropped a truncated packet\n");
		goto drop;

	case CQE_RX_COALESCED_4:
		netdev_err(ndev, "RX coalescing is unsupported\n");
		apc->eth_stats.rx_coalesced_err++;
		return;

	case CQE_RX_OBJECT_FENCE:
		complete(&rxq->fence_event);
		return;

	default:
		netdev_err(ndev, "Unknown RX CQE type = %d\n",
			   oob->cqe_hdr.cqe_type);
		apc->eth_stats.rx_cqe_unknown_type++;
		return;
	}

	pktlen = oob->ppi[0].pkt_len;

	if (pktlen == 0) {
		/* data packets should never have packetlength of zero */
		netdev_err(ndev, "RX pkt len=0, rq=%u, cq=%u, rxobj=0x%llx\n",
			   rxq->gdma_id, cq->gdma_id, rxq->rxobj);
		return;
	}

	curr = rxq->buf_index;
	rxbuf_oob = &rxq->rx_oobs[curr];
	WARN_ON_ONCE(rxbuf_oob->wqe_inf.wqe_size_in_bu != 1);

	mana_refill_rx_oob(dev, rxq, rxbuf_oob, &old_buf, &old_fp);

	/* Unsuccessful refill will have old_buf == NULL.
	 * In this case, mana_rx_skb() will drop the packet.
	 */
	mana_rx_skb(old_buf, old_fp, oob, rxq);

drop:
	mana_move_wq_tail(rxq->gdma_rq, rxbuf_oob->wqe_inf.wqe_size_in_bu);

	mana_post_pkt_rxq(rxq);
}

static void mana_poll_rx_cq(struct mana_cq *cq)
{
	struct gdma_comp *comp = cq->gdma_comp_buf;
	struct mana_rxq *rxq = cq->rxq;
	int comp_read, i;

	comp_read = mana_gd_poll_cq(cq->gdma_cq, comp, CQE_POLLING_BUFFER);
	WARN_ON_ONCE(comp_read > CQE_POLLING_BUFFER);

	rxq->xdp_flush = false;

	for (i = 0; i < comp_read; i++) {
		if (WARN_ON_ONCE(comp[i].is_sq))
			return;

		/* verify recv cqe references the right rxq */
		if (WARN_ON_ONCE(comp[i].wq_num != cq->rxq->gdma_id))
			return;

		mana_process_rx_cqe(rxq, cq, &comp[i]);
	}

	if (comp_read > 0) {
		struct gdma_context *gc = rxq->gdma_rq->gdma_dev->gdma_context;

		mana_gd_wq_ring_doorbell(gc, rxq->gdma_rq);
	}

	if (rxq->xdp_flush)
		xdp_do_flush();
}

static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue)
{
	struct mana_cq *cq = context;
	u8 arm_bit;
	int w;

	WARN_ON_ONCE(cq->gdma_cq != gdma_queue);

	if (cq->type == MANA_CQ_TYPE_RX)
		mana_poll_rx_cq(cq);
	else
		mana_poll_tx_cq(cq);

	w = cq->work_done;

	if (w < cq->budget &&
	    napi_complete_done(&cq->napi, w)) {
		arm_bit = SET_ARM_BIT;
	} else {
		arm_bit = 0;
	}

	mana_gd_ring_cq(gdma_queue, arm_bit);

	return w;
}

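/* NAPI poll callback: each CQ has its own NAPI context.  The handler polls
 * the CQ under the given budget and re-arms the completion queue only when
 * napi_complete_done() accepted the completion, i.e. when less work than the
 * budget was done.
 */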
static int mana_poll(struct napi_struct *napi, int budget)
{
	struct mana_cq *cq = container_of(napi, struct mana_cq, napi);
	int w;

	cq->work_done = 0;
	cq->budget = budget;

	w = mana_cq_handler(cq, cq->gdma_cq);

	return min(w, budget);
}

static void mana_schedule_napi(void *context, struct gdma_queue *gdma_queue)
{
	struct mana_cq *cq = context;

	napi_schedule_irqoff(&cq->napi);
}

static void mana_deinit_cq(struct mana_port_context *apc, struct mana_cq *cq)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;

	if (!cq->gdma_cq)
		return;

	mana_gd_destroy_queue(gd->gdma_context, cq->gdma_cq);
}

static void mana_deinit_txq(struct mana_port_context *apc, struct mana_txq *txq)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;

	if (!txq->gdma_sq)
		return;

	mana_gd_destroy_queue(gd->gdma_context, txq->gdma_sq);
}

static void mana_destroy_txq(struct mana_port_context *apc)
{
	struct napi_struct *napi;
	int i;

	if (!apc->tx_qp)
		return;

	for (i = 0; i < apc->num_queues; i++) {
		napi = &apc->tx_qp[i].tx_cq.napi;
		napi_synchronize(napi);
		napi_disable(napi);
		netif_napi_del(napi);

		mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object);

		mana_deinit_cq(apc, &apc->tx_qp[i].tx_cq);

		mana_deinit_txq(apc, &apc->tx_qp[i].txq);
	}

	kfree(apc->tx_qp);
	apc->tx_qp = NULL;
}

static int mana_create_txq(struct mana_port_context *apc,
			   struct net_device *net)
{
	struct mana_context *ac = apc->ac;
	struct gdma_dev *gd = ac->gdma_dev;
	struct mana_obj_spec wq_spec;
	struct mana_obj_spec cq_spec;
	struct gdma_queue_spec spec;
	struct gdma_context *gc;
	struct mana_txq *txq;
	struct mana_cq *cq;
	u32 txq_size;
	u32 cq_size;
	int err;
	int i;

	apc->tx_qp = kcalloc(apc->num_queues, sizeof(struct mana_tx_qp),
			     GFP_KERNEL);
	if (!apc->tx_qp)
		return -ENOMEM;

	/* The minimum size of the WQE is 32 bytes, hence
	 * MAX_SEND_BUFFERS_PER_QUEUE represents the maximum number of WQEs
	 * the SQ can store. This value is then used to size other queues
	 * to prevent overflow.
	 */
	txq_size = MAX_SEND_BUFFERS_PER_QUEUE * 32;
	BUILD_BUG_ON(!PAGE_ALIGNED(txq_size));

	cq_size = MAX_SEND_BUFFERS_PER_QUEUE * COMP_ENTRY_SIZE;
	cq_size = PAGE_ALIGN(cq_size);

	gc = gd->gdma_context;

	for (i = 0; i < apc->num_queues; i++) {
		apc->tx_qp[i].tx_object = INVALID_MANA_HANDLE;

		txq = &apc->tx_qp[i].txq;

		u64_stats_init(&txq->stats.syncp);
		txq->ndev = net;
		txq->net_txq = netdev_get_tx_queue(net, i);
		txq->vp_offset = apc->tx_vp_offset;
		skb_queue_head_init(&txq->pending_skbs);

		memset(&spec, 0, sizeof(spec));
		spec.type = GDMA_SQ;
		spec.monitor_avl_buf = true;
		spec.queue_size = txq_size;
		err = mana_gd_create_mana_wq_cq(gd, &spec, &txq->gdma_sq);
		if (err)
			goto out;

		/* Create SQ's CQ */
		cq = &apc->tx_qp[i].tx_cq;
		cq->type = MANA_CQ_TYPE_TX;

		cq->txq = txq;

		memset(&spec, 0, sizeof(spec));
		spec.type = GDMA_CQ;
		spec.monitor_avl_buf = false;
		spec.queue_size = cq_size;
		spec.cq.callback = mana_schedule_napi;
		spec.cq.parent_eq = ac->eqs[i].eq;
		spec.cq.context = cq;
		err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
		if (err)
			goto out;

		memset(&wq_spec, 0, sizeof(wq_spec));
		memset(&cq_spec, 0, sizeof(cq_spec));

		wq_spec.gdma_region = txq->gdma_sq->mem_info.dma_region_handle;
		wq_spec.queue_size = txq->gdma_sq->queue_size;

		cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle;
		cq_spec.queue_size = cq->gdma_cq->queue_size;
		cq_spec.modr_ctx_id = 0;
		cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;

		err = mana_create_wq_obj(apc, apc->port_handle, GDMA_SQ,
					 &wq_spec, &cq_spec,
					 &apc->tx_qp[i].tx_object);
		if (err)
			goto out;

		txq->gdma_sq->id = wq_spec.queue_index;
		cq->gdma_cq->id = cq_spec.queue_index;

		txq->gdma_sq->mem_info.dma_region_handle =
			GDMA_INVALID_DMA_REGION;
		cq->gdma_cq->mem_info.dma_region_handle =
			GDMA_INVALID_DMA_REGION;

		txq->gdma_txq_id = txq->gdma_sq->id;

		cq->gdma_id = cq->gdma_cq->id;

		if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
			err = -EINVAL;
			goto out;
		}

		gc->cq_table[cq->gdma_id] = cq->gdma_cq;

		netif_napi_add_tx(net, &cq->napi, mana_poll);
		napi_enable(&cq->napi);

		mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);
	}

	return 0;
out:
	mana_destroy_txq(apc);
	return err;
}

static void mana_destroy_rxq(struct mana_port_context *apc,
			     struct mana_rxq *rxq, bool validate_state)
{
	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
	struct mana_recv_buf_oob *rx_oob;
	struct device *dev = gc->dev;
	struct napi_struct *napi;
	struct page *page;
	int i;

	if (!rxq)
		return;

	napi = &rxq->rx_cq.napi;

	if (validate_state)
		napi_synchronize(napi);

	napi_disable(napi);

	xdp_rxq_info_unreg(&rxq->xdp_rxq);

	netif_napi_del(napi);

	mana_destroy_wq_obj(apc, GDMA_RQ, rxq->rxobj);

	mana_deinit_cq(apc, &rxq->rx_cq);

	if (rxq->xdp_save_va)
		put_page(virt_to_head_page(rxq->xdp_save_va));

	for (i = 0; i < rxq->num_rx_buf; i++) {
		rx_oob = &rxq->rx_oobs[i];

		if (!rx_oob->buf_va)
			continue;

		dma_unmap_single(dev, rx_oob->sgl[0].address,
				 rx_oob->sgl[0].size, DMA_FROM_DEVICE);

		page = virt_to_head_page(rx_oob->buf_va);

		if (rx_oob->from_pool)
			page_pool_put_full_page(rxq->page_pool, page, false);
		else
			put_page(page);

		rx_oob->buf_va = NULL;
	}

	page_pool_destroy(rxq->page_pool);

	mana_gd_destroy_queue(gc, rxq->gdma_rq);

	kfree(rxq);
}

static int mana_fill_rx_oob(struct mana_recv_buf_oob *rx_oob, u32 mem_key,
			    struct mana_rxq *rxq, struct device *dev)
{
	struct mana_port_context *mpc = netdev_priv(rxq->ndev);
	bool from_pool = false;
	dma_addr_t da;
	void *va;

	if (mpc->rxbufs_pre)
		va = mana_get_rxbuf_pre(rxq, &da);
	else
		va = mana_get_rxfrag(rxq, dev, &da, &from_pool, false);

	if (!va)
		return -ENOMEM;

	rx_oob->buf_va = va;
	rx_oob->from_pool = from_pool;

	rx_oob->sgl[0].address = da;
	rx_oob->sgl[0].size = rxq->datasize;
	rx_oob->sgl[0].mem_key = mem_key;

	return 0;
}

#define MANA_WQE_HEADER_SIZE 16
#define MANA_WQE_SGE_SIZE 16

static int mana_alloc_rx_wqe(struct mana_port_context *apc,
			     struct mana_rxq *rxq, u32 *rxq_size, u32 *cq_size)
{
	struct gdma_context *gc = apc->ac->gdma_dev->gdma_context;
	struct mana_recv_buf_oob *rx_oob;
	struct device *dev = gc->dev;
	u32 buf_idx;
	int ret;

	WARN_ON(rxq->datasize == 0);

	*rxq_size = 0;
	*cq_size = 0;

	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
		rx_oob = &rxq->rx_oobs[buf_idx];
		memset(rx_oob, 0, sizeof(*rx_oob));

		rx_oob->num_sge = 1;

		ret = mana_fill_rx_oob(rx_oob, apc->ac->gdma_dev->gpa_mkey, rxq,
				       dev);
		if (ret)
			return ret;

		rx_oob->wqe_req.sgl = rx_oob->sgl;
		rx_oob->wqe_req.num_sge = rx_oob->num_sge;
		rx_oob->wqe_req.inline_oob_size = 0;
		rx_oob->wqe_req.inline_oob_data = NULL;
		rx_oob->wqe_req.flags = 0;
		rx_oob->wqe_req.client_data_unit = 0;

		*rxq_size += ALIGN(MANA_WQE_HEADER_SIZE +
				   MANA_WQE_SGE_SIZE * rx_oob->num_sge, 32);
		*cq_size += COMP_ENTRY_SIZE;
	}

	return 0;
}

static int mana_push_wqe(struct mana_rxq *rxq)
{
	struct mana_recv_buf_oob *rx_oob;
	u32 buf_idx;
	int err;

	for (buf_idx = 0; buf_idx < rxq->num_rx_buf; buf_idx++) {
		rx_oob = &rxq->rx_oobs[buf_idx];

		err = mana_gd_post_and_ring(rxq->gdma_rq, &rx_oob->wqe_req,
					    &rx_oob->wqe_inf);
		if (err)
			return err;
	}

	return 0;
}

static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc)
{
	struct page_pool_params pprm = {};
	int ret;

	pprm.pool_size = RX_BUFFERS_PER_QUEUE;
	pprm.nid = gc->numa_node;
	pprm.napi = &rxq->rx_cq.napi;

	rxq->page_pool = page_pool_create(&pprm);

	if (IS_ERR(rxq->page_pool)) {
		ret = PTR_ERR(rxq->page_pool);
		rxq->page_pool = NULL;
		return ret;
	}

	return 0;
}

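/* Create one RX queue: size the RQ and CQ from the per-buffer WQE layout,
 * create the GDMA queues, bind them into a HW WQ object, post the initial
 * receive WQEs, register the queue with XDP and the page pool, and finally
 * enable NAPI and arm the CQ.
 */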
static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
					u32 rxq_idx, struct mana_eq *eq,
					struct net_device *ndev)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_obj_spec wq_spec;
	struct mana_obj_spec cq_spec;
	struct gdma_queue_spec spec;
	struct mana_cq *cq = NULL;
	struct gdma_context *gc;
	u32 cq_size, rq_size;
	struct mana_rxq *rxq;
	int err;

	gc = gd->gdma_context;

	rxq = kzalloc(struct_size(rxq, rx_oobs, RX_BUFFERS_PER_QUEUE),
		      GFP_KERNEL);
	if (!rxq)
		return NULL;

	rxq->ndev = ndev;
	rxq->num_rx_buf = RX_BUFFERS_PER_QUEUE;
	rxq->rxq_idx = rxq_idx;
	rxq->rxobj = INVALID_MANA_HANDLE;

	mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size,
			   &rxq->headroom);

	/* Create page pool for RX queue */
	err = mana_create_page_pool(rxq, gc);
	if (err) {
		netdev_err(ndev, "Create page pool err:%d\n", err);
		goto out;
	}

	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
	if (err)
		goto out;

	rq_size = PAGE_ALIGN(rq_size);
	cq_size = PAGE_ALIGN(cq_size);

	memset(&spec, 0, sizeof(spec));
	spec.type = GDMA_RQ;
	spec.monitor_avl_buf = true;
	spec.queue_size = rq_size;
	err = mana_gd_create_mana_wq_cq(gd, &spec, &rxq->gdma_rq);
	if (err)
		goto out;

	/* Create RQ's CQ */
	cq = &rxq->rx_cq;
	cq->type = MANA_CQ_TYPE_RX;
	cq->rxq = rxq;

	memset(&spec, 0, sizeof(spec));
	spec.type = GDMA_CQ;
	spec.monitor_avl_buf = false;
	spec.queue_size = cq_size;
	spec.cq.callback = mana_schedule_napi;
	spec.cq.parent_eq = eq->eq;
	spec.cq.context = cq;
	err = mana_gd_create_mana_wq_cq(gd, &spec, &cq->gdma_cq);
	if (err)
		goto out;

	memset(&wq_spec, 0, sizeof(wq_spec));
	memset(&cq_spec, 0, sizeof(cq_spec));
	wq_spec.gdma_region = rxq->gdma_rq->mem_info.dma_region_handle;
	wq_spec.queue_size = rxq->gdma_rq->queue_size;

	cq_spec.gdma_region = cq->gdma_cq->mem_info.dma_region_handle;
	cq_spec.queue_size = cq->gdma_cq->queue_size;
	cq_spec.modr_ctx_id = 0;
	cq_spec.attached_eq = cq->gdma_cq->cq.parent->id;

	err = mana_create_wq_obj(apc, apc->port_handle, GDMA_RQ,
				 &wq_spec, &cq_spec, &rxq->rxobj);
	if (err)
		goto out;

	rxq->gdma_rq->id = wq_spec.queue_index;
	cq->gdma_cq->id = cq_spec.queue_index;

	rxq->gdma_rq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;
	cq->gdma_cq->mem_info.dma_region_handle = GDMA_INVALID_DMA_REGION;

	rxq->gdma_id = rxq->gdma_rq->id;
	cq->gdma_id = cq->gdma_cq->id;

	err = mana_push_wqe(rxq);
	if (err)
		goto out;

	if (WARN_ON(cq->gdma_id >= gc->max_num_cqs)) {
		err = -EINVAL;
		goto out;
	}

	gc->cq_table[cq->gdma_id] = cq->gdma_cq;

	netif_napi_add_weight(ndev, &cq->napi, mana_poll, 1);

	WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx,
				 cq->napi.napi_id));
	WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
					   rxq->page_pool));

	napi_enable(&cq->napi);

	mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT);

	return rxq;
out:
	netdev_err(ndev, "Failed to create RXQ: err = %d\n", err);

	mana_destroy_rxq(apc, rxq, false);

	if (cq)
		mana_deinit_cq(apc, cq);

	return NULL;
}

static int mana_add_rx_queues(struct mana_port_context *apc,
			      struct net_device *ndev)
{
	struct mana_context *ac = apc->ac;
	struct mana_rxq *rxq;
	int err = 0;
	int i;

	for (i = 0; i < apc->num_queues; i++) {
		rxq = mana_create_rxq(apc, i, &ac->eqs[i], ndev);
		if (!rxq) {
			err = -ENOMEM;
			goto out;
		}

		u64_stats_init(&rxq->stats.syncp);

		apc->rxqs[i] = rxq;
	}

	apc->default_rxobj = apc->rxqs[0]->rxobj;
out:
	return err;
}

static void mana_destroy_vport(struct mana_port_context *apc)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_rxq *rxq;
	u32 rxq_idx;

	for (rxq_idx = 0; rxq_idx < apc->num_queues; rxq_idx++) {
		rxq = apc->rxqs[rxq_idx];
		if (!rxq)
			continue;

		mana_destroy_rxq(apc, rxq, true);
		apc->rxqs[rxq_idx] = NULL;
	}

	mana_destroy_txq(apc);
	mana_uncfg_vport(apc);

	if (gd->gdma_context->is_pf)
		mana_pf_deregister_hw_vport(apc);
}

static int mana_create_vport(struct mana_port_context *apc,
			     struct net_device *net)
{
	struct gdma_dev *gd = apc->ac->gdma_dev;
	int err;

	apc->default_rxobj = INVALID_MANA_HANDLE;

	if (gd->gdma_context->is_pf) {
		err = mana_pf_register_hw_vport(apc);
		if (err)
			return err;
	}

	err = mana_cfg_vport(apc, gd->pdid, gd->doorbell);
	if (err)
		return err;

	return mana_create_txq(apc, net);
}

static void mana_rss_table_init(struct mana_port_context *apc)
{
	int i;

	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++)
		apc->indir_table[i] =
			ethtool_rxfh_indir_default(i, apc->num_queues);
}

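/* Apply the RSS configuration: translate the indirection table of queue
 * indices into RX object handles, push the hash key and table to the HW via
 * mana_cfg_vport_steering(), and fence the RQs so no stale completions are
 * in flight when the new steering takes effect.
 */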
int mana_config_rss(struct mana_port_context *apc, enum TRI_STATE rx,
		    bool update_hash, bool update_tab)
{
	u32 queue_idx;
	int err;
	int i;

	if (update_tab) {
		for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
			queue_idx = apc->indir_table[i];
			apc->rxobj_table[i] = apc->rxqs[queue_idx]->rxobj;
		}
	}

	err = mana_cfg_vport_steering(apc, rx, true, update_hash, update_tab);
	if (err)
		return err;

	mana_fence_rqs(apc);

	return 0;
}

void mana_query_gf_stats(struct mana_port_context *apc)
{
	struct mana_query_gf_stat_resp resp = {};
	struct mana_query_gf_stat_req req = {};
	struct net_device *ndev = apc->ndev;
	int err;

	mana_gd_init_req_hdr(&req.hdr, MANA_QUERY_GF_STAT,
			     sizeof(req), sizeof(resp));
	req.req_stats = STATISTICS_FLAGS_HC_TX_BYTES |
			STATISTICS_FLAGS_HC_TX_UCAST_PACKETS |
			STATISTICS_FLAGS_HC_TX_UCAST_BYTES |
			STATISTICS_FLAGS_HC_TX_MCAST_PACKETS |
			STATISTICS_FLAGS_HC_TX_MCAST_BYTES |
			STATISTICS_FLAGS_HC_TX_BCAST_PACKETS |
			STATISTICS_FLAGS_HC_TX_BCAST_BYTES;

	err = mana_send_request(apc->ac, &req, sizeof(req), &resp,
				sizeof(resp));
	if (err) {
		netdev_err(ndev, "Failed to query GF stats: %d\n", err);
		return;
	}
	err = mana_verify_resp_hdr(&resp.hdr, MANA_QUERY_GF_STAT,
				   sizeof(resp));
	if (err || resp.hdr.status) {
		netdev_err(ndev, "Failed to query GF stats: %d, 0x%x\n", err,
			   resp.hdr.status);
		return;
	}

	apc->eth_stats.hc_tx_bytes = resp.hc_tx_bytes;
	apc->eth_stats.hc_tx_ucast_pkts = resp.hc_tx_ucast_pkts;
	apc->eth_stats.hc_tx_ucast_bytes = resp.hc_tx_ucast_bytes;
	apc->eth_stats.hc_tx_bcast_pkts = resp.hc_tx_bcast_pkts;
	apc->eth_stats.hc_tx_bcast_bytes = resp.hc_tx_bcast_bytes;
	apc->eth_stats.hc_tx_mcast_pkts = resp.hc_tx_mcast_pkts;
	apc->eth_stats.hc_tx_mcast_bytes = resp.hc_tx_mcast_bytes;
}

static int mana_init_port(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	u32 max_txq, max_rxq, max_queues;
	int port_idx = apc->port_idx;
	u32 num_indirect_entries;
	int err;

	err = mana_init_port_context(apc);
	if (err)
		return err;

	err = mana_query_vport_cfg(apc, port_idx, &max_txq, &max_rxq,
				   &num_indirect_entries);
	if (err) {
		netdev_err(ndev, "Failed to query info for vPort %d\n",
			   port_idx);
		goto reset_apc;
	}

	max_queues = min_t(u32, max_txq, max_rxq);
	if (apc->max_queues > max_queues)
		apc->max_queues = max_queues;

	if (apc->num_queues > apc->max_queues)
		apc->num_queues = apc->max_queues;

	eth_hw_addr_set(ndev, apc->mac_addr);

	return 0;

reset_apc:
	mana_cleanup_port_context(apc);
	return err;
}

int mana_alloc_queues(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	struct gdma_dev *gd = apc->ac->gdma_dev;
	int err;

	err = mana_create_vport(apc, ndev);
	if (err)
		return err;

	err = netif_set_real_num_tx_queues(ndev, apc->num_queues);
	if (err)
		goto destroy_vport;

	err = mana_add_rx_queues(apc, ndev);
	if (err)
		goto destroy_vport;

	apc->rss_state = apc->num_queues > 1 ? TRI_STATE_TRUE : TRI_STATE_FALSE;

	err = netif_set_real_num_rx_queues(ndev, apc->num_queues);
	if (err)
		goto destroy_vport;

	mana_rss_table_init(apc);

	err = mana_config_rss(apc, TRI_STATE_TRUE, true, true);
	if (err)
		goto destroy_vport;

	if (gd->gdma_context->is_pf) {
		err = mana_pf_register_filter(apc);
		if (err)
			goto destroy_vport;
	}

	mana_chn_setxdp(apc, mana_xdp_get(apc));

	return 0;

destroy_vport:
	mana_destroy_vport(apc);
	return err;
}

int mana_attach(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	err = mana_init_port(ndev);
	if (err)
		return err;

	if (apc->port_st_save) {
		err = mana_alloc_queues(ndev);
		if (err) {
			mana_cleanup_port_context(apc);
			return err;
		}
	}

	apc->port_is_up = apc->port_st_save;

	/* Ensure port state updated before txq state */
	smp_wmb();

	if (apc->port_is_up)
		netif_carrier_on(ndev);

	netif_device_attach(ndev);

	return 0;
}

static int mana_dealloc_queues(struct net_device *ndev)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	unsigned long timeout = jiffies + 120 * HZ;
	struct gdma_dev *gd = apc->ac->gdma_dev;
	struct mana_txq *txq;
	struct sk_buff *skb;
	u32 tsleep = 1000;
	int i, err;

	if (apc->port_is_up)
		return -EINVAL;

	mana_chn_setxdp(apc, NULL);

	if (gd->gdma_context->is_pf)
		mana_pf_deregister_filter(apc);

	/* No packet can be transmitted now since apc->port_is_up is false.
	 * There is still a tiny chance that mana_poll_tx_cq() can re-enable
	 * a txq because it may not timely see apc->port_is_up being cleared
	 * to false, but it doesn't matter since mana_start_xmit() drops any
	 * new packets due to apc->port_is_up being false.
	 *
	 * Drain all the in-flight TX packets.
	 * A timeout of 120 seconds for all the queues is used.
	 * This will break the while loop when h/w is not responding.
	 * This value of 120 has been decided here considering max number of
	 * queues.
	 */
	for (i = 0; i < apc->num_queues; i++) {
		txq = &apc->tx_qp[i].txq;
		while (atomic_read(&txq->pending_sends) > 0 &&
		       time_before(jiffies, timeout)) {
			usleep_range(tsleep, tsleep + 1000);
		}
		if (atomic_read(&txq->pending_sends)) {
			err = pcie_flr(to_pci_dev(gd->gdma_context->dev));
			if (err)
				netdev_err(ndev, "flr failed %d with %d pkts pending in txq %u\n",
					   err, atomic_read(&txq->pending_sends),
					   txq->gdma_txq_id);
			break;
		}
	}

	for (i = 0; i < apc->num_queues; i++) {
		txq = &apc->tx_qp[i].txq;
		while ((skb = skb_dequeue(&txq->pending_skbs))) {
			mana_unmap_skb(skb, apc);
			dev_kfree_skb_any(skb);
		}
		atomic_set(&txq->pending_sends, 0);
	}
	/* We're 100% sure the queues can no longer be woken up, because
	 * we're sure now mana_poll_tx_cq() can't be running.
	 */

	apc->rss_state = TRI_STATE_FALSE;
	err = mana_config_rss(apc, TRI_STATE_FALSE, false, false);
	if (err) {
		netdev_err(ndev, "Failed to disable vPort: %d\n", err);
		return err;
	}

	mana_destroy_vport(apc);

	return 0;
}

int mana_detach(struct net_device *ndev, bool from_close)
{
	struct mana_port_context *apc = netdev_priv(ndev);
	int err;

	apc->port_st_save = apc->port_is_up;
	apc->port_is_up = false;

	/* Ensure port state updated before txq state */
	smp_wmb();

	netif_tx_disable(ndev);
	netif_carrier_off(ndev);

	if (apc->port_st_save) {
		err = mana_dealloc_queues(ndev);
		if (err)
			return err;
	}

	if (!from_close) {
		netif_device_detach(ndev);
		mana_cleanup_port_context(apc);
	}

	return 0;
}

static int mana_probe_port(struct mana_context *ac, int port_idx,
			   struct net_device **ndev_storage)
{
	struct gdma_context *gc = ac->gdma_dev->gdma_context;
	struct mana_port_context *apc;
	struct net_device *ndev;
	int err;

	ndev = alloc_etherdev_mq(sizeof(struct mana_port_context),
				 gc->max_num_queues);
	if (!ndev)
		return -ENOMEM;

	*ndev_storage = ndev;

	apc = netdev_priv(ndev);
	apc->ac = ac;
	apc->ndev = ndev;
	apc->max_queues = gc->max_num_queues;
	apc->num_queues = gc->max_num_queues;
	apc->port_handle = INVALID_MANA_HANDLE;
	apc->pf_filter_handle = INVALID_MANA_HANDLE;
	apc->port_idx = port_idx;

	mutex_init(&apc->vport_mutex);
	apc->vport_use_count = 0;

	ndev->netdev_ops = &mana_devops;
	ndev->ethtool_ops = &mana_ethtool_ops;
	ndev->mtu = ETH_DATA_LEN;
	ndev->max_mtu = gc->adapter_mtu - ETH_HLEN;
	ndev->min_mtu = ETH_MIN_MTU;
	ndev->needed_headroom = MANA_HEADROOM;
	ndev->dev_port = port_idx;
	SET_NETDEV_DEV(ndev, gc->dev);

	netif_carrier_off(ndev);

	netdev_rss_key_fill(apc->hashkey, MANA_HASH_KEY_SIZE);

	err = mana_init_port(ndev);
	if (err)
		goto free_net;

	netdev_lockdep_set_classes(ndev);

	ndev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
	ndev->hw_features |= NETIF_F_RXCSUM;
	ndev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
	ndev->hw_features |= NETIF_F_RXHASH;
	ndev->features = ndev->hw_features | NETIF_F_HW_VLAN_CTAG_TX |
			 NETIF_F_HW_VLAN_CTAG_RX;
	ndev->vlan_features = ndev->features;
	ndev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
			     NETDEV_XDP_ACT_NDO_XMIT;

	err = register_netdev(ndev);
	if (err) {
		netdev_err(ndev, "Unable to register netdev.\n");
		goto reset_apc;
	}

	return 0;

reset_apc:
	mana_cleanup_port_context(apc);
free_net:
	*ndev_storage = NULL;
	netdev_err(ndev, "Failed to probe vPort %d: %d\n", port_idx, err);
	free_netdev(ndev);
	return err;
}

static void adev_release(struct device *dev)
{
	struct mana_adev *madev = container_of(dev, struct mana_adev, adev.dev);

	kfree(madev);
}

static void remove_adev(struct gdma_dev *gd)
{
	struct auxiliary_device *adev = gd->adev;
	int id = adev->id;

	auxiliary_device_delete(adev);
	auxiliary_device_uninit(adev);

	mana_adev_idx_free(id);
	gd->adev = NULL;
}

static int add_adev(struct gdma_dev *gd)
{
	struct auxiliary_device *adev;
	struct mana_adev *madev;
	int ret;

	madev = kzalloc(sizeof(*madev), GFP_KERNEL);
	if (!madev)
		return -ENOMEM;

	adev = &madev->adev;
	ret = mana_adev_idx_alloc();
	if (ret < 0)
		goto idx_fail;
	adev->id = ret;

	adev->name = "rdma";
	adev->dev.parent = gd->gdma_context->dev;
	adev->dev.release = adev_release;

	ret = auxiliary_device_init(adev);
	if (ret)
		goto init_fail;

	ret = auxiliary_device_add(adev);
	if (ret)
		goto add_fail;

	gd->adev = adev;
	return 0;

add_fail:
	auxiliary_device_uninit(adev);

init_fail:
	mana_adev_idx_free(adev->id);

idx_fail:
	kfree(madev);

	return ret;
}

int mana_probe(struct gdma_dev *gd, bool resuming)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct device *dev = gc->dev;
	u16 num_ports = 0;
	int err;
	int i;

	dev_info(dev,
		 "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n",
		 MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION);

	err = mana_gd_register_device(gd);
	if (err)
		return err;

	if (!resuming) {
		ac = kzalloc(sizeof(*ac), GFP_KERNEL);
		if (!ac)
			return -ENOMEM;

		ac->gdma_dev = gd;
		gd->driver_data = ac;
	}

	err = mana_create_eq(ac);
	if (err)
		goto out;

	err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION,
				    MANA_MICRO_VERSION, &num_ports);
	if (err)
		goto out;

	if (!resuming) {
		ac->num_ports = num_ports;
	} else {
		if (ac->num_ports != num_ports) {
			dev_err(dev, "The number of vPorts changed: %d->%d\n",
				ac->num_ports, num_ports);
			err = -EPROTO;
			goto out;
		}
	}

	if (ac->num_ports == 0)
		dev_err(dev, "Failed to detect any vPort\n");

	if (ac->num_ports > MAX_PORTS_IN_MANA_DEV)
		ac->num_ports = MAX_PORTS_IN_MANA_DEV;

	if (!resuming) {
		for (i = 0; i < ac->num_ports; i++) {
			err = mana_probe_port(ac, i, &ac->ports[i]);
			if (err)
				break;
		}
	} else {
		for (i = 0; i < ac->num_ports; i++) {
			rtnl_lock();
			err = mana_attach(ac->ports[i]);
			rtnl_unlock();
			if (err)
				break;
		}
	}

	err = add_adev(gd);
out:
	if (err)
		mana_remove(gd, false);

	return err;
}

void mana_remove(struct gdma_dev *gd, bool suspending)
{
	struct gdma_context *gc = gd->gdma_context;
	struct mana_context *ac = gd->driver_data;
	struct device *dev = gc->dev;
	struct net_device *ndev;
	int err;
	int i;

	/* adev currently doesn't support suspending, always remove it */
	if (gd->adev)
		remove_adev(gd);

	for (i = 0; i < ac->num_ports; i++) {
		ndev = ac->ports[i];
		if (!ndev) {
			if (i == 0)
				dev_err(dev, "No net device to remove\n");
			goto out;
		}

		/* All cleanup actions should stay after rtnl_lock(), otherwise
		 * other functions may access partially cleaned up data.
		 */
		rtnl_lock();

		err = mana_detach(ndev, false);
		if (err)
			netdev_err(ndev, "Failed to detach vPort %d: %d\n",
				   i, err);

		if (suspending) {
			/* No need to unregister the ndev. */
			rtnl_unlock();
			continue;
		}

		unregister_netdevice(ndev);

		rtnl_unlock();

		free_netdev(ndev);
	}

	mana_destroy_eq(ac);
out:
	mana_gd_deregister_device(gd);

	if (suspending)
		return;

	gd->driver_data = NULL;
	gd->gdma_context = NULL;
	kfree(ac);
}